mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-24 10:10:54 +07:00
udf: Add support for decoding UTF-16 characters
Add support for decoding UTF-16 characters outside the Basic Multilingual Plane, as encoded in the CS0 charset of UDF. Signed-off-by: Jan Kara <jack@suse.cz>
This commit is contained in:
parent
ef2e18f1fa
commit
8a0cdef161
103
fs/udf/unicode.c
103
fs/udf/unicode.c
@ -36,25 +36,6 @@
|
|||||||
#define SURROGATE_CHAR_BITS 10
|
#define SURROGATE_CHAR_BITS 10
|
||||||
#define SURROGATE_CHAR_MASK ((1 << SURROGATE_CHAR_BITS) - 1)
|
#define SURROGATE_CHAR_MASK ((1 << SURROGATE_CHAR_BITS) - 1)
|
||||||
|
|
||||||
static int udf_uni2char_utf8(wchar_t uni,
|
|
||||||
unsigned char *out,
|
|
||||||
int boundlen)
|
|
||||||
{
|
|
||||||
int u_len = 0;
|
|
||||||
|
|
||||||
if (boundlen <= 0)
|
|
||||||
return -ENAMETOOLONG;
|
|
||||||
|
|
||||||
u_len = utf32_to_utf8(uni, out, boundlen);
|
|
||||||
if (u_len < 0) {
|
|
||||||
if (uni > UNICODE_MAX ||
|
|
||||||
(uni & SURROGATE_MASK) == SURROGATE_PAIR)
|
|
||||||
return -EINVAL;
|
|
||||||
return -ENAMETOOLONG;
|
|
||||||
}
|
|
||||||
return u_len;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define ILLEGAL_CHAR_MARK '_'
|
#define ILLEGAL_CHAR_MARK '_'
|
||||||
#define EXT_MARK '.'
|
#define EXT_MARK '.'
|
||||||
#define CRC_MARK '#'
|
#define CRC_MARK '#'
|
||||||
@ -62,6 +43,50 @@ static int udf_uni2char_utf8(wchar_t uni,
|
|||||||
/* Number of chars we need to store generated CRC to make filename unique */
|
/* Number of chars we need to store generated CRC to make filename unique */
|
||||||
#define CRC_LEN 5
|
#define CRC_LEN 5
|
||||||
|
|
||||||
|
static unicode_t get_utf16_char(const uint8_t *str_i, int str_i_max_len,
|
||||||
|
int str_i_idx, int u_ch, unicode_t *ret)
|
||||||
|
{
|
||||||
|
unicode_t c;
|
||||||
|
int start_idx = str_i_idx;
|
||||||
|
|
||||||
|
/* Expand OSTA compressed Unicode to Unicode */
|
||||||
|
c = str_i[str_i_idx++];
|
||||||
|
if (u_ch > 1)
|
||||||
|
c = (c << 8) | str_i[str_i_idx++];
|
||||||
|
if ((c & SURROGATE_MASK) == SURROGATE_PAIR) {
|
||||||
|
unicode_t next;
|
||||||
|
|
||||||
|
/* Trailing surrogate char */
|
||||||
|
if (str_i_idx >= str_i_max_len) {
|
||||||
|
c = UNICODE_MAX + 1;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Low surrogate must follow the high one... */
|
||||||
|
if (c & SURROGATE_LOW) {
|
||||||
|
c = UNICODE_MAX + 1;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
WARN_ON_ONCE(u_ch != 2);
|
||||||
|
next = str_i[str_i_idx++] << 8;
|
||||||
|
next |= str_i[str_i_idx++];
|
||||||
|
if ((next & SURROGATE_MASK) != SURROGATE_PAIR ||
|
||||||
|
!(next & SURROGATE_LOW)) {
|
||||||
|
c = UNICODE_MAX + 1;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
c = PLANE_SIZE +
|
||||||
|
((c & SURROGATE_CHAR_MASK) << SURROGATE_CHAR_BITS) +
|
||||||
|
(next & SURROGATE_CHAR_MASK);
|
||||||
|
}
|
||||||
|
out:
|
||||||
|
*ret = c;
|
||||||
|
return str_i_idx - start_idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len,
|
static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len,
|
||||||
int *str_o_idx,
|
int *str_o_idx,
|
||||||
const uint8_t *str_i, int str_i_max_len,
|
const uint8_t *str_i, int str_i_max_len,
|
||||||
@ -70,27 +95,29 @@ static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len,
|
|||||||
int (*conv_f)(wchar_t, unsigned char *, int),
|
int (*conv_f)(wchar_t, unsigned char *, int),
|
||||||
int translate)
|
int translate)
|
||||||
{
|
{
|
||||||
uint32_t c;
|
unicode_t c;
|
||||||
int illChar = 0;
|
int illChar = 0;
|
||||||
int len, gotch = 0;
|
int len, gotch = 0;
|
||||||
|
|
||||||
for (; (!gotch) && (*str_i_idx < str_i_max_len); *str_i_idx += u_ch) {
|
while (!gotch && *str_i_idx < str_i_max_len) {
|
||||||
if (*str_o_idx >= str_o_max_len) {
|
if (*str_o_idx >= str_o_max_len) {
|
||||||
*needsCRC = 1;
|
*needsCRC = 1;
|
||||||
return gotch;
|
return gotch;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Expand OSTA compressed Unicode to Unicode */
|
len = get_utf16_char(str_i, str_i_max_len, *str_i_idx, u_ch,
|
||||||
c = str_i[*str_i_idx];
|
&c);
|
||||||
if (u_ch > 1)
|
/* These chars cannot be converted. Replace them. */
|
||||||
c = (c << 8) | str_i[*str_i_idx + 1];
|
if (c == 0 || c > UNICODE_MAX || (conv_f && c > MAX_WCHAR_T) ||
|
||||||
|
(translate && c == '/')) {
|
||||||
if (translate && (c == '/' || c == 0))
|
|
||||||
illChar = 1;
|
illChar = 1;
|
||||||
else if (illChar)
|
if (!translate)
|
||||||
|
gotch = 1;
|
||||||
|
} else if (illChar)
|
||||||
break;
|
break;
|
||||||
else
|
else
|
||||||
gotch = 1;
|
gotch = 1;
|
||||||
|
*str_i_idx += len;
|
||||||
}
|
}
|
||||||
if (illChar) {
|
if (illChar) {
|
||||||
*needsCRC = 1;
|
*needsCRC = 1;
|
||||||
@ -98,7 +125,15 @@ static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len,
|
|||||||
gotch = 1;
|
gotch = 1;
|
||||||
}
|
}
|
||||||
if (gotch) {
|
if (gotch) {
|
||||||
len = conv_f(c, &str_o[*str_o_idx], str_o_max_len - *str_o_idx);
|
if (conv_f) {
|
||||||
|
len = conv_f(c, &str_o[*str_o_idx],
|
||||||
|
str_o_max_len - *str_o_idx);
|
||||||
|
} else {
|
||||||
|
len = utf32_to_utf8(c, &str_o[*str_o_idx],
|
||||||
|
str_o_max_len - *str_o_idx);
|
||||||
|
if (len < 0)
|
||||||
|
len = -ENAMETOOLONG;
|
||||||
|
}
|
||||||
/* Valid character? */
|
/* Valid character? */
|
||||||
if (len >= 0)
|
if (len >= 0)
|
||||||
*str_o_idx += len;
|
*str_o_idx += len;
|
||||||
@ -106,7 +141,7 @@ static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len,
|
|||||||
*needsCRC = 1;
|
*needsCRC = 1;
|
||||||
gotch = 0;
|
gotch = 0;
|
||||||
} else {
|
} else {
|
||||||
str_o[(*str_o_idx)++] = '?';
|
str_o[(*str_o_idx)++] = ILLEGAL_CHAR_MARK;
|
||||||
*needsCRC = 1;
|
*needsCRC = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -142,12 +177,10 @@ static int udf_name_from_CS0(struct super_block *sb,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
|
if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
|
||||||
conv_f = udf_uni2char_utf8;
|
|
||||||
} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
|
|
||||||
conv_f = UDF_SB(sb)->s_nls_map->uni2char;
|
conv_f = UDF_SB(sb)->s_nls_map->uni2char;
|
||||||
} else
|
else
|
||||||
BUG();
|
conv_f = NULL;
|
||||||
|
|
||||||
cmp_id = ocu[0];
|
cmp_id = ocu[0];
|
||||||
if (cmp_id != 8 && cmp_id != 16) {
|
if (cmp_id != 8 && cmp_id != 16) {
|
||||||
|
Loading…
Reference in New Issue
Block a user