udf: Use UTF-32 <-> UTF-8 conversion functions from NLS

Instead of implementing our own functions converting to and from UTF-8,
use the ones provided by NLS.

Signed-off-by: Jan Kara <jack@suse.cz>
2024-11-24 05:20:53 +07:00 · 2018-04-12 17:06:18 +02:00 · 2018-04-12 17:06:18 +02:00 · b8a41c44a4
commit b8a41c44a4
parent b8333ea1ad
1 changed file with 16 additions and 62 deletions
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -28,6 +28,7 @@
 #include "udf_sb.h"
 #define UNICODE_MAX 0x10ffff
 #define SURROGATE_MASK 0xfffff800
 #define SURROGATE_PAIR 0x0000d800
@@ -40,22 +41,12 @@ static int udf_uni2char_utf8(wchar_t uni,
 	if (boundlen <= 0)
 		return -ENAMETOOLONG;
-	if ((uni & SURROGATE_MASK) == SURROGATE_PAIR)
+	u_len = utf32_to_utf8(uni, out, boundlen);
-		return -EINVAL;
+	if (u_len < 0) {
-
+		if (uni > UNICODE_MAX ||
-	if (uni < 0x80) {
+		    (uni & SURROGATE_MASK) == SURROGATE_PAIR)
-		out[u_len++] = (unsigned char)uni;
+			return -EINVAL;
-	} else if (uni < 0x800) {
+		return -ENAMETOOLONG;
 		if (boundlen < 2)
 			return -ENAMETOOLONG;
 		out[u_len++] = (unsigned char)(0xc0 | (uni >> 6));
 		out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
 	} else {
 		if (boundlen < 3)
 			return -ENAMETOOLONG;
 		out[u_len++] = (unsigned char)(0xe0 | (uni >> 12));
 		out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
 		out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
 	}
 	return u_len;
 }
@@ -64,56 +55,19 @@ static int udf_char2uni_utf8(const unsigned char *in,
 			     int boundlen,
 			     wchar_t *uni)
 {
-	unsigned int utf_char;
+	int u_len;
-	unsigned char c;
+	unicode_t c;
 	int utf_cnt, u_len;
-	utf_char = 0;
+	u_len = utf8_to_utf32(in, boundlen, &c);
-	utf_cnt = 0;
+	if (u_len < 0) {
 	for (u_len = 0; u_len < boundlen;) {
 		c = in[u_len++];
 		/* Complete a multi-byte UTF-8 character */
 		if (utf_cnt) {
 			utf_char = (utf_char << 6) | (c & 0x3f);
 			if (--utf_cnt)
 				continue;
 		} else {
 			/* Check for a multi-byte UTF-8 character */
 			if (c & 0x80) {
 				/* Start a multi-byte UTF-8 character */
 				if ((c & 0xe0) == 0xc0) {
 					utf_char = c & 0x1f;
 					utf_cnt = 1;
 				} else if ((c & 0xf0) == 0xe0) {
 					utf_char = c & 0x0f;
 					utf_cnt = 2;
 				} else if ((c & 0xf8) == 0xf0) {
 					utf_char = c & 0x07;
 					utf_cnt = 3;
 				} else if ((c & 0xfc) == 0xf8) {
 					utf_char = c & 0x03;
 					utf_cnt = 4;
 				} else if ((c & 0xfe) == 0xfc) {
 					utf_char = c & 0x01;
 					utf_cnt = 5;
 				} else {
 					utf_cnt = -1;
 					break;
 				}
 				continue;
 			} else {
 				/* Single byte UTF-8 character (most common) */
 				utf_char = c;
 			}
 		}
 		*uni = utf_char;
 		break;
 	}
 	if (utf_cnt) {
 		*uni = '?';
 		return -EINVAL;
 	}
 	if (c > MAX_WCHAR_T)
 		*uni = '?';
 	else
 		*uni = c;
 	return u_len;
 }