[PATCH 6/8] Support non-BMP characters in UDF

* [PATCH 6/8] Support non-BMP characters in UDF
@ 2012-05-15 23:10 Vladimir 'φ-coder/phcoder' Serbinenko
  2012-05-16 14:34 ` Jan Kara
  0 siblings, 1 reply; 11+ messages in thread
From: Vladimir 'φ-coder/phcoder' Serbinenko @ 2012-05-15 23:10 UTC (permalink / raw)
  To: Jan Kara, linux-kernel, linux-fsdevel

[-- Attachment #1: Type: text/plain, Size: 2004 bytes --]

I also have a counterpart for mkudffs/udf-tools, but the SourceForge homepage seems to be abandoned. Does anybody know if there is a new homepage for mkudffs?

Signed-off-by: Vladimir Serbinenko <phcoder@gmail.com>
---
 fs/udf/unicode.c |   28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 9b1b2de..2d8cc12 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -280,6 +280,14 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
 		if (cmp_id == 16)
 			c = (c << 8) | ocu[i++];
 
+		if (cmp_id == 16 && (c & 0xfc00) == 0xd800
+		    && i + 1 < ocu_len && ((ocu[i] & 0xfc) == 0xdc)) {
+			uint16_t l;
+			l = ocu[i++] << 8;
+			l |= ocu[i++];
+			c = (((c & 0x3ff) << 10) | (l & 0x3ff)) + 0x10000;
+		}
+
 		len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
 				    UDF_NAME_LEN - utf_o->u_len);
 		/* Valid character? */
@@ -312,20 +320,30 @@ try_again:
 		if (!len)
 			continue;
 		/* Invalid character, deal with it */
-		if (len < 0 || uni_char > 0xffff) {
+		if (len < 0 || uni_char > 0x10ffff) {
 			len = 1;
 			uni_char = '?';
 		}
 
 		if (uni_char > max_val) {
-			max_val = 0xffffU;
+			max_val = 0x10ffffU;
 			ocu[0] = (uint8_t)0x10U;
 			goto try_again;
 		}
 
-		if (max_val == 0xffffU)
-			ocu[++u_len] = (uint8_t)(uni_char >> 8);
-		ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
+		if (uni_char > 0xffff) {
+			u16 h, l;
+			h = 0xd800 | (((uni_char - 0x10000) >> 10) & 0x3ff);
+			l = 0xdc00 | ((uni_char - 0x10000) & 0x3ff);
+			ocu[++u_len] = (uint8_t)(h >> 8);
+			ocu[++u_len] = (uint8_t)(h & 0xffU);
+			ocu[++u_len] = (uint8_t)(l >> 8);
+			ocu[++u_len] = (uint8_t)(l & 0xffU);
+		} else {
+			if (max_val == 0x10ffffU)
+				ocu[++u_len] = (uint8_t)(uni_char >> 8);
+			ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
+		}
 		i += len - 1;
 	}
 
-- 
1.7.10

-- 
Regards
Vladimir 'φ-coder/phcoder' Serbinenko


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 294 bytes --]

^ permalink raw reply related	[flat|nested] 11+ messages in thread