linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 4/4] Support non-BMP characters on UDF
@ 2012-06-01  1:10 Vladimir 'φ-coder/phcoder' Serbinenko
  0 siblings, 0 replies; only message in thread
From: Vladimir 'φ-coder/phcoder' Serbinenko @ 2012-06-01  1:10 UTC (permalink / raw)
  To: Jan Kara, linux-kernel, linux-fsdevel

[-- Attachment #1: Type: text/plain, Size: 1950 bytes --]

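Replace the UCS-2 handling in fs/udf/unicode.c with proper UTF-16, so that
characters outside the BMP (stored as surrogate pairs in 16-bit compressed
names) are decoded and encoded correctly instead of being replaced by '?'.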
Signed-off-by: Vladimir Serbinenko <phcoder@gmail.com>
---
 fs/udf/unicode.c |   35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 7df644d..0d1c93c 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -106,9 +106,20 @@ int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
 	utf_o->u_len = 0;
 	for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
 		/* Expand OSTA compressed Unicode to Unicode */
-		uint32_t c = ocu[i++];
-		if (cmp_id == 16)
-			c = (c << 8) | ocu[i++];
+		unicode_t c;
+		if (cmp_id == 8)
+			c = ocu[i++];
+		else {
+			int s = utf16s_to_unicode((u16 *) (ocu + i),
+						  (ocu_len - i) / 2,
+						  UTF16_BIG_ENDIAN_UNALIGNED,
+						  &c);
+			if (s <= 0) {
+				c = (ocu[i] << 8) | ocu[i+1];
+				s = 1;
+			}
+			i += 2 * s;
+		}
 
 		len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
 				    UDF_NAME_LEN - utf_o->u_len);
@@ -142,20 +153,28 @@ try_again:
 		if (!len)
 			continue;
 		/* Invalid character, deal with it */
-		if (len < 0 || uni_char > 0xffff) {
+		if (len < 0 || uni_char > 0x10ffff) {
 			len = 1;
 			uni_char = '?';
 		}
 
 		if (uni_char > max_val) {
-			max_val = 0xffffU;
+			max_val = 0x10ffffU;
 			ocu[0] = (uint8_t)0x10U;
 			goto try_again;
 		}
 
-		if (max_val == 0xffffU)
-			ocu[++u_len] = (uint8_t)(uni_char >> 8);
-		ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
+		if (max_val == 0x10ffffU) {
+			int s;
+			s = unicode_to_utf16s(uni_char,
+					      UTF16_BIG_ENDIAN_UNALIGNED,
+					      (u16 *) (ocu + u_len + 1),
+					      (length - (u_len + 1)) / 2);
+			if (s <= 0)
+				break;
+			u_len += 2 * s;
+		} else
+			ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
 		i += len - 1;
 	}
 
-- 
1.7.10

-- 
Regards
Vladimir 'φ-coder/phcoder' Serbinenko


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 294 bytes --]

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2012-06-01  1:10 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-06-01  1:10 [PATCH 4/4] Support non-BMP characters on UDF Vladimir 'φ-coder/phcoder' Serbinenko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).