All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] NLS: improve UTF8 -> UTF16 string conversion routine
@ 2011-11-17 21:42 Alan Stern
  2011-11-18  4:15 ` NamJae Jeon
  0 siblings, 1 reply; 10+ messages in thread
From: Alan Stern @ 2011-11-17 21:42 UTC (permalink / raw)
  To: Greg KH, Clemens Ladisch; +Cc: USB list, Kernel development list

The utf8s_to_utf16s conversion routine needs to be improved.  Unlike
its utf16s_to_utf8s sibling, it doesn't accept arguments specifying
the maximum length of the output buffer or the endianness of its
16-bit output.

This patch (as1501) adds the two missing arguments, and adjusts the
only two places in the kernel where the function is called.  A
follow-on patch will add a third caller that does utilize the new
capabilities.

The two conversion routines are still annoyingly inconsistent in the
way they handle invalid byte combinations.  But that's a subject for a
different patch.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Clemens Ladisch <clemens@ladisch.de>

---

 drivers/hv/hv_kvp.c |   10 ++++++----
 fs/fat/namei_vfat.c |    3 ++-
 fs/nls/nls_base.c   |   43 +++++++++++++++++++++++++++++++++----------
 include/linux/nls.h |    5 +++--
 4 files changed, 44 insertions(+), 17 deletions(-)

Index: usb-3.2/include/linux/nls.h
===================================================================
--- usb-3.2.orig/include/linux/nls.h
+++ usb-3.2/include/linux/nls.h
@@ -43,7 +43,7 @@ enum utf16_endian {
 	UTF16_BIG_ENDIAN
 };
 
-/* nls.c */
+/* nls_base.c */
 extern int register_nls(struct nls_table *);
 extern int unregister_nls(struct nls_table *);
 extern struct nls_table *load_nls(char *);
@@ -52,7 +52,8 @@ extern struct nls_table *load_nls_defaul
 
 extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
 extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
-extern int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs);
+extern int utf8s_to_utf16s(const u8 *s, int len,
+		enum utf16_endian endian, wchar_t *pwcs, int maxlen);
 extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
 		enum utf16_endian endian, u8 *s, int maxlen);
 
Index: usb-3.2/fs/nls/nls_base.c
===================================================================
--- usb-3.2.orig/fs/nls/nls_base.c
+++ usb-3.2/fs/nls/nls_base.c
@@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, in
 }
 EXPORT_SYMBOL(utf32_to_utf8);
 
-int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
+static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian)
+{
+	switch (endian) {
+	default:
+		*s = (wchar_t) c;
+		break;
+	case UTF16_LITTLE_ENDIAN:
+		*s = __cpu_to_le16(c);
+		break;
+	case UTF16_BIG_ENDIAN:
+		*s = __cpu_to_be16(c);
+		break;
+	}
+}
+
+int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian,
+		wchar_t *pwcs, int maxlen)
 {
 	u16 *op;
 	int size;
 	unicode_t u;
 
 	op = pwcs;
-	while (*s && len > 0) {
+	while (len > 0 && maxlen > 0 && *s) {
 		if (*s & 0x80) {
 			size = utf8_to_utf32(s, len, &u);
 			if (size < 0)
 				return -EINVAL;
+			s += size;
+			len -= size;
 
 			if (u >= PLANE_SIZE) {
+				if (maxlen < 2)
+					break;
 				u -= PLANE_SIZE;
-				*op++ = (wchar_t) (SURROGATE_PAIR |
-						((u >> 10) & SURROGATE_BITS));
-				*op++ = (wchar_t) (SURROGATE_PAIR |
+				put_utf16(op++, SURROGATE_PAIR |
+						((u >> 10) & SURROGATE_BITS),
+						endian);
+				put_utf16(op++, SURROGATE_PAIR |
 						SURROGATE_LOW |
-						(u & SURROGATE_BITS));
+						(u & SURROGATE_BITS),
+						endian);
+				maxlen -= 2;
 			} else {
-				*op++ = (wchar_t) u;
+				put_utf16(op++, u, endian);
+				maxlen--;
 			}
-			s += size;
-			len -= size;
 		} else {
-			*op++ = *s++;
+			put_utf16(op++, *s++, endian);
 			len--;
+			maxlen--;
 		}
 	}
 	return op - pwcs;
Index: usb-3.2/fs/fat/namei_vfat.c
===================================================================
--- usb-3.2.orig/fs/fat/namei_vfat.c
+++ usb-3.2/fs/fat/namei_vfat.c
@@ -512,7 +512,8 @@ xlate_to_uni(const unsigned char *name, 
 	int charlen;
 
 	if (utf8) {
-		*outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname);
+		*outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN,
+				(wchar_t *) outname, FAT_LFN_LEN + 2);
 		if (*outlen < 0)
 			return *outlen;
 		else if (*outlen > FAT_LFN_LEN)
Index: usb-3.2/drivers/hv/hv_kvp.c
===================================================================
--- usb-3.2.orig/drivers/hv/hv_kvp.c
+++ usb-3.2/drivers/hv/hv_kvp.c
@@ -212,11 +212,13 @@ kvp_respond_to_host(char *key, char *val
 	 * The windows host expects the key/value pair to be encoded
 	 * in utf16.
 	 */
-	keylen = utf8s_to_utf16s(key_name, strlen(key_name),
-				(wchar_t *)kvp_data->data.key);
+	keylen = utf8s_to_utf16s(key_name, strlen(key_name), UTF16_HOST_ENDIAN,
+				(wchar_t *) kvp_data->data.key,
+				HV_KVP_EXCHANGE_MAX_KEY_SIZE / 2);
 	kvp_data->data.key_size = 2*(keylen + 1); /* utf16 encoding */
-	valuelen = utf8s_to_utf16s(value, strlen(value),
-				(wchar_t *)kvp_data->data.value);
+	valuelen = utf8s_to_utf16s(value, strlen(value), UTF16_HOST_ENDIAN,
+				(wchar_t *) kvp_data->data.value,
+				HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2);
 	kvp_data->data.value_size = 2*(valuelen + 1); /* utf16 encoding */
 
 	kvp_data->data.value_type = REG_SZ; /* all our values are strings */


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] NLS: improve UTF8 -> UTF16 string conversion routine
  2011-11-17 21:42 [PATCH] NLS: improve UTF8 -> UTF16 string conversion routine Alan Stern
@ 2011-11-18  4:15 ` NamJae Jeon
  2011-11-18 15:08   ` Alan Stern
  0 siblings, 1 reply; 10+ messages in thread
From: NamJae Jeon @ 2011-11-18  4:15 UTC (permalink / raw)
  To: Alan Stern, OGAWA Hirofumi
  Cc: Greg KH, Clemens Ladisch, USB list, Kernel development list

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 8316 bytes --]

2011/11/18 Alan Stern <stern@rowland.harvard.edu>:
> The utf8s_to_utf16s conversion routine needs to be improved.  Unlike
> its utf16s_to_utf8s sibling, it doesn't accept arguments specifying
> the maximum length of the output buffer or the endianness of its
> 16-bit output.
>
> This patch (as1501) adds the two missing arguments, and adjusts the
> only two places in the kernel where the function is called.  A
> follow-on patch will add a third caller that does utilize the new
> capabilities.
>
> The two conversion routines are still annoyingly inconsistent in the
> way they handle invalid byte combinations.  But that's a subject for a
> different patch.
>
> Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
> CC: Clemens Ladisch <clemens@ladisch.de>
>
> ---
>
>  drivers/hv/hv_kvp.c |   10 ++++++----
>  fs/fat/namei_vfat.c |    3 ++-
>  fs/nls/nls_base.c   |   43 +++++++++++++++++++++++++++++++++----------
>  include/linux/nls.h |    5 +++--
>  4 files changed, 44 insertions(+), 17 deletions(-)
>
> Index: usb-3.2/include/linux/nls.h
> ===================================================================
> --- usb-3.2.orig/include/linux/nls.h
> +++ usb-3.2/include/linux/nls.h
> @@ -43,7 +43,7 @@ enum utf16_endian {
>        UTF16_BIG_ENDIAN
>  };
>
> -/* nls.c */
> +/* nls_base.c */
>  extern int register_nls(struct nls_table *);
>  extern int unregister_nls(struct nls_table *);
>  extern struct nls_table *load_nls(char *);
> @@ -52,7 +52,8 @@ extern struct nls_table *load_nls_defaul
>
>  extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
>  extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
> -extern int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs);
> +extern int utf8s_to_utf16s(const u8 *s, int len,
> +               enum utf16_endian endian, wchar_t *pwcs, int maxlen);
>  extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
>                enum utf16_endian endian, u8 *s, int maxlen);
>
> Index: usb-3.2/fs/nls/nls_base.c
> ===================================================================
> --- usb-3.2.orig/fs/nls/nls_base.c
> +++ usb-3.2/fs/nls/nls_base.c
> @@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, in
>  }
>  EXPORT_SYMBOL(utf32_to_utf8);
>
> -int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
> +static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian)
> +{
> +       switch (endian) {
> +       default:
> +               *s = (wchar_t) c;
> +               break;
> +       case UTF16_LITTLE_ENDIAN:
> +               *s = __cpu_to_le16(c);
> +               break;
> +       case UTF16_BIG_ENDIAN:
> +               *s = __cpu_to_be16(c);
> +               break;
> +       }
> +}
> +
> +int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian,
> +               wchar_t *pwcs, int maxlen)
>  {
>        u16 *op;
>        int size;
>        unicode_t u;
>
>        op = pwcs;
> -       while (*s && len > 0) {
> +       while (len > 0 && maxlen > 0 && *s) {
>                if (*s & 0x80) {
>                        size = utf8_to_utf32(s, len, &u);
>                        if (size < 0)
>                                return -EINVAL;
> +                       s += size;
> +                       len -= size;
Why did you move this code to here ?
>
>                        if (u >= PLANE_SIZE) {
> +                               if (maxlen < 2)
> +                                       break;
>                                u -= PLANE_SIZE;
> -                               *op++ = (wchar_t) (SURROGATE_PAIR |
> -                                               ((u >> 10) & SURROGATE_BITS));
> -                               *op++ = (wchar_t) (SURROGATE_PAIR |
> +                               put_utf16(op++, SURROGATE_PAIR |
> +                                               ((u >> 10) & SURROGATE_BITS),
> +                                               endian);
> +                               put_utf16(op++, SURROGATE_PAIR |
>                                                SURROGATE_LOW |
> -                                               (u & SURROGATE_BITS));
> +                                               (u & SURROGATE_BITS),
> +                                               endian);
> +                               maxlen -= 2;

Why did you use contants value(-2) instead of maxlen -= size; value ?

>                        } else {
> -                               *op++ = (wchar_t) u;
> +                               put_utf16(op++, u, endian);
> +                               maxlen--;
>                        }
> -                       s += size;
> -                       len -= size;
>                } else {
> -                       *op++ = *s++;
> +                       put_utf16(op++, *s++, endian);
>                        len--;
> +                       maxlen--;
>                }
>        }
>        return op - pwcs;
> Index: usb-3.2/fs/fat/namei_vfat.c
> ===================================================================
> --- usb-3.2.orig/fs/fat/namei_vfat.c
> +++ usb-3.2/fs/fat/namei_vfat.c
> @@ -512,7 +512,8 @@ xlate_to_uni(const unsigned char *name,
>        int charlen;
>
>        if (utf8) {
> -               *outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname);
> +               *outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN,
> +                               (wchar_t *) outname, FAT_LFN_LEN + 2);
Is there the reason why you plus 2 to FAT_LFN_LEN ?
>                if (*outlen < 0)
>                        return *outlen;
>                else if (*outlen > FAT_LFN_LEN)
                          return -ENAMETOOLONG;
"else if (*outlen > FAT_LFN_LEN)" code  is needed ? Is there the case
that *outlen is over FAT_LFN_LEN in your patch ?

Thanks.
> Index: usb-3.2/drivers/hv/hv_kvp.c
> ===================================================================
> --- usb-3.2.orig/drivers/hv/hv_kvp.c
> +++ usb-3.2/drivers/hv/hv_kvp.c
> @@ -212,11 +212,13 @@ kvp_respond_to_host(char *key, char *val
>         * The windows host expects the key/value pair to be encoded
>         * in utf16.
>         */
> -       keylen = utf8s_to_utf16s(key_name, strlen(key_name),
> -                               (wchar_t *)kvp_data->data.key);
> +       keylen = utf8s_to_utf16s(key_name, strlen(key_name), UTF16_HOST_ENDIAN,
> +                               (wchar_t *) kvp_data->data.key,
> +                               HV_KVP_EXCHANGE_MAX_KEY_SIZE / 2);
>        kvp_data->data.key_size = 2*(keylen + 1); /* utf16 encoding */
> -       valuelen = utf8s_to_utf16s(value, strlen(value),
> -                               (wchar_t *)kvp_data->data.value);
> +       valuelen = utf8s_to_utf16s(value, strlen(value), UTF16_HOST_ENDIAN,
> +                               (wchar_t *) kvp_data->data.value,
> +                               HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2);
>        kvp_data->data.value_size = 2*(valuelen + 1); /* utf16 encoding */
>
>        kvp_data->data.value_type = REG_SZ; /* all our values are strings */
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>
ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] NLS: improve UTF8 -> UTF16 string conversion routine
  2011-11-18  4:15 ` NamJae Jeon
@ 2011-11-18 15:08   ` Alan Stern
  2011-11-19 14:12     ` NamJae Jeon
  0 siblings, 1 reply; 10+ messages in thread
From: Alan Stern @ 2011-11-18 15:08 UTC (permalink / raw)
  To: NamJae Jeon
  Cc: OGAWA Hirofumi, Greg KH, Clemens Ladisch, USB list,
	Kernel development list

On Fri, 18 Nov 2011, NamJae Jeon wrote:

> 2011/11/18 Alan Stern <stern@rowland.harvard.edu>:
> > The utf8s_to_utf16s conversion routine needs to be improved.  Unlike
> > its utf16s_to_utf8s sibling, it doesn't accept arguments specifying
> > the maximum length of the output buffer or the endianness of its
> > 16-bit output.
> >
> > This patch (as1501) adds the two missing arguments, and adjusts the
> > only two places in the kernel where the function is called.  A
> > follow-on patch will add a third caller that does utilize the new
> > capabilities.
> >
> > The two conversion routines are still annoyingly inconsistent in the
> > way they handle invalid byte combinations.  But that's a subject for a
> > different patch.

> > Index: usb-3.2/fs/nls/nls_base.c
> > ===================================================================
> > --- usb-3.2.orig/fs/nls/nls_base.c
> > +++ usb-3.2/fs/nls/nls_base.c
> > @@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, in
> >  }
> >  EXPORT_SYMBOL(utf32_to_utf8);
> >
> > -int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
> > +static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian)
> > +{
> > +       switch (endian) {
> > +       default:
> > +               *s = (wchar_t) c;
> > +               break;
> > +       case UTF16_LITTLE_ENDIAN:
> > +               *s = __cpu_to_le16(c);
> > +               break;
> > +       case UTF16_BIG_ENDIAN:
> > +               *s = __cpu_to_be16(c);
> > +               break;
> > +       }
> > +}
> > +
> > +int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian,
> > +               wchar_t *pwcs, int maxlen)
> >  {
> >        u16 *op;
> >        int size;
> >        unicode_t u;
> >
> >        op = pwcs;
> > -       while (*s && len > 0) {
> > +       while (len > 0 && maxlen > 0 && *s) {
> >                if (*s & 0x80) {
> >                        size = utf8_to_utf32(s, len, &u);
> >                        if (size < 0)
> >                                return -EINVAL;
> > +                       s += size;
> > +                       len -= size;
> Why did you move this code to here ?

Mainly in order to keep the counter updates near the place where the
character is read.  Also, in an earlier version of the patch, I used a
"continue" instead of the "break" statement three lines below.  For
that to work, the updates to s and len had to be moved up here.

> >                        if (u >= PLANE_SIZE) {
> > +                               if (maxlen < 2)
> > +                                       break;
> >                                u -= PLANE_SIZE;
> > -                               *op++ = (wchar_t) (SURROGATE_PAIR |
> > -                                               ((u >> 10) & SURROGATE_BITS));
> > -                               *op++ = (wchar_t) (SURROGATE_PAIR |
> > +                               put_utf16(op++, SURROGATE_PAIR |
> > +                                               ((u >> 10) & SURROGATE_BITS),
> > +                                               endian);
> > +                               put_utf16(op++, SURROGATE_PAIR |
> >                                                SURROGATE_LOW |
> > -                                               (u & SURROGATE_BITS));
> > +                                               (u & SURROGATE_BITS),
> > +                                               endian);
> > +                               maxlen -= 2;
> 
> Why did you use contants value(-2) instead of maxlen -= size; value ?

"maxlen -= size" would be completely wrong, because size is the length
of the utf8 input and maxlen is the number of 16-bit slots remaining
in the output buffer.  A surrogate pair uses two 16-bit values,
therefore maxlen has to be decreased by 2.

> >                        } else {
> > -                               *op++ = (wchar_t) u;
> > +                               put_utf16(op++, u, endian);
> > +                               maxlen--;
> >                        }
> > -                       s += size;
> > -                       len -= size;
> >                } else {
> > -                       *op++ = *s++;
> > +                       put_utf16(op++, *s++, endian);
> >                        len--;
> > +                       maxlen--;
> >                }
> >        }
> >        return op - pwcs;
> > Index: usb-3.2/fs/fat/namei_vfat.c
> > ===================================================================
> > --- usb-3.2.orig/fs/fat/namei_vfat.c
> > +++ usb-3.2/fs/fat/namei_vfat.c
> > @@ -512,7 +512,8 @@ xlate_to_uni(const unsigned char *name,
> >        int charlen;
> >
> >        if (utf8) {
> > -               *outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname);
> > +               *outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN,
> > +                               (wchar_t *) outname, FAT_LFN_LEN + 2);
> Is there the reason why you plus 2 to FAT_LFN_LEN ?

So that the "(*outlen > FAT_LFN_LEN)" test below will work correctly.
If the maximum length were set to FAT_LFN_LEN then the test would
always fail.  If the maximum length were set to FAT_LFN_LEN + 1 then
the test would fail when the next character to be stored was a
surrogate pair.

> >                if (*outlen < 0)
> >                        return *outlen;
> >                else if (*outlen > FAT_LFN_LEN)
>                           return -ENAMETOOLONG;
> "else if (*outlen > FAT_LFN_LEN)" code  is needed ? Is there the case
> that *outlen is over FAT_LFN_LEN in your patch ?

I have no idea.  That test was already there, I didn't add or change it.

> Thanks.

Alan Stern


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] NLS: improve UTF8 -> UTF16 string conversion routine
  2011-11-18 15:08   ` Alan Stern
@ 2011-11-19 14:12     ` NamJae Jeon
  2011-11-19 15:28       ` Alan Stern
  0 siblings, 1 reply; 10+ messages in thread
From: NamJae Jeon @ 2011-11-19 14:12 UTC (permalink / raw)
  To: Alan Stern
  Cc: OGAWA Hirofumi, Greg KH, Clemens Ladisch, USB list,
	Kernel development list

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 6948 bytes --]

2011/11/19 Alan Stern <stern@rowland.harvard.edu>:
> On Fri, 18 Nov 2011, NamJae Jeon wrote:
>
>> 2011/11/18 Alan Stern <stern@rowland.harvard.edu>:
>> > The utf8s_to_utf16s conversion routine needs to be improved.  Unlike
>> > its utf16s_to_utf8s sibling, it doesn't accept arguments specifying
>> > the maximum length of the output buffer or the endianness of its
>> > 16-bit output.
>> >
>> > This patch (as1501) adds the two missing arguments, and adjusts the
>> > only two places in the kernel where the function is called.  A
>> > follow-on patch will add a third caller that does utilize the new
>> > capabilities.
>> >
>> > The two conversion routines are still annoyingly inconsistent in the
>> > way they handle invalid byte combinations.  But that's a subject for a
>> > different patch.
>
>> > Index: usb-3.2/fs/nls/nls_base.c
>> > ===================================================================
>> > --- usb-3.2.orig/fs/nls/nls_base.c
>> > +++ usb-3.2/fs/nls/nls_base.c
>> > @@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, in
>> >  }
>> >  EXPORT_SYMBOL(utf32_to_utf8);
>> >
>> > -int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
>> > +static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian)
>> > +{
>> > +       switch (endian) {
>> > +       default:
>> > +               *s = (wchar_t) c;
>> > +               break;
>> > +       case UTF16_LITTLE_ENDIAN:
>> > +               *s = __cpu_to_le16(c);
>> > +               break;
>> > +       case UTF16_BIG_ENDIAN:
>> > +               *s = __cpu_to_be16(c);
>> > +               break;
>> > +       }
>> > +}
>> > +
>> > +int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian,
>> > +               wchar_t *pwcs, int maxlen)
>> >  {
>> >        u16 *op;
>> >        int size;
>> >        unicode_t u;
>> >
>> >        op = pwcs;
>> > -       while (*s && len > 0) {
>> > +       while (len > 0 && maxlen > 0 && *s) {
>> >                if (*s & 0x80) {
>> >                        size = utf8_to_utf32(s, len, &u);
>> >                        if (size < 0)
>> >                                return -EINVAL;
>> > +                       s += size;
>> > +                       len -= size;
>> Why did you move this code to here ?
>
> Mainly in order to keep the counter updates near the place where the
> character is read.  Also, in an earlier version of the patch, I used a
> "continue" instead of the "break" statement three lines below.  For
> that to work, the updates to s and len had to be moved up here.
>
>> >                        if (u >= PLANE_SIZE) {
>> > +                               if (maxlen < 2)
>> > +                                       break;
>> >                                u -= PLANE_SIZE;
>> > -                               *op++ = (wchar_t) (SURROGATE_PAIR |
>> > -                                               ((u >> 10) & SURROGATE_BITS));
>> > -                               *op++ = (wchar_t) (SURROGATE_PAIR |
>> > +                               put_utf16(op++, SURROGATE_PAIR |
>> > +                                               ((u >> 10) & SURROGATE_BITS),
>> > +                                               endian);
>> > +                               put_utf16(op++, SURROGATE_PAIR |
>> >                                                SURROGATE_LOW |
>> > -                                               (u & SURROGATE_BITS));
>> > +                                               (u & SURROGATE_BITS),
>> > +                                               endian);
>> > +                               maxlen -= 2;
>>
>> Why did you use contants value(-2) instead of maxlen -= size; value ?
>
> "maxlen -= size" would be completely wrong, because size is the length
> of the utf8 input and maxlen is the number of 16-bit slots remaining
> in the output buffer.  A surrogate pair uses two 16-bit values,
> therefore maxlen has to be decreased by 2.
If so, len should also be minus -2 constant value like maxlen ?
and why does this code(if (maxlen < 2)) is needed ? If len is smaller than 2 ?

>> >                        } else {
>> > -                               *op++ = (wchar_t) u;
>> > +                               put_utf16(op++, u, endian);
>> > +                               maxlen--;
>> >                        }
>> > -                       s += size;
>> > -                       len -= size;
>> >                } else {
>> > -                       *op++ = *s++;
>> > +                       put_utf16(op++, *s++, endian);
>> >                        len--;
>> > +                       maxlen--;
>> >                }
>> >        }
>> >        return op - pwcs;
>> > Index: usb-3.2/fs/fat/namei_vfat.c
>> > ===================================================================
>> > --- usb-3.2.orig/fs/fat/namei_vfat.c
>> > +++ usb-3.2/fs/fat/namei_vfat.c
>> > @@ -512,7 +512,8 @@ xlate_to_uni(const unsigned char *name,
>> >        int charlen;
>> >
>> >        if (utf8) {
>> > -               *outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname);
>> > +               *outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN,
>> > +                               (wchar_t *) outname, FAT_LFN_LEN + 2);
>> Is there the reason why you plus 2 to FAT_LFN_LEN ?
>
> So that the "(*outlen > FAT_LFN_LEN)" test below will work correctly.
> If the maximum length were set to FAT_LFN_LEN then the test would
> always fail.  If the maximum length were set to FAT_LFN_LEN + 1 then
> the test would fail when the next character to be stored was a
> surrogate pair.
Although we are using maxlen, I don't know why do we check case that
outlen is bigger than FAT_LFN_LEN.
>
>> >                if (*outlen < 0)
>> >                        return *outlen;
>> >                else if (*outlen > FAT_LFN_LEN)
>>                           return -ENAMETOOLONG;
>> "else if (*outlen > FAT_LFN_LEN)" code  is needed ? Is there the case
>> that *outlen is over FAT_LFN_LEN in your patch ?
>
> I have no idea.  That test was already there, I didn't add or change it.
>
>> Thanks.
>
> Alan Stern
>
>
ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] NLS: improve UTF8 -> UTF16 string conversion routine
  2011-11-19 14:12     ` NamJae Jeon
@ 2011-11-19 15:28       ` Alan Stern
  2011-11-19 15:52         ` NamJae Jeon
  0 siblings, 1 reply; 10+ messages in thread
From: Alan Stern @ 2011-11-19 15:28 UTC (permalink / raw)
  To: NamJae Jeon
  Cc: OGAWA Hirofumi, Greg KH, Clemens Ladisch, USB list,
	Kernel development list

On Sat, 19 Nov 2011, NamJae Jeon wrote:

> >> > +int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian,
> >> > +               wchar_t *pwcs, int maxlen)
> >> >  {
> >> >        u16 *op;
> >> >        int size;
> >> >        unicode_t u;
> >> >
> >> >        op = pwcs;
> >> > -       while (*s && len > 0) {
> >> > +       while (len > 0 && maxlen > 0 && *s) {
> >> >                if (*s & 0x80) {
> >> >                        size = utf8_to_utf32(s, len, &u);
> >> >                        if (size < 0)
> >> >                                return -EINVAL;
> >> > +                       s += size;
> >> > +                       len -= size;
...
> >> >                        if (u >= PLANE_SIZE) {
> >> > +                               if (maxlen < 2)
> >> > +                                       break;
> >> >                                u -= PLANE_SIZE;
> >> > -                               *op++ = (wchar_t) (SURROGATE_PAIR |
> >> > -                                               ((u >> 10) & SURROGATE_BITS));
> >> > -                               *op++ = (wchar_t) (SURROGATE_PAIR |
> >> > +                               put_utf16(op++, SURROGATE_PAIR |
> >> > +                                               ((u >> 10) & SURROGATE_BITS),
> >> > +                                               endian);
> >> > +                               put_utf16(op++, SURROGATE_PAIR |
> >> >                                                SURROGATE_LOW |
> >> > -                                               (u & SURROGATE_BITS));
> >> > +                                               (u & SURROGATE_BITS),
> >> > +                                               endian);
> >> > +                               maxlen -= 2;
> >>
> >> Why did you use contants value(-2) instead of maxlen -= size; value ?
> >
> > "maxlen -= size" would be completely wrong, because size is the length
> > of the utf8 input and maxlen is the number of 16-bit slots remaining
> > in the output buffer.  A surrogate pair uses two 16-bit values,
> > therefore maxlen has to be decreased by 2.
> If so, len should also be minus -2 constant value like maxlen ?

You seem to be confused.  "len" refers to the input string and "maxlen" 
refers to the output string.  They have no connection to one another.  
Would it help if "maxlen" were named "maxout" instead?

> and why does this code(if (maxlen < 2)) is needed ? If len is smaller than 2 ?

If maxlen < 2 then there is room in the output buffer for only one more
data value -- but a surrogate pair occupies two data values.  Hence
there isn't room to store the pair in the output buffer, so the loop
must terminate.

> >> >                        } else {
> >> > -                               *op++ = (wchar_t) u;
> >> > +                               put_utf16(op++, u, endian);
> >> > +                               maxlen--;
> >> >                        }
> >> > -                       s += size;
> >> > -                       len -= size;
> >> >                } else {
> >> > -                       *op++ = *s++;
> >> > +                       put_utf16(op++, *s++, endian);
> >> >                        len--;
> >> > +                       maxlen--;
> >> >                }
> >> >        }
> >> >        return op - pwcs;
> >> > Index: usb-3.2/fs/fat/namei_vfat.c
> >> > ===================================================================
> >> > --- usb-3.2.orig/fs/fat/namei_vfat.c
> >> > +++ usb-3.2/fs/fat/namei_vfat.c
> >> > @@ -512,7 +512,8 @@ xlate_to_uni(const unsigned char *name,
> >> >        int charlen;
> >> >
> >> >        if (utf8) {
> >> > -               *outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname);
> >> > +               *outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN,
> >> > +                               (wchar_t *) outname, FAT_LFN_LEN + 2);
> >> Is there the reason why you plus 2 to FAT_LFN_LEN ?
> >
> > So that the "(*outlen > FAT_LFN_LEN)" test below will work correctly.
> > If the maximum length were set to FAT_LFN_LEN then the test would
> > always fail.  If the maximum length were set to FAT_LFN_LEN + 1 then
> > the test would fail when the next character to be stored was a
> > surrogate pair.
> Although we are using maxlen, I don't know why do we check case that
> outlen is bigger than FAT_LFN_LEN.

Probably because the filesystem code requires that the UTF16 string fit 
into a certain amount of space.  If *outlen > FAT_LFN_LEN then the 
string doesn't fit.

Alan Stern


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] NLS: improve UTF8 -> UTF16 string conversion routine
  2011-11-19 15:28       ` Alan Stern
@ 2011-11-19 15:52         ` NamJae Jeon
  2011-11-19 15:54           ` NamJae Jeon
  2011-11-21 15:15           ` [PATCH] NLS: raname "maxlen" to "maxout" in UTF conversion routines Alan Stern
  0 siblings, 2 replies; 10+ messages in thread
From: NamJae Jeon @ 2011-11-19 15:52 UTC (permalink / raw)
  To: Alan Stern
  Cc: OGAWA Hirofumi, Greg KH, Clemens Ladisch, USB list,
	Kernel development list

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 7044 bytes --]

2011/11/20 Alan Stern <stern@rowland.harvard.edu>:
> On Sat, 19 Nov 2011, NamJae Jeon wrote:
>
>> >> > +int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian,
>> >> > +               wchar_t *pwcs, int maxlen)
>> >> >  {
>> >> >        u16 *op;
>> >> >        int size;
>> >> >        unicode_t u;
>> >> >
>> >> >        op = pwcs;
>> >> > -       while (*s && len > 0) {
>> >> > +       while (len > 0 && maxlen > 0 && *s) {
>> >> >                if (*s & 0x80) {
>> >> >                        size = utf8_to_utf32(s, len, &u);
>> >> >                        if (size < 0)
>> >> >                                return -EINVAL;
>> >> > +                       s += size;
>> >> > +                       len -= size;
> ...
>> >> >                        if (u >= PLANE_SIZE) {
>> >> > +                               if (maxlen < 2)
>> >> > +                                       break;
>> >> >                                u -= PLANE_SIZE;
>> >> > -                               *op++ = (wchar_t) (SURROGATE_PAIR |
>> >> > -                                               ((u >> 10) & SURROGATE_BITS));
>> >> > -                               *op++ = (wchar_t) (SURROGATE_PAIR |
>> >> > +                               put_utf16(op++, SURROGATE_PAIR |
>> >> > +                                               ((u >> 10) & SURROGATE_BITS),
>> >> > +                                               endian);
>> >> > +                               put_utf16(op++, SURROGATE_PAIR |
>> >> >                                                SURROGATE_LOW |
>> >> > -                                               (u & SURROGATE_BITS));
>> >> > +                                               (u & SURROGATE_BITS),
>> >> > +                                               endian);
>> >> > +                               maxlen -= 2;
>> >>
>> >> Why did you use contants value(-2) instead of maxlen -= size; value ?
>> >
>> > "maxlen -= size" would be completely wrong, because size is the length
>> > of the utf8 input and maxlen is the number of 16-bit slots remaining
>> > in the output buffer.  A surrogate pair uses two 16-bit values,
>> > therefore maxlen has to be decreased by 2.
>> If so, len should also be minus -2 constant value like maxlen ?
>
> You seem to be confused.  "len" refers to the input string and "maxlen"
> refers to the output string.  They have no connection to one another.
> Would it help if "maxlen" were named "maxout" instead?
okay, I knew clearly. If you will, I'm grateful.
I suggest code is a little changed about maxout. how do you think ?
--------------------------------------------------------------------------------------------------------------
+                       s += size;
+                       len -= size;
+                       maxout--

                       if (u >= PLANE_SIZE) {
+                               if (maxlen < 2)
+                                       break;
                               u -= PLANE_SIZE;
-                               *op++ = (wchar_t) (SURROGATE_PAIR |
-                                               ((u >> 10) & SURROGATE_BITS));
-                               *op++ = (wchar_t) (SURROGATE_PAIR |
+                               put_utf16(op++, SURROGATE_PAIR |
+                                               ((u >> 10) & SURROGATE_BITS),
+                                               endian);
+                               put_utf16(op++, SURROGATE_PAIR |
                                               SURROGATE_LOW |
-                                               (u & SURROGATE_BITS));
+                                               (u & SURROGATE_BITS),
+                                               endian);
+                               maxlen--;
                       } else {
-                               *op++ = (wchar_t) u;
+                               put_utf16(op++, u, endian);
                     }

-----------------------------------------------------------------------------------------------------------------------------------------

>
>> and why does this code(if (maxlen < 2)) is needed ? If len is smaller than 2 ?
>
> If maxlen < 2 then there is room in the output buffer for only one more
> data value -- but a surrogate pair occupies two data values.  Hence
> there isn't room to store the pair in the output buffer, so the loop
> must terminate.
>
>> >> >                        } else {
>> >> > -                               *op++ = (wchar_t) u;
>> >> > +                               put_utf16(op++, u, endian);
>> >> > +                               maxlen--;
>> >> >                        }
>> >> > -                       s += size;
>> >> > -                       len -= size;
>> >> >                } else {
>> >> > -                       *op++ = *s++;
>> >> > +                       put_utf16(op++, *s++, endian);
>> >> >                        len--;
>> >> > +                       maxlen--;
>> >> >                }
>> >> >        }
>> >> >        return op - pwcs;
>> >> > Index: usb-3.2/fs/fat/namei_vfat.c
>> >> > ===================================================================
>> >> > --- usb-3.2.orig/fs/fat/namei_vfat.c
>> >> > +++ usb-3.2/fs/fat/namei_vfat.c
>> >> > @@ -512,7 +512,8 @@ xlate_to_uni(const unsigned char *name,
>> >> >        int charlen;
>> >> >
>> >> >        if (utf8) {
>> >> > -               *outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname);
>> >> > +               *outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN,
>> >> > +                               (wchar_t *) outname, FAT_LFN_LEN + 2);
>> >> Is there the reason why you plus 2 to FAT_LFN_LEN ?
>> >
>> > So that the "(*outlen > FAT_LFN_LEN)" test below will work correctly.
>> > If the maximum length were set to FAT_LFN_LEN then the test would
>> > always fail.  If the maximum length were set to FAT_LFN_LEN + 1 then
>> > the test would fail when the next character to be stored was a
>> > surrogate pair.
>> Although we are using maxlen, I don't know why do we check case that
>> outlen is bigger than FAT_LFN_LEN.
>
> Probably because the filesystem code requires that the UTF16 string fit
> into a certain amount of space.  If *outlen > FAT_LFN_LEN then the
> string doesn't fit.
>
> Alan Stern
>
>
ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] NLS: improve UTF8 -> UTF16 string conversion routine
  2011-11-19 15:52         ` NamJae Jeon
@ 2011-11-19 15:54           ` NamJae Jeon
  2011-11-21 15:15           ` [PATCH] NLS: raname "maxlen" to "maxout" in UTF conversion routines Alan Stern
  1 sibling, 0 replies; 10+ messages in thread
From: NamJae Jeon @ 2011-11-19 15:54 UTC (permalink / raw)
  To: Alan Stern
  Cc: OGAWA Hirofumi, Greg KH, Clemens Ladisch, USB list,
	Kernel development list

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 7651 bytes --]

2011/11/20 NamJae Jeon <linkinjeon@gmail.com>:
> 2011/11/20 Alan Stern <stern@rowland.harvard.edu>:
>> On Sat, 19 Nov 2011, NamJae Jeon wrote:
>>
>>> >> > +int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian,
>>> >> > +               wchar_t *pwcs, int maxlen)
>>> >> >  {
>>> >> >        u16 *op;
>>> >> >        int size;
>>> >> >        unicode_t u;
>>> >> >
>>> >> >        op = pwcs;
>>> >> > -       while (*s && len > 0) {
>>> >> > +       while (len > 0 && maxlen > 0 && *s) {
>>> >> >                if (*s & 0x80) {
>>> >> >                        size = utf8_to_utf32(s, len, &u);
>>> >> >                        if (size < 0)
>>> >> >                                return -EINVAL;
>>> >> > +                       s += size;
>>> >> > +                       len -= size;
>> ...
>>> >> >                        if (u >= PLANE_SIZE) {
>>> >> > +                               if (maxlen < 2)
>>> >> > +                                       break;
>>> >> >                                u -= PLANE_SIZE;
>>> >> > -                               *op++ = (wchar_t) (SURROGATE_PAIR |
>>> >> > -                                               ((u >> 10) & SURROGATE_BITS));
>>> >> > -                               *op++ = (wchar_t) (SURROGATE_PAIR |
>>> >> > +                               put_utf16(op++, SURROGATE_PAIR |
>>> >> > +                                               ((u >> 10) & SURROGATE_BITS),
>>> >> > +                                               endian);
>>> >> > +                               put_utf16(op++, SURROGATE_PAIR |
>>> >> >                                                SURROGATE_LOW |
>>> >> > -                                               (u & SURROGATE_BITS));
>>> >> > +                                               (u & SURROGATE_BITS),
>>> >> > +                                               endian);
>>> >> > +                               maxlen -= 2;
>>> >>
>>> >> Why did you use contants value(-2) instead of maxlen -= size; value ?
>>> >
>>> > "maxlen -= size" would be completely wrong, because size is the length
>>> > of the utf8 input and maxlen is the number of 16-bit slots remaining
>>> > in the output buffer.  A surrogate pair uses two 16-bit values,
>>> > therefore maxlen has to be decreased by 2.
>>> If so, len should also be minus -2 constant value like maxlen ?
>>
>> You seem to be confused.  "len" refers to the input string and "maxlen"
>> refers to the output string.  They have no connection to one another.
>> Would it help if "maxlen" were named "maxout" instead?
> okay, I knew clearly. If you will, I'm grateful.
> I suggest code is a little changed about maxout. how do you think ?
> --------------------------------------------------------------------------------------------------------------
> +                       s += size;
> +                       len -= size;
> +                       maxout--
>
>                       if (u >= PLANE_SIZE) {
> +                               if (maxlen < 2)
> +                                       break;
>                               u -= PLANE_SIZE;
> -                               *op++ = (wchar_t) (SURROGATE_PAIR |
> -                                               ((u >> 10) & SURROGATE_BITS));
> -                               *op++ = (wchar_t) (SURROGATE_PAIR |
> +                               put_utf16(op++, SURROGATE_PAIR |
> +                                               ((u >> 10) & SURROGATE_BITS),
> +                                               endian);
> +                               put_utf16(op++, SURROGATE_PAIR |
>                                               SURROGATE_LOW |
> -                                               (u & SURROGATE_BITS));
> +                                               (u & SURROGATE_BITS),
> +                                               endian);
> +                               maxout--;
sorry,, I changed maxout.
>                       } else {
> -                               *op++ = (wchar_t) u;
> +                               put_utf16(op++, u, endian);
>                     }
>
> -----------------------------------------------------------------------------------------------------------------------------------------
>
>>
>>> and why does this code(if (maxlen < 2)) is needed ? If len is smaller than 2 ?
>>
>> If maxlen < 2 then there is room in the output buffer for only one more
>> data value -- but a surrogate pair occupies two data values.  Hence
>> there isn't room to store the pair in the output buffer, so the loop
>> must terminate.
>>
>>> >> >                        } else {
>>> >> > -                               *op++ = (wchar_t) u;
>>> >> > +                               put_utf16(op++, u, endian);
>>> >> > +                               maxlen--;
>>> >> >                        }
>>> >> > -                       s += size;
>>> >> > -                       len -= size;
>>> >> >                } else {
>>> >> > -                       *op++ = *s++;
>>> >> > +                       put_utf16(op++, *s++, endian);
>>> >> >                        len--;
>>> >> > +                       maxlen--;
>>> >> >                }
>>> >> >        }
>>> >> >        return op - pwcs;
>>> >> > Index: usb-3.2/fs/fat/namei_vfat.c
>>> >> > ===================================================================
>>> >> > --- usb-3.2.orig/fs/fat/namei_vfat.c
>>> >> > +++ usb-3.2/fs/fat/namei_vfat.c
>>> >> > @@ -512,7 +512,8 @@ xlate_to_uni(const unsigned char *name,
>>> >> >        int charlen;
>>> >> >
>>> >> >        if (utf8) {
>>> >> > -               *outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname);
>>> >> > +               *outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN,
>>> >> > +                               (wchar_t *) outname, FAT_LFN_LEN + 2);
>>> >> Is there the reason why you plus 2 to FAT_LFN_LEN ?
>>> >
>>> > So that the "(*outlen > FAT_LFN_LEN)" test below will work correctly.
>>> > If the maximum length were set to FAT_LFN_LEN then the test would
>>> > always fail.  If the maximum length were set to FAT_LFN_LEN + 1 then
>>> > the test would fail when the next character to be stored was a
>>> > surrogate pair.
>>> Although we are using maxlen, I don't know why do we check case that
>>> outlen is bigger than FAT_LFN_LEN.
>>
>> Probably because the filesystem code requires that the UTF16 string fit
>> into a certain amount of space.  If *outlen > FAT_LFN_LEN then the
>> string doesn't fit.
>>
>> Alan Stern
>>
>>
>
ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH] NLS: raname "maxlen" to "maxout" in UTF conversion routines
  2011-11-19 15:52         ` NamJae Jeon
  2011-11-19 15:54           ` NamJae Jeon
@ 2011-11-21 15:15           ` Alan Stern
  2011-11-21 22:42             ` NamJae Jeon
  1 sibling, 1 reply; 10+ messages in thread
From: Alan Stern @ 2011-11-21 15:15 UTC (permalink / raw)
  To: Greg KH, NamJae Jeon
  Cc: OGAWA Hirofumi, Clemens Ladisch, USB list, Kernel development list

As requested by NamJae Jeon, this patch (as1503) changes the name of
the "maxlen" parameters to "maxout" in the various UTF conversion
routines.  This should make the role of that parameter more clear.

The patch also renames the "len" parameters to "inlen", for the same
reason.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>

---

NamJae, I hope you like this better.  Since Greg has already accepted 
the first patch, I decided it was easier to write a new patch to change 
the variable names.


 fs/nls/nls_base.c |   46 +++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

Index: usb-3.2/fs/nls/nls_base.c
===================================================================
--- usb-3.2.orig/fs/nls/nls_base.c
+++ usb-3.2/fs/nls/nls_base.c
@@ -52,7 +52,7 @@ static const struct utf8_table utf8_tabl
 #define SURROGATE_LOW	0x00000400
 #define SURROGATE_BITS	0x000003ff
 
-int utf8_to_utf32(const u8 *s, int len, unicode_t *pu)
+int utf8_to_utf32(const u8 *s, int inlen, unicode_t *pu)
 {
 	unsigned long l;
 	int c0, c, nc;
@@ -71,7 +71,7 @@ int utf8_to_utf32(const u8 *s, int len, 
 			*pu = (unicode_t) l;
 			return nc;
 		}
-		if (len <= nc)
+		if (inlen <= nc)
 			return -1;
 		s++;
 		c = (*s ^ 0x80) & 0xFF;
@@ -83,7 +83,7 @@ int utf8_to_utf32(const u8 *s, int len, 
 }
 EXPORT_SYMBOL(utf8_to_utf32);
 
-int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
+int utf32_to_utf8(unicode_t u, u8 *s, int maxout)
 {
 	unsigned long l;
 	int c, nc;
@@ -97,7 +97,7 @@ int utf32_to_utf8(unicode_t u, u8 *s, in
 		return -1;
 
 	nc = 0;
-	for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) {
+	for (t = utf8_table; t->cmask && maxout; t++, maxout--) {
 		nc++;
 		if (l <= t->lmask) {
 			c = t->shift;
@@ -129,24 +129,24 @@ static inline void put_utf16(wchar_t *s,
 	}
 }
 
-int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian,
-		wchar_t *pwcs, int maxlen)
+int utf8s_to_utf16s(const u8 *s, int inlen, enum utf16_endian endian,
+		wchar_t *pwcs, int maxout)
 {
 	u16 *op;
 	int size;
 	unicode_t u;
 
 	op = pwcs;
-	while (len > 0 && maxlen > 0 && *s) {
+	while (inlen > 0 && maxout > 0 && *s) {
 		if (*s & 0x80) {
-			size = utf8_to_utf32(s, len, &u);
+			size = utf8_to_utf32(s, inlen, &u);
 			if (size < 0)
 				return -EINVAL;
 			s += size;
-			len -= size;
+			inlen -= size;
 
 			if (u >= PLANE_SIZE) {
-				if (maxlen < 2)
+				if (maxout < 2)
 					break;
 				u -= PLANE_SIZE;
 				put_utf16(op++, SURROGATE_PAIR |
@@ -156,15 +156,15 @@ int utf8s_to_utf16s(const u8 *s, int len
 						SURROGATE_LOW |
 						(u & SURROGATE_BITS),
 						endian);
-				maxlen -= 2;
+				maxout -= 2;
 			} else {
 				put_utf16(op++, u, endian);
-				maxlen--;
+				maxout--;
 			}
 		} else {
 			put_utf16(op++, *s++, endian);
-			len--;
-			maxlen--;
+			inlen--;
+			maxout--;
 		}
 	}
 	return op - pwcs;
@@ -183,27 +183,27 @@ static inline unsigned long get_utf16(un
 	}
 }
 
-int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian,
-		u8 *s, int maxlen)
+int utf16s_to_utf8s(const wchar_t *pwcs, int inlen, enum utf16_endian endian,
+		u8 *s, int maxout)
 {
 	u8 *op;
 	int size;
 	unsigned long u, v;
 
 	op = s;
-	while (len > 0 && maxlen > 0) {
+	while (inlen > 0 && maxout > 0) {
 		u = get_utf16(*pwcs, endian);
 		if (!u)
 			break;
 		pwcs++;
-		len--;
+		inlen--;
 		if (u > 0x7f) {
 			if ((u & SURROGATE_MASK) == SURROGATE_PAIR) {
 				if (u & SURROGATE_LOW) {
 					/* Ignore character and move on */
 					continue;
 				}
-				if (len <= 0)
+				if (inlen <= 0)
 					break;
 				v = get_utf16(*pwcs, endian);
 				if ((v & SURROGATE_MASK) != SURROGATE_PAIR ||
@@ -214,18 +214,18 @@ int utf16s_to_utf8s(const wchar_t *pwcs,
 				u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10)
 						+ (v & SURROGATE_BITS);
 				pwcs++;
-				len--;
+				inlen--;
 			}
-			size = utf32_to_utf8(u, op, maxlen);
+			size = utf32_to_utf8(u, op, maxout);
 			if (size == -1) {
 				/* Ignore character and move on */
 			} else {
 				op += size;
-				maxlen -= size;
+				maxout -= size;
 			}
 		} else {
 			*op++ = (u8) u;
-			maxlen--;
+			maxout--;
 		}
 	}
 	return op - s;


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] NLS: raname "maxlen" to "maxout" in UTF conversion routines
  2011-11-21 15:15           ` [PATCH] NLS: raname "maxlen" to "maxout" in UTF conversion routines Alan Stern
@ 2011-11-21 22:42             ` NamJae Jeon
  2011-11-21 23:07               ` NamJae Jeon
  0 siblings, 1 reply; 10+ messages in thread
From: NamJae Jeon @ 2011-11-21 22:42 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, OGAWA Hirofumi, Clemens Ladisch, USB list,
	Kernel development list

2011/11/22 Alan Stern <stern@rowland.harvard.edu>:
> As requested by NamJae Jeon, this patch (as1503) changes the name of
> the "maxlen" parameters to "maxout" in the various UTF conversion
> routines.  This should make the role of that parameter more clear.
>
> The patch also renames the "len" parameters to "inlen", for the same
> reason.
>
> Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reviewed-by: NamJae Jeon <linkinjeon@gmail.com>
>
> ---
>
> NamJae, I hope you like this better.  Since Greg has already accepted
> the first patch, I decided it was easier to write a new patch to change
> the variable names.
>
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] NLS: raname "maxlen" to "maxout" in UTF conversion routines
  2011-11-21 22:42             ` NamJae Jeon
@ 2011-11-21 23:07               ` NamJae Jeon
  0 siblings, 0 replies; 10+ messages in thread
From: NamJae Jeon @ 2011-11-21 23:07 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, OGAWA Hirofumi, Clemens Ladisch, USB list,
	Kernel development list

2011/11/22 NamJae Jeon <linkinjeon@gmail.com>:
> 2011/11/22 Alan Stern <stern@rowland.harvard.edu>:
>> As requested by NamJae Jeon, this patch (as1503) changes the name of
>> the "maxlen" parameters to "maxout" in the various UTF conversion
>> routines.  This should make the role of that parameter more clear.
>>
>> The patch also renames the "len" parameters to "inlen", for the same
>> reason.
>>
>> Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
> Reviewed-by: NamJae Jeon <linkinjeon@gmail.com>
>>
>> ---
>>
>> NamJae, I hope you like this better.  Since Greg has already accepted
>> the first patch, I decided it was easier to write a new patch to change
>> the variable names.
>>
Hi, Alan.
Thanks a lot.
>>
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2011-11-21 23:07 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-11-17 21:42 [PATCH] NLS: improve UTF8 -> UTF16 string conversion routine Alan Stern
2011-11-18  4:15 ` NamJae Jeon
2011-11-18 15:08   ` Alan Stern
2011-11-19 14:12     ` NamJae Jeon
2011-11-19 15:28       ` Alan Stern
2011-11-19 15:52         ` NamJae Jeon
2011-11-19 15:54           ` NamJae Jeon
2011-11-21 15:15           ` [PATCH] NLS: raname "maxlen" to "maxout" in UTF conversion routines Alan Stern
2011-11-21 22:42             ` NamJae Jeon
2011-11-21 23:07               ` NamJae Jeon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.