linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] lib: vsprintf: Fix handling of number field widths in vsscanf
@ 2020-11-16 14:32 Richard Fitzgerald
  2020-11-20 11:05 ` Petr Mladek
  0 siblings, 1 reply; 6+ messages in thread
From: Richard Fitzgerald @ 2020-11-16 14:32 UTC (permalink / raw)
  To: pmladek, rostedt, sergey.senozhatsky
  Cc: linux-kernel, patches, Richard Fitzgerald

The existing code attempted to handle numbers by doing a strto[u]l(),
ignoring the field width, and then bitshifting the field out of the
converted value. If the string contains a run of valid digits longer
than will fit in a long or long long, this would overflow and no amount
of bitshifting can recover the correct value.

This patch fixes vsscanf to obey number field widths.

A new _parse_integer_limit() is added that takes a limit for the number
of characters to parse. A length of INT_MAX is effectively unlimited, as
we are not likely to need parsing of digit strings >INT_MAX length.

The number field conversion in vsscanf is changed to use this new
_parse_integer_limit() function so that field widths are obeyed when
parsing the number. Note also that the conversion is always done as a
long long - as there's currently no overflow checking there is no point
implementing separate long and long long conversions.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
---
 lib/kstrtox.c  | 14 ++++++++---
 lib/kstrtox.h  |  2 ++
 lib/vsprintf.c | 65 +++++++++++++++++++++++++++-----------------------
 3 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index a14ccf905055..9867501a4ab0 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -39,20 +39,23 @@ const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
 
 /*
  * Convert non-negative integer string representation in explicitly given radix
- * to an integer.
+ * to an integer. The maximum number of characters to convert can be given.
+ * A character limit of INT_MAX is effectively unlimited since a string that
+ * long is unreasonable.
  * Return number of characters consumed maybe or-ed with overflow bit.
  * If overflow occurs, result integer (incorrect) is still returned.
  *
  * Don't you dare use this function.
  */
-unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
+unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned long long *p,
+				  int max_chars)
 {
 	unsigned long long res;
 	unsigned int rv;
 
 	res = 0;
 	rv = 0;
-	while (1) {
+	for (; max_chars > 0; max_chars--) {
 		unsigned int c = *s;
 		unsigned int lc = c | 0x20; /* don't tolower() this line */
 		unsigned int val;
@@ -82,6 +85,11 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
 	return rv;
 }
 
+unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
+{
+	return _parse_integer_limit(s, base, p, INT_MAX);
+}
+
 static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
 {
 	unsigned long long _res;
diff --git a/lib/kstrtox.h b/lib/kstrtox.h
index 3b4637bcd254..4e5dfa3437dc 100644
--- a/lib/kstrtox.h
+++ b/lib/kstrtox.h
@@ -4,6 +4,8 @@
 
 #define KSTRTOX_OVERFLOW	(1U << 31)
 const char *_parse_integer_fixup_radix(const char *s, unsigned int *base);
+unsigned int _parse_integer_limit(const char *s, unsigned int base,
+				  unsigned long long *res, int max_chars);
 unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *res);
 
 #endif
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 14c9a6af1b23..8ec47b5da2cb 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -53,6 +53,25 @@
 #include <linux/string_helpers.h>
 #include "kstrtox.h"
 
+static unsigned long long simple_strntoull(const char *startp, int max_chars,
+					   char **endp, unsigned int base)
+{
+	const char *cp;
+	unsigned long long result;
+	unsigned int rv;
+
+	cp = _parse_integer_fixup_radix(startp, &base);
+	max_chars -= (cp - startp);
+	rv = _parse_integer_limit(cp, base, &result, max_chars);
+	/* FIXME */
+	cp += (rv & ~KSTRTOX_OVERFLOW);
+
+	if (endp)
+		*endp = (char *)cp;
+
+	return result;
+}
+
 /**
  * simple_strtoull - convert a string to an unsigned long long
  * @cp: The start of the string
@@ -63,18 +82,7 @@
  */
 unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
 {
-	unsigned long long result;
-	unsigned int rv;
-
-	cp = _parse_integer_fixup_radix(cp, &base);
-	rv = _parse_integer(cp, base, &result);
-	/* FIXME */
-	cp += (rv & ~KSTRTOX_OVERFLOW);
-
-	if (endp)
-		*endp = (char *)cp;
-
-	return result;
+	return simple_strntoull(cp, INT_MAX, endp, base);
 }
 EXPORT_SYMBOL(simple_strtoull);
 
@@ -126,6 +134,15 @@ long long simple_strtoll(const char *cp, char **endp, unsigned int base)
 }
 EXPORT_SYMBOL(simple_strtoll);
 
+static long long simple_strntoll(const char *cp, int max_chars, char **endp,
+				 unsigned int base)
+{
+	if (*cp == '-')
+		return -simple_strntoull(cp + 1, max_chars - 1, endp, base);
+
+	return simple_strntoull(cp, max_chars, endp, base);
+}
+
 static noinline_for_stack
 int skip_atoi(const char **s)
 {
@@ -3444,25 +3461,13 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
 			break;
 
 		if (is_sign)
-			val.s = qualifier != 'L' ?
-				simple_strtol(str, &next, base) :
-				simple_strtoll(str, &next, base);
+			val.s = simple_strntoll(str,
+						field_width > 0 ? field_width : INT_MAX,
+						&next, base);
 		else
-			val.u = qualifier != 'L' ?
-				simple_strtoul(str, &next, base) :
-				simple_strtoull(str, &next, base);
-
-		if (field_width > 0 && next - str > field_width) {
-			if (base == 0)
-				_parse_integer_fixup_radix(str, &base);
-			while (next - str > field_width) {
-				if (is_sign)
-					val.s = div_s64(val.s, base);
-				else
-					val.u = div_u64(val.u, base);
-				--next;
-			}
-		}
+			val.u = simple_strntoull(str,
+						 field_width > 0 ? field_width : INT_MAX,
+						 &next, base);
 
 		switch (qualifier) {
 		case 'H':	/* that's 'hh' in format */
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] lib: vsprintf: Fix handling of number field widths in vsscanf
  2020-11-16 14:32 [PATCH] lib: vsprintf: Fix handling of number field widths in vsscanf Richard Fitzgerald
@ 2020-11-20 11:05 ` Petr Mladek
  2020-11-20 15:07   ` Steven Rostedt
  0 siblings, 1 reply; 6+ messages in thread
From: Petr Mladek @ 2020-11-20 11:05 UTC (permalink / raw)
  To: Richard Fitzgerald; +Cc: rostedt, sergey.senozhatsky, linux-kernel, patches

On Mon 2020-11-16 14:32:52, Richard Fitzgerald wrote:
> The existing code attempted to handle numbers by doing a strto[u]l(),
> ignoring the field width, and then bitshifting the field out of the
> converted value. If the string contains a run of valid digits longer
> than will fit in a long or long long, this would overflow and no amount
> of bitshifting can recover the correct value.
> 
> This patch fixes vsscanf to obey number field widths.
> 
> A new _parse_integer_limit() is added that takes a limit for the number
> of characters to parse. A length of INT_MAX is effectively unlimited, as
> we are not likely to need parsing of digit strings >INT_MAX length.
> 
> The number field conversion in vsscanf is changed to use this new
> _parse_integer_limit() function so that field widths are obeyed when
> parsing the number. Note also that the conversion is always done as a
> long long - as there's currently no overflow checking there is no point
> implementing separate long and long long conversions.
>
> diff --git a/lib/kstrtox.c b/lib/kstrtox.c
> index a14ccf905055..9867501a4ab0 100644
> --- a/lib/kstrtox.c
> +++ b/lib/kstrtox.c
> @@ -39,20 +39,23 @@ const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
>  
>  /*
>   * Convert non-negative integer string representation in explicitly given radix
> - * to an integer.
> + * to an integer. The maximum number of characters to convert can be given.
> + * A character limit of INT_MAX is effectively unlimited since a string that
> + * long is unreasonable.

The INT_MAX value meaning is obvious. It does not need to be
mentioned. It is the same as with vsnprintf().


>   * Return number of characters consumed maybe or-ed with overflow bit.
>   * If overflow occurs, result integer (incorrect) is still returned.
>   *
>   * Don't you dare use this function.
>   */
> -unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
> +unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned long long *p,
> +				  int max_chars)

Please, use size_t. Passing negative value usually means
that the caller did not handle some situation correctly.
And it actually happened in this patch, see below.

nit: better ballance the length of the lines above. I mean to move
     *p to the next line:

unsigned int _parse_integer_limit(const char *s, unsigned int base,
				  unsigned long long *p, size_t max_chars)


>  {
>  	unsigned long long res;
>  	unsigned int rv;
>  
>  	res = 0;
>  	rv = 0;
> -	while (1) {
> +	for (; max_chars > 0; max_chars--) {
>  		unsigned int c = *s;
>  		unsigned int lc = c | 0x20; /* don't tolower() this line */
>  		unsigned int val;
> @@ -82,6 +85,11 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
>  	return rv;
>  }
>  
> +unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
> +{
> +	return _parse_integer_limit(s, base, p, INT_MAX);
> +}
> +
>  static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
>  {
>  	unsigned long long _res;
> diff --git a/lib/vsprintf.c b/lib/vsprintf.c
> index 14c9a6af1b23..8ec47b5da2cb 100644
> --- a/lib/vsprintf.c
> +++ b/lib/vsprintf.c
> @@ -53,6 +53,25 @@
>  #include <linux/string_helpers.h>
>  #include "kstrtox.h"
>  
> +static unsigned long long simple_strntoull(const char *startp, int max_chars,
> +					   char **endp, unsigned int base)
> +{
> +	const char *cp;
> +	unsigned long long result;
> +	unsigned int rv;
> +
> +	cp = _parse_integer_fixup_radix(startp, &base);
> +	max_chars -= (cp - startp);

Negative value means that _parse_integer_fixup_radix() already
proceed more characters than allowed. I would handle this
the following way:

	if (cp - startp > max_chars) {
		cp = startp + max_chars;
		result = 0LL;
		goto out;

> +	rv = _parse_integer_limit(cp, base, &result, max_chars);
> +	/* FIXME */
> +	cp += (rv & ~KSTRTOX_OVERFLOW);

out:

> +	if (endp)
> +		*endp = (char *)cp;
> +
> +	return result;
> +}
> +
>  /**
>   * simple_strtoull - convert a string to an unsigned long long
>   * @cp: The start of the string
> @@ -126,6 +134,15 @@ long long simple_strtoll(const char *cp, char **endp, unsigned int base)
>  }
>  EXPORT_SYMBOL(simple_strtoll);
>  
> +static long long simple_strntoll(const char *cp, int max_chars, char **endp,
> +				 unsigned int base)
> +{
> +	if (*cp == '-')
> +		return -simple_strntoull(cp + 1, max_chars - 1, endp, base);
> +
> +	return simple_strntoull(cp, max_chars, endp, base);
> +}

Please, use this in simple_strtoll() like it is already done in
simple_strtoull(). I mean:

long long simple_strtoll(const char *cp, char **endp, unsigned int base)
{
	return simple_strntoll(cp, INT_MAX, endp, base);
}

> +
>  static noinline_for_stack
>  int skip_atoi(const char **s)
>  {

Finally, it would be great to add some selftests for this into
lib/test_printf.c.

Thanks a lot for working on this. I like this approach.

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] lib: vsprintf: Fix handling of number field widths in vsscanf
  2020-11-20 11:05 ` Petr Mladek
@ 2020-11-20 15:07   ` Steven Rostedt
  2020-11-20 16:04     ` Richard Fitzgerald
  2020-11-20 16:32     ` Petr Mladek
  0 siblings, 2 replies; 6+ messages in thread
From: Steven Rostedt @ 2020-11-20 15:07 UTC (permalink / raw)
  To: Petr Mladek; +Cc: Richard Fitzgerald, sergey.senozhatsky, linux-kernel, patches

On Fri, 20 Nov 2020 12:05:25 +0100
Petr Mladek <pmladek@suse.com> wrote:

> On Mon 2020-11-16 14:32:52, Richard Fitzgerald wrote:
> > The existing code attempted to handle numbers by doing a strto[u]l(),
> > ignoring the field width, and then bitshifting the field out of the
> > converted value. If the string contains a run of valid digits longer
> > than will fit in a long or long long, this would overflow and no amount
> > of bitshifting can recover the correct value.
> > 
> > This patch fixes vsscanf to obey number field widths.
> > 
> > A new _parse_integer_limit() is added that takes a limit for the number
> > of characters to parse. A length of INT_MAX is effectively unlimited, as
> > we are not likely to need parsing of digit strings >INT_MAX length.
> > 
> > The number field conversion in vsscanf is changed to use this new
> > _parse_integer_limit() function so that field widths are obeyed when
> > parsing the number. Note also that the conversion is always done as a
> > long long - as there's currently no overflow checking there is no point
> > implementing separate long and long long conversions.
> >
> > diff --git a/lib/kstrtox.c b/lib/kstrtox.c
> > index a14ccf905055..9867501a4ab0 100644
> > --- a/lib/kstrtox.c
> > +++ b/lib/kstrtox.c
> > @@ -39,20 +39,23 @@ const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
> >  
> >  /*
> >   * Convert non-negative integer string representation in explicitly given radix
> > - * to an integer.
> > + * to an integer. The maximum number of characters to convert can be given.
> > + * A character limit of INT_MAX is effectively unlimited since a string that
> > + * long is unreasonable.  
> 
> The INT_MAX value meaning is obvious. It does not need to be
> mentioned. It is the same as with vsnprintf().

Yeah, but I never think that restating the obvious is a bad idea.
Especially when something that is obvious to us, is not obvious to a new
comer. There's been lots of times I wish someone mentioned the obvious in a
comment somewhere, because it wasn't obvious to me ;-)

I vote to keep it in.

> 
> 
> >   * Return number of characters consumed maybe or-ed with overflow bit.
> >   * If overflow occurs, result integer (incorrect) is still returned.
> >   *
> >   * Don't you dare use this function.
> >   */
> > -unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
> > +unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned long long *p,
> > +				  int max_chars)  
> 
> Please, use size_t. Passing negative value usually means
> that the caller did not handle some situation correctly.
> And it actually happened in this patch, see below.
> 
> nit: better ballance the length of the lines above. I mean to move
>      *p to the next line:
> 
> unsigned int _parse_integer_limit(const char *s, unsigned int base,
> 				  unsigned long long *p, size_t max_chars)
> 
> 
> >  {
> >  	unsigned long long res;
> >  	unsigned int rv;
> >  
> >  	res = 0;
> >  	rv = 0;
> > -	while (1) {
> > +	for (; max_chars > 0; max_chars--) {
> >  		unsigned int c = *s;
> >  		unsigned int lc = c | 0x20; /* don't tolower() this line */
> >  		unsigned int val;
> > @@ -82,6 +85,11 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
> >  	return rv;
> >  }
> >  
> > +unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
> > +{
> > +	return _parse_integer_limit(s, base, p, INT_MAX);
> > +}
> > +
> >  static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
> >  {
> >  	unsigned long long _res;
> > diff --git a/lib/vsprintf.c b/lib/vsprintf.c
> > index 14c9a6af1b23..8ec47b5da2cb 100644
> > --- a/lib/vsprintf.c
> > +++ b/lib/vsprintf.c
> > @@ -53,6 +53,25 @@
> >  #include <linux/string_helpers.h>
> >  #include "kstrtox.h"
> >  
> > +static unsigned long long simple_strntoull(const char *startp, int max_chars,
> > +					   char **endp, unsigned int base)
> > +{
> > +	const char *cp;
> > +	unsigned long long result;
> > +	unsigned int rv;
> > +
> > +	cp = _parse_integer_fixup_radix(startp, &base);
> > +	max_chars -= (cp - startp);  
> 
> Negative value means that _parse_integer_fixup_radix() already
> proceed more characters than allowed. I would handle this
> the following way:
> 
> 	if (cp - startp > max_chars) {
> 		cp = startp + max_chars;
> 		result = 0LL;
> 		goto out;

Agreed. I was looking at what sscanf() in user space does.

And testing it with the following:

	char *line = "0x123456789abcdef0123456789\n";
	int i;

	for (i = 0; i < 10; i++) {
		char str[32];
		long a, b;

		if (i)
			sprintf(str, "%%%dli%%9lx", i);
		else
			strcpy(str, "%li%6lx");

		ret = sscanf(line, str, &a, &b);
		switch (ret) {
		case 1:
			printf("read 1 '%s': %lx\n", str, a);
			break;
		case 2:
			printf("read 2 '%s': %lx %lx\n", str, a, b);
			break;
		default:
			printf("Failed to read: '%s' ret = %d\n", str, ret);
		}
	}

And the above produced:

read 1 '%li%6lx': 7fffffffffffffff
read 1 '%1li%9lx': 0
read 2 '%2li%9lx': 0 123456789
read 2 '%3li%9lx': 1 23456789a
read 2 '%4li%9lx': 12 3456789ab
read 2 '%5li%9lx': 123 456789abc
read 2 '%6li%9lx': 1234 56789abcd
read 2 '%7li%9lx': 12345 6789abcde
read 2 '%8li%9lx': 123456 789abcdef
read 2 '%9li%9lx': 1234567 89abcdef0

The first line I'm assuming is because %li overflowed (more digits than a
64 bit could hold).

But yeah, we could very much have cp - startp > max_chars.


> 
> > +	rv = _parse_integer_limit(cp, base, &result, max_chars);
> > +	/* FIXME */
> > +	cp += (rv & ~KSTRTOX_OVERFLOW);  
> 
> out:
> 
> > +	if (endp)
> > +		*endp = (char *)cp;
> > +
> > +	return result;
> > +}
> > +
> >  /**
> >   * simple_strtoull - convert a string to an unsigned long long
> >   * @cp: The start of the string
> > @@ -126,6 +134,15 @@ long long simple_strtoll(const char *cp, char **endp, unsigned int base)
> >  }
> >  EXPORT_SYMBOL(simple_strtoll);
> >  
> > +static long long simple_strntoll(const char *cp, int max_chars, char **endp,
> > +				 unsigned int base)
> > +{
> > +	if (*cp == '-')
> > +		return -simple_strntoull(cp + 1, max_chars - 1, endp, base);
> > +
> > +	return simple_strntoull(cp, max_chars, endp, base);
> > +}  
> 
> Please, use this in simple_strtoll() like it is already done in
> simple_strtoull(). I mean:
> 
> long long simple_strtoll(const char *cp, char **endp, unsigned int base)
> {
> 	return simple_strntoll(cp, INT_MAX, endp, base);
> }

Agreed.

> 
> > +
> >  static noinline_for_stack
> >  int skip_atoi(const char **s)
> >  {  
> 
> Finally, it would be great to add some selftests for this into
> lib/test_printf.c.
> 
> Thanks a lot for working on this. I like this approach.

+1

-- Steve


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] lib: vsprintf: Fix handling of number field widths in vsscanf
  2020-11-20 15:07   ` Steven Rostedt
@ 2020-11-20 16:04     ` Richard Fitzgerald
  2020-11-20 16:16       ` Steven Rostedt
  2020-11-20 16:32     ` Petr Mladek
  1 sibling, 1 reply; 6+ messages in thread
From: Richard Fitzgerald @ 2020-11-20 16:04 UTC (permalink / raw)
  To: Steven Rostedt, Petr Mladek; +Cc: sergey.senozhatsky, linux-kernel, patches



On 20/11/2020 15:07, Steven Rostedt wrote:
> On Fri, 20 Nov 2020 12:05:25 +0100
> Petr Mladek <pmladek@suse.com> wrote:
> 
>> On Mon 2020-11-16 14:32:52, Richard Fitzgerald wrote:
>>> The existing code attempted to handle numbers by doing a strto[u]l(),
>>> ignoring the field width, and then bitshifting the field out of the
>>> converted value. If the string contains a run of valid digits longer
>>> than will fit in a long or long long, this would overflow and no amount
>>> of bitshifting can recover the correct value.
>>>
>>> This patch fixes vsscanf to obey number field widths.
>>>
>>> A new _parse_integer_limit() is added that takes a limit for the number
>>> of characters to parse. A length of INT_MAX is effectively unlimited, as
>>> we are not likely to need parsing of digit strings >INT_MAX length.
>>>
>>> The number field conversion in vsscanf is changed to use this new
>>> _parse_integer_limit() function so that field widths are obeyed when
>>> parsing the number. Note also that the conversion is always done as a
>>> long long - as there's currently no overflow checking there is no point
>>> implementing separate long and long long conversions.
>>>
>>> diff --git a/lib/kstrtox.c b/lib/kstrtox.c
>>> index a14ccf905055..9867501a4ab0 100644
>>> --- a/lib/kstrtox.c
>>> +++ b/lib/kstrtox.c
>>> @@ -39,20 +39,23 @@ const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
>>>   
>>>   /*
>>>    * Convert non-negative integer string representation in explicitly given radix
>>> - * to an integer.
>>> + * to an integer. The maximum number of characters to convert can be given.
>>> + * A character limit of INT_MAX is effectively unlimited since a string that
>>> + * long is unreasonable.
>>
>> The INT_MAX value meaning is obvious. It does not need to be
>> mentioned. It is the same as with vsnprintf().
> 
> Yeah, but I never think that restating the obvious is a bad idea.
> Especially when something that is obvious to us, is not obvious to a new
> comer. There's been lots of times I wish someone mentioned the obvious in a
> comment somewhere, because it wasn't obvious to me ;-)
> 
> I vote to keep it in.
> 
>>
>>
>>>    * Return number of characters consumed maybe or-ed with overflow bit.
>>>    * If overflow occurs, result integer (incorrect) is still returned.
>>>    *
>>>    * Don't you dare use this function.
>>>    */
>>> -unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
>>> +unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned long long *p,
>>> +				  int max_chars)
>>
>> Please, use size_t. Passing negative value usually means
>> that the caller did not handle some situation correctly.
>> And it actually happened in this patch, see below.
>>
>> nit: better ballance the length of the lines above. I mean to move
>>       *p to the next line:
>>
>> unsigned int _parse_integer_limit(const char *s, unsigned int base,
>> 				  unsigned long long *p, size_t max_chars)
>>
>>
>>>   {
>>>   	unsigned long long res;
>>>   	unsigned int rv;
>>>   
>>>   	res = 0;
>>>   	rv = 0;
>>> -	while (1) {
>>> +	for (; max_chars > 0; max_chars--) {
>>>   		unsigned int c = *s;
>>>   		unsigned int lc = c | 0x20; /* don't tolower() this line */
>>>   		unsigned int val;
>>> @@ -82,6 +85,11 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
>>>   	return rv;
>>>   }
>>>   
>>> +unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
>>> +{
>>> +	return _parse_integer_limit(s, base, p, INT_MAX);
>>> +}
>>> +
>>>   static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
>>>   {
>>>   	unsigned long long _res;
>>> diff --git a/lib/vsprintf.c b/lib/vsprintf.c
>>> index 14c9a6af1b23..8ec47b5da2cb 100644
>>> --- a/lib/vsprintf.c
>>> +++ b/lib/vsprintf.c
>>> @@ -53,6 +53,25 @@
>>>   #include <linux/string_helpers.h>
>>>   #include "kstrtox.h"
>>>   
>>> +static unsigned long long simple_strntoull(const char *startp, int max_chars,
>>> +					   char **endp, unsigned int base)
>>> +{
>>> +	const char *cp;
>>> +	unsigned long long result;
>>> +	unsigned int rv;
>>> +
>>> +	cp = _parse_integer_fixup_radix(startp, &base);
>>> +	max_chars -= (cp - startp);
>>
>> Negative value means that _parse_integer_fixup_radix() already
>> proceed more characters than allowed. I would handle this
>> the following way:
>>
>> 	if (cp - startp > max_chars) {
>> 		cp = startp + max_chars;
>> 		result = 0LL;
>> 		goto out;
> 
> Agreed. I was looking at what sscanf() in user space does.
> 
> And testing it with the following:
> 
> 	char *line = "0x123456789abcdef0123456789\n";
> 	int i;
> 
> 	for (i = 0; i < 10; i++) {
> 		char str[32];
> 		long a, b;
> 
> 		if (i)
> 			sprintf(str, "%%%dli%%9lx", i);
> 		else
> 			strcpy(str, "%li%6lx");
> 
> 		ret = sscanf(line, str, &a, &b);
> 		switch (ret) {
> 		case 1:
> 			printf("read 1 '%s': %lx\n", str, a);
> 			break;
> 		case 2:
> 			printf("read 2 '%s': %lx %lx\n", str, a, b);
> 			break;
> 		default:
> 			printf("Failed to read: '%s' ret = %d\n", str, ret);
> 		}
> 	}
> 
> And the above produced:
> 
> read 1 '%li%6lx': 7fffffffffffffff
> read 1 '%1li%9lx': 0
> read 2 '%2li%9lx': 0 123456789
> read 2 '%3li%9lx': 1 23456789a
> read 2 '%4li%9lx': 12 3456789ab
> read 2 '%5li%9lx': 123 456789abc
> read 2 '%6li%9lx': 1234 56789abcd
> read 2 '%7li%9lx': 12345 6789abcde
> read 2 '%8li%9lx': 123456 789abcdef
> read 2 '%9li%9lx': 1234567 89abcdef0
> 
> The first line I'm assuming is because %li overflowed (more digits than a
> 64 bit could hold).
> 
> But yeah, we could very much have cp - startp > max_chars.
> 

My code handles the prefix overflow, but I did it by having
__parse_integer_limit() simply give 0 if max_chars <= 0.

So if the field width isn't enough for the prefix/leading '-' and at
least one digit, subtracting the prefix length from the field length
will give a max_chars <= 0. And you'll get a result of 0 as in your
'%2li%9lx' test case.

I thought this was nice because it didn't need to add code to check
for the prefix overflow - it comes inherently from the loop in
__parse_integer_limit(). But I'm willing to change
__parse_integer_limit() to take an unsigned and add explicit checks for
the prefix/'-' overflow cases.

> 
>>
>>> +	rv = _parse_integer_limit(cp, base, &result, max_chars);
>>> +	/* FIXME */
>>> +	cp += (rv & ~KSTRTOX_OVERFLOW);
>>
>> out:
>>
>>> +	if (endp)
>>> +		*endp = (char *)cp;
>>> +
>>> +	return result;
>>> +}
>>> +
>>>   /**
>>>    * simple_strtoull - convert a string to an unsigned long long
>>>    * @cp: The start of the string
>>> @@ -126,6 +134,15 @@ long long simple_strtoll(const char *cp, char **endp, unsigned int base)
>>>   }
>>>   EXPORT_SYMBOL(simple_strtoll);
>>>   
>>> +static long long simple_strntoll(const char *cp, int max_chars, char **endp,
>>> +				 unsigned int base)
>>> +{
>>> +	if (*cp == '-')
>>> +		return -simple_strntoull(cp + 1, max_chars - 1, endp, base);
>>> +
>>> +	return simple_strntoull(cp, max_chars, endp, base);
>>> +}
>>
>> Please, use this in simple_strtoll() like it is already done in
>> simple_strtoull(). I mean:
>>
>> long long simple_strtoll(const char *cp, char **endp, unsigned int base)
>> {
>> 	return simple_strntoll(cp, INT_MAX, endp, base);
>> }
> 
> Agreed.
> 
>>
>>> +
>>>   static noinline_for_stack
>>>   int skip_atoi(const char **s)
>>>   {
>>
>> Finally, it would be great to add some selftests for this into
>> lib/test_printf.c.
>>
>> Thanks a lot for working on this. I like this approach.
> 
> +1
> 
> -- Steve
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] lib: vsprintf: Fix handling of number field widths in vsscanf
  2020-11-20 16:04     ` Richard Fitzgerald
@ 2020-11-20 16:16       ` Steven Rostedt
  0 siblings, 0 replies; 6+ messages in thread
From: Steven Rostedt @ 2020-11-20 16:16 UTC (permalink / raw)
  To: Richard Fitzgerald; +Cc: Petr Mladek, sergey.senozhatsky, linux-kernel, patches

On Fri, 20 Nov 2020 16:04:15 +0000
Richard Fitzgerald <rf@opensource.cirrus.com> wrote:

> > But yeah, we could very much have cp - startp > max_chars.
> >   
> 
> My code handles the prefix overflow, but I did it by having
> __parse_integer_limit() simply give 0 if max_chars <= 0.

Yeah, but if that's the intended result, then it needs to be commented as
such, and the comment needs to be fixed. Right now it's:

/*
 * Convert non-negative integer string representation in explicitly given radix
 * to an integer. The maximum number of characters to convert can be given.
 * A character limit of INT_MAX is effectively unlimited since a string that
 * long is unreasonable.
 * Return number of characters consumed maybe or-ed with overflow bit.
 * If overflow occurs, result integer (incorrect) is still returned.
 *
 * Don't you dare use this function.
 */

That comment needs to include: "If max_chars <= 0, then this returns zero.".

-- Steve


> 
> So if the field width isn't enough for the prefix/leading '-' and at
> least one digit, subtracting the prefix length from the field length
> will give a max_chars <= 0. And you'll get a result of 0 as in your
> '%2li%9lx' test case.
> 
> I thought this was nice because it didn't need to add code to check
> for the prefix overflow - it comes inherently from the loop in
> __parse_integer_limit(). But I'm willing to change
> __parse_integer_limit() to take an unsigned and add explicit checks for
> the prefix/'-' overflow cases.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] lib: vsprintf: Fix handling of number field widths in vsscanf
  2020-11-20 15:07   ` Steven Rostedt
  2020-11-20 16:04     ` Richard Fitzgerald
@ 2020-11-20 16:32     ` Petr Mladek
  1 sibling, 0 replies; 6+ messages in thread
From: Petr Mladek @ 2020-11-20 16:32 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Richard Fitzgerald, sergey.senozhatsky, linux-kernel, patches

On Fri 2020-11-20 10:07:05, Steven Rostedt wrote:
> On Fri, 20 Nov 2020 12:05:25 +0100
> Petr Mladek <pmladek@suse.com> wrote:
> 
> > On Mon 2020-11-16 14:32:52, Richard Fitzgerald wrote:
> > > The existing code attempted to handle numbers by doing a strto[u]l(),
> > > ignoring the field width, and then bitshifting the field out of the
> > > converted value. If the string contains a run of valid digits longer
> > > than will fit in a long or long long, this would overflow and no amount
> > > of bitshifting can recover the correct value.
> > > 
> > > This patch fixes vsscanf to obey number field widths.
> > > 
> > > A new _parse_integer_limit() is added that takes a limit for the number
> > > of characters to parse. A length of INT_MAX is effectively unlimited, as
> > > we are not likely to need parsing of digit strings >INT_MAX length.
> > > 
> > > The number field conversion in vsscanf is changed to use this new
> > > _parse_integer_limit() function so that field widths are obeyed when
> > > parsing the number. Note also that the conversion is always done as a
> > > long long - as there's currently no overflow checking there is no point
> > > implementing separate long and long long conversions.
> > >
> > > diff --git a/lib/kstrtox.c b/lib/kstrtox.c
> > > index a14ccf905055..9867501a4ab0 100644
> > > --- a/lib/kstrtox.c
> > > +++ b/lib/kstrtox.c
> > > @@ -39,20 +39,23 @@ const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
> > >  
> > >  /*
> > >   * Convert non-negative integer string representation in explicitly given radix
> > > - * to an integer.
> > > + * to an integer. The maximum number of characters to convert can be given.
> > > + * A character limit of INT_MAX is effectively unlimited since a string that
> > > + * long is unreasonable.  
> > 
> > The INT_MAX value meaning is obvious. It does not need to be
> > mentioned. It is the same as with vsnprintf().
> 
> Yeah, but I never think that restating the obvious is a bad idea.
> Especially when something that is obvious to us, is not obvious to a new
> comer. There's been lots of times I wish someone mentioned the obvious in a
> comment somewhere, because it wasn't obvious to me ;-)
> 
> I vote to keep it in.

Fair enough.

I have suggested to remove it because it was somehow hard to parse for
me. It confused me more than helped. It is funny because I often write
these long and complicated sentences as well. And I really confuse
people from time to time.

I wonder if the following is more strightforward:

	* Convert non-negative integer string representation in
	* explicitly given radix to an integer.
	*
	* @max_chars limits the number of converted characters.
	* Use INT_MAX when unlimited. It is an arbitrary, big enough,
	* number that is used also by vsnprintf().



> > >   * Return number of characters consumed maybe or-ed with overflow bit.
> > >   * If overflow occurs, result integer (incorrect) is still returned.
> > >   *
> > >   * Don't you dare use this function.
> > >   */
> > > -unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
> > > +unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned long long *p,
> > > +				  int max_chars)  
> > 
> > Please, use size_t. Passing negative value usually means
> > that the caller did not handle some situation correctly.
> > And it actually happened in this patch, see below.
> > 
> > nit: better ballance the length of the lines above. I mean to move
> >      *p to the next line:
> > 
> > unsigned int _parse_integer_limit(const char *s, unsigned int base,
> > 				  unsigned long long *p, size_t max_chars)
> > 
> > 
> > >  {


> > > diff --git a/lib/vsprintf.c b/lib/vsprintf.c
> > > index 14c9a6af1b23..8ec47b5da2cb 100644
> > > --- a/lib/vsprintf.c
> > > +++ b/lib/vsprintf.c
> > > @@ -53,6 +53,25 @@
> > >  #include <linux/string_helpers.h>
> > >  #include "kstrtox.h"
> > >  
> > > +static unsigned long long simple_strntoull(const char *startp, int max_chars,
> > > +					   char **endp, unsigned int base)
> > > +{
> > > +	const char *cp;
> > > +	unsigned long long result;
> > > +	unsigned int rv;
> > > +
> > > +	cp = _parse_integer_fixup_radix(startp, &base);
> > > +	max_chars -= (cp - startp);  
> > 
> > Negative value means that _parse_integer_fixup_radix() already
> > proceed more characters than allowed. I would handle this
> > the following way:
> > 
> > 	if (cp - startp > max_chars) {
> > 		cp = startp + max_chars;
> > 		result = 0LL;
> > 		goto out;
> 
> Agreed. I was looking at what sscanf() in user space does.
> 
> And testing it with the following:
> 
> 	char *line = "0x123456789abcdef0123456789\n";
> 	int i;
> 
> 	for (i = 0; i < 10; i++) {
> 		char str[32];
> 		long a, b;
> 
> 		if (i)
> 			sprintf(str, "%%%dli%%9lx", i);
> 		else
> 			strcpy(str, "%li%6lx");
> 
> 		ret = sscanf(line, str, &a, &b);
> 		switch (ret) {
> 		case 1:
> 			printf("read 1 '%s': %lx\n", str, a);
> 			break;
> 		case 2:
> 			printf("read 2 '%s': %lx %lx\n", str, a, b);
> 			break;
> 		default:
> 			printf("Failed to read: '%s' ret = %d\n", str, ret);
> 		}
> 	}
> 
> And the above produced:
> 
> read 1 '%li%6lx': 7fffffffffffffff
> read 1 '%1li%9lx': 0
> read 2 '%2li%9lx': 0 123456789
> read 2 '%3li%9lx': 1 23456789a
> read 2 '%4li%9lx': 12 3456789ab
> read 2 '%5li%9lx': 123 456789abc
> read 2 '%6li%9lx': 1234 56789abcd
> read 2 '%7li%9lx': 12345 6789abcde
> read 2 '%8li%9lx': 123456 789abcdef
> read 2 '%9li%9lx': 1234567 89abcdef0
> 
> The first line I'm assuming is because %li overflowed (more digits than a
> 64 bit could hold).

Yup, it looks like LONG_LONG_MAX.

> But yeah, we could very much have cp - startp > max_chars.

Thanks a lot for testing it.

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-11-20 16:32 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-16 14:32 [PATCH] lib: vsprintf: Fix handling of number field widths in vsscanf Richard Fitzgerald
2020-11-20 11:05 ` Petr Mladek
2020-11-20 15:07   ` Steven Rostedt
2020-11-20 16:04     ` Richard Fitzgerald
2020-11-20 16:16       ` Steven Rostedt
2020-11-20 16:32     ` Petr Mladek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).