[Qemu-devel] [PATCH 3/4] qobject: Parse non-finite numbers, as an extension

From: Eric Blake <eblake@redhat.com>
To: qemu-devel@nongnu.org
Cc: armbru@redhat.com, Luiz Capitulino <lcapitulino@redhat.com>
Subject: [Qemu-devel] [PATCH 3/4] qobject: Parse non-finite numbers, as an extension
Date: Thu,  9 Jun 2016 20:48:08 -0600	[thread overview]
Message-ID: <1465526889-8339-4-git-send-email-eblake@redhat.com> (raw)
In-Reply-To: <1465526889-8339-1-git-send-email-eblake@redhat.com>

There have been times in the past where we have been careless and
allowed non-finite values to escape as a 'number' type in QMP
output (such as before commit 27ff42e).  This is in violation of
the JSON specification in RFC 7159, and usually caused by
floating-point division by 0 resulting in NaN, although infinity
is also a possible culprit.  While a future patch will tighten the
output generator to avoid a lexical error from a strict JSON
parser, we might as well first fix our parser to accept non-finite
values as an extension, so that we can always read what we have
output in the past ("be liberal in what you accept, strict in what
 you produce").

Rather than complicate the lexer to add LOTS of states for each
letter within 'nan' and 'inf[inity]', I chose to just abuse the
'keyword' token, but had to make it accept upper case.  Also, since
I want to parse '-inf', I had to tweak IN_NEG_NONZERO_NUMBER; and
renamed that in the process (we have always accepted '-0', so the
state name was a misnomer).  Then the parser then does the real magic
of creating a QFloat object if a non-finite "keyword" was recognized.

I intentionally did not support NAN(n-char-sequence) forms, even
though C99 requires it to work for strtod(), in part because
"implementation-specified" n-char-sequence is rather hard to test
in a platform-agnostic manner, and in part because we've never
actually output it.

Improve the testsuite to cover the new extension, including working
around the fact that g_assert_cmpfloat() does NOT test equivalence,
but merely mathematical equality (0.0 and -0.0 are equal but not
equivalent, NaN and NaN are equivalent but not equal), and enhance
the existing tests for -0.0 in the process.

Checkpatch has a false negative, complaining that there should be
spaces around binary '-' in what is really the float literal
-32.20e-10.  It was over my head to figure out how to silence that
one.

Signed-off-by: Eric Blake <eblake@redhat.com>
---
 qobject/json-lexer.c  | 15 ++++++++-----
 qobject/json-parser.c | 13 +++++++++--
 tests/check-qjson.c   | 62 ++++++++++++++++++++++++++++++++++++++++++++-------
 docs/qmp-spec.txt     |  4 ++++
 4 files changed, 79 insertions(+), 15 deletions(-)

diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c
index ebd15d8..de16219 100644
--- a/qobject/json-lexer.c
+++ b/qobject/json-lexer.c
@@ -18,13 +18,14 @@
 #define MAX_TOKEN_SIZE (64ULL << 20)

 /*
- * Required by JSON (RFC 7159), plus \' extension in "":
+ * Required by JSON (RFC 7159), plus \' extension in "", and extension
+ * of parsing case-insensitive non-finite numbers like "NaN" and "-Inf":
  *
  * \"([^\\\"]|(\\\"|\\'|\\\\|\\/|\\b|\\f|\\n|\\r|\\t|
  *    \\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
  * -?(0|[1-9][0-9]*)(.[0-9]+)?([eE][-+]?[0-9]+)?
  * [{}\[\],:]
- * [a-z]+   # covers null, true, false
+ * -?[a-zA-Z]+   # covers null, true, false, nan, inf[inity]
  *
  * Extension of '' strings:
  *
@@ -58,7 +59,7 @@ enum json_lexer_state {
     IN_MANTISSA,
     IN_MANTISSA_DIGITS,
     IN_NONZERO_NUMBER,
-    IN_NEG_NONZERO_NUMBER,
+    IN_NEG_NUMBER,
     IN_KEYWORD,
     IN_ESCAPE,
     IN_ESCAPE_L,
@@ -206,15 +207,18 @@ static const uint8_t json_lexer[][256] =  {
         ['.'] = IN_MANTISSA,
     },

-    [IN_NEG_NONZERO_NUMBER] = {
+    [IN_NEG_NUMBER] = {
         ['0'] = IN_ZERO,
         ['1' ... '9'] = IN_NONZERO_NUMBER,
+        ['a' ... 'z'] = IN_KEYWORD,
+        ['A' ... 'Z'] = IN_KEYWORD,
     },

     /* keywords */
     [IN_KEYWORD] = {
         TERMINAL(JSON_KEYWORD),
         ['a' ... 'z'] = IN_KEYWORD,
+        ['A' ... 'Z'] = IN_KEYWORD,
     },

     /* whitespace */
@@ -264,7 +268,7 @@ static const uint8_t json_lexer[][256] =  {
         ['\''] = IN_SQ_STRING,
         ['0'] = IN_ZERO,
         ['1' ... '9'] = IN_NONZERO_NUMBER,
-        ['-'] = IN_NEG_NONZERO_NUMBER,
+        ['-'] = IN_NEG_NUMBER,
         ['{'] = JSON_LCURLY,
         ['}'] = JSON_RCURLY,
         ['['] = JSON_LSQUARE,
@@ -272,6 +276,7 @@ static const uint8_t json_lexer[][256] =  {
         [','] = JSON_COMMA,
         [':'] = JSON_COLON,
         ['a' ... 'z'] = IN_KEYWORD,
+        ['A' ... 'Z'] = IN_KEYWORD,
         ['%'] = IN_ESCAPE,
         [' '] = IN_WHITESPACE,
         ['\t'] = IN_WHITESPACE,
diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index 67ed727..12519b6 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -450,6 +450,16 @@ static QObject *parse_keyword(JSONParserContext *ctxt)
         return QOBJECT(qbool_from_bool(false));
     } else if (!strcmp(token->str, "null")) {
         return qnull();
+    } else {
+        double d;
+        char *p;
+
+        /* The lexer feeds us "NaN" and "-Inf" as keywords */
+        errno = 0;
+        d = strtod(token->str, &p);
+        if (!errno && p != token->str && !*p) {
+            return QOBJECT(qfloat_from_double(d));
+        }
     }
     parse_error(ctxt, token, "invalid keyword '%s'", token->str);
     return NULL;
@@ -519,8 +529,7 @@ static QObject *parse_literal(JSONParserContext *ctxt)
     }
     case JSON_FLOAT:
         /* FIXME dependent on locale; a pervasive issue in QEMU */
-        /* FIXME our lexer matches RFC 7159 in forbidding Inf or NaN,
-         * but those might be useful extensions beyond JSON */
+        /* NaN and infinity are parsed as extensions under parse_keyword() */
         return QOBJECT(qfloat_from_double(strtod(token->str, NULL)));
     default:
         abort();
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 0e158f6..95adf64 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -11,6 +11,7 @@
  *
  */
 #include "qemu/osdep.h"
+#include <math.h>

 #include "qapi/qmp/qstring.h"
 #include "qapi/qmp/qint.h"
@@ -932,34 +933,78 @@ static void float_number(void)
     struct {
         const char *encoded;
         double decoded;
-        int skip;
+        const char *canonical;
     } test_cases[] = {
         { "32.43", 32.43 },
         { "0.222", 0.222 },
         { "-32.12313", -32.12313 },
-        { "-32.20e-10", -32.20e-10, .skip = 1 },
+        { "-32.20e-10", -32.20e-10, "-0" }, /* FIXME: Our rounding is lousy */
+        { "-0.0", -0.0, "-0" },
         { },
     };

     for (i = 0; test_cases[i].encoded; i++) {
         QObject *obj;
         QFloat *qfloat;
+        QString *str;

         obj = qobject_from_json(test_cases[i].encoded);
         g_assert(obj != NULL);
         g_assert(qobject_type(obj) == QTYPE_QFLOAT);

         qfloat = qobject_to_qfloat(obj);
-        g_assert(qfloat_get_double(qfloat) == test_cases[i].decoded);
+        g_assert_cmpfloat(qfloat_get_double(qfloat), ==,
+                          test_cases[i].decoded);
+        g_assert_cmpint(signbit(qfloat_get_double(qfloat)), ==,
+                        signbit(test_cases[i].decoded));

-        if (test_cases[i].skip == 0) {
-            QString *str;
+        str = qobject_to_json(obj);
+        g_assert_cmpstr(qstring_get_str(str), ==,
+                        test_cases[i].canonical ?: test_cases[i].encoded);
+        QDECREF(str);
+        QDECREF(qfloat);
+    }
+}

-            str = qobject_to_json(obj);
-            g_assert(strcmp(qstring_get_str(str), test_cases[i].encoded) == 0);
-            QDECREF(str);
+static void non_finite_number(void)
+{
+    int i;
+    struct {
+        const char *encoded;
+        double decoded;
+        const char *canonical;
+    } test_cases[] = {
+        { "nan", NAN },
+        { "NaN", NAN, "nan" },
+        /* Not all libc implementations can round-trip '-nan' */
+        /* We do not support forms like 'NAN(0)' */
+        { "inf", INFINITY },
+        { "-INFINITY", -INFINITY, "-inf" },
+        { },
+    };
+
+    for (i = 0; test_cases[i].encoded; i++) {
+        QObject *obj;
+        QFloat *qfloat;
+        QString *str;
+
+        obj = qobject_from_json(test_cases[i].encoded);
+        g_assert(obj != NULL);
+        g_assert(qobject_type(obj) == QTYPE_QFLOAT);
+
+        qfloat = qobject_to_qfloat(obj);
+        /* g_assert_cmpfloat(NAN, ==, NAN) doesn't do what we want */
+        g_assert_cmpint(fpclassify(qfloat_get_double(qfloat)), ==,
+                        fpclassify(test_cases[i].decoded));
+        if (!isnan(test_cases[i].decoded)) {
+            g_assert_cmpfloat(qfloat_get_double(qfloat), ==,
+                              test_cases[i].decoded);
         }

+        str = qobject_to_json(obj);
+        g_assert_cmpstr(qstring_get_str(str), ==,
+                        test_cases[i].canonical ?: test_cases[i].encoded);
+        QDECREF(str);
         QDECREF(qfloat);
     }
 }
@@ -1520,6 +1565,7 @@ int main(int argc, char **argv)

     g_test_add_func("/literals/number/simple", simple_number);
     g_test_add_func("/literals/number/float", float_number);
+    g_test_add_func("/literals/number/non-finite", non_finite_number);
     g_test_add_func("/literals/number/vararg", vararg_number);

     g_test_add_func("/literals/keyword", keyword_literal);
diff --git a/docs/qmp-spec.txt b/docs/qmp-spec.txt
index f8b5356..bfba431 100644
--- a/docs/qmp-spec.txt
+++ b/docs/qmp-spec.txt
@@ -51,6 +51,10 @@ json-string, and both input forms of strings understand an additional
 escape sequence of "\'" for a single quote. The server will only use
 double quoting on output.

+As an extension, the server understands case-insensitive forms of
+non-finite numbers, such as "NaN", "Inf", and "-infinity" (but not
+"NaN(...)").
+
 2.1 General Definitions
 -----------------------

-- 
2.5.5