Bug 507998 - Add line/column information to the errors reported by JSON.parse, pointing out exactly where in the input string the first bad character was found. r=njn, r=Waldo, rs=jorendorff on the delta (particularly the Unicode escape bits) from the originally-posted patch

2024-10-13 13:25:37 +00:00 · 2013-12-19 14:40:58 -05:00 · 2013-12-19 14:40:58 -05:00 · 2abc272a9b
commit 2abc272a9b
parent bb7f1ca7da
6 changed files with 204 additions and 33 deletions
--- a/js/src/js.msg
+++ b/js/src/js.msg
@ -278,7 +278,7 @@ MSG_DEF(JSMSG_BAD_OBJECT_INIT,        224, 0, JSEXN_SYNTAXERR, "invalid object i
 MSG_DEF(JSMSG_CANT_SET_ARRAY_ATTRS,   225, 0, JSEXN_INTERNALERR, "can't set attributes on indexed array properties")
 MSG_DEF(JSMSG_EVAL_ARITY,             226, 0, JSEXN_TYPEERR, "eval accepts only one parameter")
 MSG_DEF(JSMSG_MISSING_FUN_ARG,        227, 2, JSEXN_TYPEERR, "missing argument {0} when calling function {1}")
-MSG_DEF(JSMSG_JSON_BAD_PARSE,         228, 1, JSEXN_SYNTAXERR, "JSON.parse: {0}")
+MSG_DEF(JSMSG_JSON_BAD_PARSE,         228, 3, JSEXN_SYNTAXERR, "JSON.parse: {0} at line {1} column {2} of the JSON data")
 MSG_DEF(JSMSG_JSON_BAD_STRINGIFY,     229, 0, JSEXN_ERR, "JSON.stringify")
 MSG_DEF(JSMSG_NOT_CALLABLE_OR_UNDEFINED, 230, 0, JSEXN_TYPEERR, "value is not a function or undefined")
 MSG_DEF(JSMSG_NOT_NONNULL_OBJECT,     231, 0, JSEXN_TYPEERR, "value is not a non-null object")
--- a/js/src/jsapi-tests/testParseJSON.cpp
+++ b/js/src/jsapi-tests/testParseJSON.cpp
@ -164,30 +164,123 @@ END_TEST(testParseJSON_success)

 BEGIN_TEST(testParseJSON_error)
 {
-    CHECK(Error(cx, "["));
-    CHECK(Error(cx, "[,]"));
-    CHECK(Error(cx, "[1,]"));
-    CHECK(Error(cx, "{a:2}"));
-    CHECK(Error(cx, "{\"a\":2,}"));
-    CHECK(Error(cx, "]"));
-    CHECK(Error(cx, "'bad string'"));
-    CHECK(Error(cx, "\""));
-    CHECK(Error(cx, "{]"));
-    CHECK(Error(cx, "[}"));
+    CHECK(Error(cx, ""                                  , "1", "1"));
+    CHECK(Error(cx, "\n"                                , "2", "1"));
+    CHECK(Error(cx, "\r"                                , "2", "1"));
+    CHECK(Error(cx, "\r\n"                              , "2", "1"));
+
+    CHECK(Error(cx, "["                                 , "1", "2"));
+    CHECK(Error(cx, "[,]"                               , "1", "2"));
+    CHECK(Error(cx, "[1,]"                              , "1", "4"));
+    CHECK(Error(cx, "{a:2}"                             , "1", "2"));
+    CHECK(Error(cx, "{\"a\":2,}"                        , "1", "8"));
+    CHECK(Error(cx, "]"                                 , "1", "1"));
+    CHECK(Error(cx, "\""                                , "1", "2"));
+    CHECK(Error(cx, "{]"                                , "1", "2"));
+    CHECK(Error(cx, "[}"                                , "1", "2"));
+    CHECK(Error(cx, "'wrongly-quoted string'"           , "1", "1"));
+
+    CHECK(Error(cx, "{\"a\":2 \n b:3}"                  , "2", "2"));
+    CHECK(Error(cx, "\n["                               , "2", "2"));
+    CHECK(Error(cx, "\n[,]"                             , "2", "2"));
+    CHECK(Error(cx, "\n[1,]"                            , "2", "4"));
+    CHECK(Error(cx, "\n{a:2}"                           , "2", "2"));
+    CHECK(Error(cx, "\n{\"a\":2,}"                      , "2", "8"));
+    CHECK(Error(cx, "\n]"                               , "2", "1"));
+    CHECK(Error(cx, "\"bad string\n\""                  , "1", "12"));
+    CHECK(Error(cx, "\r'wrongly-quoted string'"         , "2", "1"));
+    CHECK(Error(cx, "\n\""                              , "2", "2"));
+    CHECK(Error(cx, "\n{]"                              , "2", "2"));
+    CHECK(Error(cx, "\n[}"                              , "2", "2"));
+    CHECK(Error(cx, "{\"a\":[2,3],\n\"b\":,5,6}"        , "2", "5"));
+
+    CHECK(Error(cx, "{\"a\":2 \r b:3}"                  , "2", "2"));
+    CHECK(Error(cx, "\r["                               , "2", "2"));
+    CHECK(Error(cx, "\r[,]"                             , "2", "2"));
+    CHECK(Error(cx, "\r[1,]"                            , "2", "4"));
+    CHECK(Error(cx, "\r{a:2}"                           , "2", "2"));
+    CHECK(Error(cx, "\r{\"a\":2,}"                      , "2", "8"));
+    CHECK(Error(cx, "\r]"                               , "2", "1"));
+    CHECK(Error(cx, "\"bad string\r\""                  , "1", "12"));
+    CHECK(Error(cx, "\r'wrongly-quoted string'"       , "2", "1"));
+    CHECK(Error(cx, "\r\""                              , "2", "2"));
+    CHECK(Error(cx, "\r{]"                              , "2", "2"));
+    CHECK(Error(cx, "\r[}"                              , "2", "2"));
+    CHECK(Error(cx, "{\"a\":[2,3],\r\"b\":,5,6}"        , "2", "5"));
+
+    CHECK(Error(cx, "{\"a\":2 \r\n b:3}"                , "2", "2"));
+    CHECK(Error(cx, "\r\n["                             , "2", "2"));
+    CHECK(Error(cx, "\r\n[,]"                           , "2", "2"));
+    CHECK(Error(cx, "\r\n[1,]"                          , "2", "4"));
+    CHECK(Error(cx, "\r\n{a:2}"                         , "2", "2"));
+    CHECK(Error(cx, "\r\n{\"a\":2,}"                    , "2", "8"));
+    CHECK(Error(cx, "\r\n]"                             , "2", "1"));
+    CHECK(Error(cx, "\"bad string\r\n\""                , "1", "12"));
+    CHECK(Error(cx, "\r\n'wrongly-quoted string'"       , "2", "1"));
+    CHECK(Error(cx, "\r\n\""                            , "2", "2"));
+    CHECK(Error(cx, "\r\n{]"                            , "2", "2"));
+    CHECK(Error(cx, "\r\n[}"                            , "2", "2"));
+    CHECK(Error(cx, "{\"a\":[2,3],\r\n\"b\":,5,6}"      , "2", "5"));
+
+    CHECK(Error(cx, "\n\"bad string\n\""                , "2", "12"));
+    CHECK(Error(cx, "\r\"bad string\r\""                , "2", "12"));
+    CHECK(Error(cx, "\r\n\"bad string\r\n\""            , "2", "12"));
+
+    CHECK(Error(cx, "{\n\"a\":[2,3],\r\"b\":,5,6}"      , "3", "5"));
+    CHECK(Error(cx, "{\r\"a\":[2,3],\n\"b\":,5,6}"      , "3", "5"));
+    CHECK(Error(cx, "[\"\\t\\q"                         , "1", "6"));
+    CHECK(Error(cx, "[\"\\t\u0000"                      , "1", "5"));
+
+    // Unicode escape errors are messy.  The first bad character could be
+    // non-hexadecimal, or it could be absent entirely.  Include tests where
+    // there's a bad character, followed by zero to as many characters as are
+    // needed to form a complete Unicode escape sequence, plus one.  (The extra
+    // characters beyond are valuable because our implementation checks for
+    // too-few subsequent characters first, before checking for subsequent
+    // non-hexadecimal characters.  So \u<END>, \u0<END>, \u00<END>, and
+    // \u000<END> are all *detected* as invalid by the same code path, but the
+    // process of computing the first invalid character follows a different
+    // code path for each.  And \uQQQQ, \u0QQQ, \u00QQ, and \u000Q are detected
+    // as invalid by the same code path [ignoring which precise subexpression
+    // triggers failure of a single condition], but the computation of the
+    // first invalid character follows a different code path for each.)
+    CHECK(Error(cx, "[\"\\t\\u"                         , "1", "7"));
+    CHECK(Error(cx, "[\"\\t\\uZ"                        , "1", "7"));
+    CHECK(Error(cx, "[\"\\t\\uZZ"                       , "1", "7"));
+    CHECK(Error(cx, "[\"\\t\\uZZZ"                      , "1", "7"));
+    CHECK(Error(cx, "[\"\\t\\uZZZZ"                     , "1", "7"));
+    CHECK(Error(cx, "[\"\\t\\uZZZZZ"                    , "1", "7"));
+
+    CHECK(Error(cx, "[\"\\t\\u0"                        , "1", "8"));
+    CHECK(Error(cx, "[\"\\t\\u0Z"                       , "1", "8"));
+    CHECK(Error(cx, "[\"\\t\\u0ZZ"                      , "1", "8"));
+    CHECK(Error(cx, "[\"\\t\\u0ZZZ"                     , "1", "8"));
+    CHECK(Error(cx, "[\"\\t\\u0ZZZZ"                    , "1", "8"));
+
+    CHECK(Error(cx, "[\"\\t\\u00"                       , "1", "9"));
+    CHECK(Error(cx, "[\"\\t\\u00Z"                      , "1", "9"));
+    CHECK(Error(cx, "[\"\\t\\u00ZZ"                     , "1", "9"));
+    CHECK(Error(cx, "[\"\\t\\u00ZZZ"                    , "1", "9"));
+
+    CHECK(Error(cx, "[\"\\t\\u000"                      , "1", "10"));
+    CHECK(Error(cx, "[\"\\t\\u000Z"                     , "1", "10"));
+    CHECK(Error(cx, "[\"\\t\\u000ZZ"                    , "1", "10"));
+
    return true;
 }

-template<size_t N> inline bool
-Error(JSContext *cx, const char (&input)[N])
+template<size_t N, size_t M, size_t L> inline bool
+Error(JSContext *cx, const char (&input)[N], const char (&expectedLine)[M],
+      const char (&expectedColumn)[L])
 {
-    AutoInflatedString str(cx);
+    AutoInflatedString str(cx), line(cx), column(cx);
    RootedValue dummy(cx);
    str = input;

    ContextPrivate p = {0, 0};
    CHECK(!JS_GetContextPrivate(cx));
    JS_SetContextPrivate(cx, &p);
-    JSErrorReporter old = JS_SetErrorReporter(cx, reportJSONEror);
+    JSErrorReporter old = JS_SetErrorReporter(cx, ReportJSONError);
    bool ok = JS_ParseJSON(cx, str.chars(), str.length(), &dummy);
    JS_SetErrorReporter(cx, old);
    JS_SetContextPrivate(cx, nullptr);
@ -195,6 +288,10 @@ Error(JSContext *cx, const char (&input)[N])
    CHECK(!ok);
    CHECK(!p.unexpectedErrorCount);
    CHECK(p.expectedErrorCount == 1);
+    column = expectedColumn;
+    CHECK(js_strcmp(column.chars(), p.column) == 0);
+    line = expectedLine;
+    CHECK(js_strcmp(line.chars(), p.line) == 0);

    /* We do not execute JS, so there should be no exception thrown. */
    CHECK(!JS_IsExceptionPending(cx));
@ -203,14 +300,21 @@ Error(JSContext *cx, const char (&input)[N])
 }

 struct ContextPrivate {
+    static const size_t MaxSize = sizeof("4294967295");
    unsigned unexpectedErrorCount;
    unsigned expectedErrorCount;
+    jschar column[MaxSize];
+    jschar line[MaxSize];
 };

 static void
-reportJSONEror(JSContext *cx, const char *message, JSErrorReport *report)
+ReportJSONError(JSContext *cx, const char *message, JSErrorReport *report)
 {
    ContextPrivate *p = static_cast<ContextPrivate *>(JS_GetContextPrivate(cx));
+    // Although messageArgs[1] and messageArgs[2] are jschar*, we cast them to char*
+    // here because JSONParser::error() stores char* strings in them.
+    js_strncpy(p->line, report->messageArgs[1], js_strlen(report->messageArgs[1]));
+    js_strncpy(p->column, report->messageArgs[2], js_strlen(report->messageArgs[2]));
    if (report->errorNumber == JSMSG_JSON_BAD_PARSE)
        p->expectedErrorCount++;
    else
--- a/js/src/jsonparser.cpp
+++ b/js/src/jsonparser.cpp
@ -13,6 +13,7 @@
 #include "jsarray.h"
 #include "jscompartment.h"
 #include "jsnum.h"
+#include "jsprf.h"

 #include "vm/StringBuffer.h"

@ -56,11 +57,43 @@ JSONParser::trace(JSTracer *trc)
    }
 }

+void
+JSONParser::getTextPosition(uint32_t *column, uint32_t *line)
+{
+    StableCharPtr ptr = begin;
+    uint32_t col = 1;
+    uint32_t row = 1;
+    for (; ptr < current; ptr++) {
+        if (*ptr == '\n' || *ptr == '\r') {
+            ++row;
+            col = 1;
+            // \r\n is treated as a single newline.
+            if (ptr + 1 < current && *ptr == '\r' && *(ptr + 1) == '\n')
+                ++ptr;
+        } else {
+            ++col;
+        }
+    }
+    *column = col;
+    *line = row;
+}
+
 void
 JSONParser::error(const char *msg)
 {
-    if (errorHandling == RaiseError)
-        JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_JSON_BAD_PARSE, msg);
+    if (errorHandling == RaiseError) {
+        uint32_t column = 1, line = 1;
+        getTextPosition(&column, &line);
+
+        const size_t MaxWidth = sizeof("4294967295");
+        char columnNumber[MaxWidth];
+        JS_snprintf(columnNumber, sizeof columnNumber, "%lu", column);
+        char lineNumber[MaxWidth];
+        JS_snprintf(lineNumber, sizeof lineNumber, "%lu", line);
+
+        JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_JSON_BAD_PARSE,
+                             msg, lineNumber, columnNumber);
+    }
 }

 bool
@ -136,6 +169,7 @@ JSONParser::readString()
        }

        if (c != '\\') {
+            --current;
            error("bad character in string literal");
            return token(Error);
        }
@ -154,25 +188,37 @@ JSONParser::readString()
          case 't':  c = '\t'; break;

          case 'u':
-            if (end - current < 4) {
+            if (end - current < 4 ||
+                !(JS7_ISHEX(current[0]) &&
+                  JS7_ISHEX(current[1]) &&
+                  JS7_ISHEX(current[2]) &&
+                  JS7_ISHEX(current[3])))
+            {
+                // Point to the first non-hexadecimal character (which may be
+                // missing).
+                if (current == end || !JS7_ISHEX(current[0]))
+                    ; // already at correct location
+                else if (current + 1 == end || !JS7_ISHEX(current[1]))
+                    current += 1;
+                else if (current + 2 == end || !JS7_ISHEX(current[2]))
+                    current += 2;
+                else if (current + 3 == end || !JS7_ISHEX(current[3]))
+                    current += 3;
+                else
+                    MOZ_ASSUME_UNREACHABLE("logic error determining first erroneous character");
+
                error("bad Unicode escape");
                return token(Error);
            }
-            if (JS7_ISHEX(current[0]) &&
-                JS7_ISHEX(current[1]) &&
-                JS7_ISHEX(current[2]) &&
-                JS7_ISHEX(current[3]))
-            {
-                c = (JS7_UNHEX(current[0]) << 12)
-                  | (JS7_UNHEX(current[1]) << 8)
-                  | (JS7_UNHEX(current[2]) << 4)
-                  | (JS7_UNHEX(current[3]));
-                current += 4;
-                break;
-            }
-            /* FALL THROUGH */
+            c = (JS7_UNHEX(current[0]) << 12)
+              | (JS7_UNHEX(current[1]) << 8)
+              | (JS7_UNHEX(current[2]) << 4)
+              | (JS7_UNHEX(current[3]));
+            current += 4;
+            break;

          default:
+            current--;
            error("bad escaped character");
            return token(Error);
        }
@ -740,6 +786,9 @@ JSONParser::parse(MutableHandleValue vp)
              case ObjectClose:
              case Colon:
              case Comma:
+                // Move the current pointer backwards so that the position
+                // reported in the error message is correct.
+                --current;
                error("unexpected character");
                return errorReturn();

--- a/js/src/jsonparser.h
+++ b/js/src/jsonparser.h
@ -24,7 +24,7 @@ class MOZ_STACK_CLASS JSONParser : private AutoGCRooter

    JSContext * const cx;
    StableCharPtr current;
-    const StableCharPtr end;
+    const StableCharPtr begin, end;

    Value v;

@ -112,6 +112,7 @@ class MOZ_STACK_CLASS JSONParser : private AutoGCRooter
      : AutoGCRooter(cx, JSONPARSER),
        cx(cx),
        current(data),
+        begin(data),
        end((data + length).get(), data.get(), length),
        errorHandling(errorHandling),
        stack(cx),
@ -200,6 +201,8 @@ class MOZ_STACK_CLASS JSONParser : private AutoGCRooter
    bool finishObject(MutableHandleValue vp, PropertyVector &properties);
    bool finishArray(MutableHandleValue vp, ElementVector &elements);

+    void getTextPosition(uint32_t *column, uint32_t *line);
+
    friend void AutoGCRooter::trace(JSTracer *trc);
    void trace(JSTracer *trc);

--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@ -4175,6 +4175,18 @@ js_strlen(const jschar *s)
    return (size_t)(t - s);
 }

+int32_t
+js_strcmp(const jschar *lhs, const jschar *rhs)
+{
+    while (true) {
+        if (*lhs != *rhs)
+            return int32_t(*lhs) - int32_t(*rhs);
+        if (*lhs == 0)
+            return 0;
+        ++lhs, ++rhs;
+    }
+}
+
 jschar *
 js_strdup(js::ThreadSafeContext *cx, const jschar *s)
 {
--- a/js/src/jsstr.h
+++ b/js/src/jsstr.h
@ -226,6 +226,9 @@ StringHasPattern(const jschar *text, uint32_t textlen,
 extern size_t
 js_strlen(const jschar *s);

+extern int32_t
+js_strcmp(const jschar *lhs, const jschar *rhs);
+
 extern jschar *
 js_strchr_limit(const jschar *s, jschar c, const jschar *limit);