Bug 507998 - Add line/column information to the errors reported by JSON.parse, pointing out exactly where in the input string the first bad character was found. r=njn, r=Waldo, rs=jorendorff on the delta (particularly the Unicode escape bits) from the originally-posted patch

This commit is contained in:
masaya iseki 2013-12-19 14:40:58 -05:00
parent bb7f1ca7da
commit 2abc272a9b
6 changed files with 204 additions and 33 deletions

View File

@ -278,7 +278,7 @@ MSG_DEF(JSMSG_BAD_OBJECT_INIT, 224, 0, JSEXN_SYNTAXERR, "invalid object i
MSG_DEF(JSMSG_CANT_SET_ARRAY_ATTRS, 225, 0, JSEXN_INTERNALERR, "can't set attributes on indexed array properties")
MSG_DEF(JSMSG_EVAL_ARITY, 226, 0, JSEXN_TYPEERR, "eval accepts only one parameter")
MSG_DEF(JSMSG_MISSING_FUN_ARG, 227, 2, JSEXN_TYPEERR, "missing argument {0} when calling function {1}")
MSG_DEF(JSMSG_JSON_BAD_PARSE, 228, 1, JSEXN_SYNTAXERR, "JSON.parse: {0}")
MSG_DEF(JSMSG_JSON_BAD_PARSE, 228, 3, JSEXN_SYNTAXERR, "JSON.parse: {0} at line {1} column {2} of the JSON data")
MSG_DEF(JSMSG_JSON_BAD_STRINGIFY, 229, 0, JSEXN_ERR, "JSON.stringify")
MSG_DEF(JSMSG_NOT_CALLABLE_OR_UNDEFINED, 230, 0, JSEXN_TYPEERR, "value is not a function or undefined")
MSG_DEF(JSMSG_NOT_NONNULL_OBJECT, 231, 0, JSEXN_TYPEERR, "value is not a non-null object")

View File

@ -164,30 +164,123 @@ END_TEST(testParseJSON_success)
BEGIN_TEST(testParseJSON_error)
{
CHECK(Error(cx, "["));
CHECK(Error(cx, "[,]"));
CHECK(Error(cx, "[1,]"));
CHECK(Error(cx, "{a:2}"));
CHECK(Error(cx, "{\"a\":2,}"));
CHECK(Error(cx, "]"));
CHECK(Error(cx, "'bad string'"));
CHECK(Error(cx, "\""));
CHECK(Error(cx, "{]"));
CHECK(Error(cx, "[}"));
CHECK(Error(cx, "" , "1", "1"));
CHECK(Error(cx, "\n" , "2", "1"));
CHECK(Error(cx, "\r" , "2", "1"));
CHECK(Error(cx, "\r\n" , "2", "1"));
CHECK(Error(cx, "[" , "1", "2"));
CHECK(Error(cx, "[,]" , "1", "2"));
CHECK(Error(cx, "[1,]" , "1", "4"));
CHECK(Error(cx, "{a:2}" , "1", "2"));
CHECK(Error(cx, "{\"a\":2,}" , "1", "8"));
CHECK(Error(cx, "]" , "1", "1"));
CHECK(Error(cx, "\"" , "1", "2"));
CHECK(Error(cx, "{]" , "1", "2"));
CHECK(Error(cx, "[}" , "1", "2"));
CHECK(Error(cx, "'wrongly-quoted string'" , "1", "1"));
CHECK(Error(cx, "{\"a\":2 \n b:3}" , "2", "2"));
CHECK(Error(cx, "\n[" , "2", "2"));
CHECK(Error(cx, "\n[,]" , "2", "2"));
CHECK(Error(cx, "\n[1,]" , "2", "4"));
CHECK(Error(cx, "\n{a:2}" , "2", "2"));
CHECK(Error(cx, "\n{\"a\":2,}" , "2", "8"));
CHECK(Error(cx, "\n]" , "2", "1"));
CHECK(Error(cx, "\"bad string\n\"" , "1", "12"));
CHECK(Error(cx, "\r'wrongly-quoted string'" , "2", "1"));
CHECK(Error(cx, "\n\"" , "2", "2"));
CHECK(Error(cx, "\n{]" , "2", "2"));
CHECK(Error(cx, "\n[}" , "2", "2"));
CHECK(Error(cx, "{\"a\":[2,3],\n\"b\":,5,6}" , "2", "5"));
CHECK(Error(cx, "{\"a\":2 \r b:3}" , "2", "2"));
CHECK(Error(cx, "\r[" , "2", "2"));
CHECK(Error(cx, "\r[,]" , "2", "2"));
CHECK(Error(cx, "\r[1,]" , "2", "4"));
CHECK(Error(cx, "\r{a:2}" , "2", "2"));
CHECK(Error(cx, "\r{\"a\":2,}" , "2", "8"));
CHECK(Error(cx, "\r]" , "2", "1"));
CHECK(Error(cx, "\"bad string\r\"" , "1", "12"));
CHECK(Error(cx, "\r'wrongly-quoted string'" , "2", "1"));
CHECK(Error(cx, "\r\"" , "2", "2"));
CHECK(Error(cx, "\r{]" , "2", "2"));
CHECK(Error(cx, "\r[}" , "2", "2"));
CHECK(Error(cx, "{\"a\":[2,3],\r\"b\":,5,6}" , "2", "5"));
CHECK(Error(cx, "{\"a\":2 \r\n b:3}" , "2", "2"));
CHECK(Error(cx, "\r\n[" , "2", "2"));
CHECK(Error(cx, "\r\n[,]" , "2", "2"));
CHECK(Error(cx, "\r\n[1,]" , "2", "4"));
CHECK(Error(cx, "\r\n{a:2}" , "2", "2"));
CHECK(Error(cx, "\r\n{\"a\":2,}" , "2", "8"));
CHECK(Error(cx, "\r\n]" , "2", "1"));
CHECK(Error(cx, "\"bad string\r\n\"" , "1", "12"));
CHECK(Error(cx, "\r\n'wrongly-quoted string'" , "2", "1"));
CHECK(Error(cx, "\r\n\"" , "2", "2"));
CHECK(Error(cx, "\r\n{]" , "2", "2"));
CHECK(Error(cx, "\r\n[}" , "2", "2"));
CHECK(Error(cx, "{\"a\":[2,3],\r\n\"b\":,5,6}" , "2", "5"));
CHECK(Error(cx, "\n\"bad string\n\"" , "2", "12"));
CHECK(Error(cx, "\r\"bad string\r\"" , "2", "12"));
CHECK(Error(cx, "\r\n\"bad string\r\n\"" , "2", "12"));
CHECK(Error(cx, "{\n\"a\":[2,3],\r\"b\":,5,6}" , "3", "5"));
CHECK(Error(cx, "{\r\"a\":[2,3],\n\"b\":,5,6}" , "3", "5"));
CHECK(Error(cx, "[\"\\t\\q" , "1", "6"));
CHECK(Error(cx, "[\"\\t\u0000" , "1", "5"));
// Unicode escape errors are messy. The first bad character could be
// non-hexadecimal, or it could be absent entirely. Include tests where
// there's a bad character, followed by zero to as many characters as are
// needed to form a complete Unicode escape sequence, plus one. (The extra
// characters beyond are valuable because our implementation checks for
// too-few subsequent characters first, before checking for subsequent
// non-hexadecimal characters. So \u<END>, \u0<END>, \u00<END>, and
// \u000<END> are all *detected* as invalid by the same code path, but the
// process of computing the first invalid character follows a different
// code path for each. And \uQQQQ, \u0QQQ, \u00QQ, and \u000Q are detected
// as invalid by the same code path [ignoring which precise subexpression
// triggers failure of a single condition], but the computation of the
// first invalid character follows a different code path for each.)
CHECK(Error(cx, "[\"\\t\\u" , "1", "7"));
CHECK(Error(cx, "[\"\\t\\uZ" , "1", "7"));
CHECK(Error(cx, "[\"\\t\\uZZ" , "1", "7"));
CHECK(Error(cx, "[\"\\t\\uZZZ" , "1", "7"));
CHECK(Error(cx, "[\"\\t\\uZZZZ" , "1", "7"));
CHECK(Error(cx, "[\"\\t\\uZZZZZ" , "1", "7"));
CHECK(Error(cx, "[\"\\t\\u0" , "1", "8"));
CHECK(Error(cx, "[\"\\t\\u0Z" , "1", "8"));
CHECK(Error(cx, "[\"\\t\\u0ZZ" , "1", "8"));
CHECK(Error(cx, "[\"\\t\\u0ZZZ" , "1", "8"));
CHECK(Error(cx, "[\"\\t\\u0ZZZZ" , "1", "8"));
CHECK(Error(cx, "[\"\\t\\u00" , "1", "9"));
CHECK(Error(cx, "[\"\\t\\u00Z" , "1", "9"));
CHECK(Error(cx, "[\"\\t\\u00ZZ" , "1", "9"));
CHECK(Error(cx, "[\"\\t\\u00ZZZ" , "1", "9"));
CHECK(Error(cx, "[\"\\t\\u000" , "1", "10"));
CHECK(Error(cx, "[\"\\t\\u000Z" , "1", "10"));
CHECK(Error(cx, "[\"\\t\\u000ZZ" , "1", "10"));
return true;
}
template<size_t N> inline bool
Error(JSContext *cx, const char (&input)[N])
template<size_t N, size_t M, size_t L> inline bool
Error(JSContext *cx, const char (&input)[N], const char (&expectedLine)[M],
const char (&expectedColumn)[L])
{
AutoInflatedString str(cx);
AutoInflatedString str(cx), line(cx), column(cx);
RootedValue dummy(cx);
str = input;
ContextPrivate p = {0, 0};
CHECK(!JS_GetContextPrivate(cx));
JS_SetContextPrivate(cx, &p);
JSErrorReporter old = JS_SetErrorReporter(cx, reportJSONEror);
JSErrorReporter old = JS_SetErrorReporter(cx, ReportJSONError);
bool ok = JS_ParseJSON(cx, str.chars(), str.length(), &dummy);
JS_SetErrorReporter(cx, old);
JS_SetContextPrivate(cx, nullptr);
@ -195,6 +288,10 @@ Error(JSContext *cx, const char (&input)[N])
CHECK(!ok);
CHECK(!p.unexpectedErrorCount);
CHECK(p.expectedErrorCount == 1);
column = expectedColumn;
CHECK(js_strcmp(column.chars(), p.column) == 0);
line = expectedLine;
CHECK(js_strcmp(line.chars(), p.line) == 0);
/* We do not execute JS, so there should be no exception thrown. */
CHECK(!JS_IsExceptionPending(cx));
@ -203,14 +300,21 @@ Error(JSContext *cx, const char (&input)[N])
}
struct ContextPrivate {
static const size_t MaxSize = sizeof("4294967295");
unsigned unexpectedErrorCount;
unsigned expectedErrorCount;
jschar column[MaxSize];
jschar line[MaxSize];
};
static void
reportJSONEror(JSContext *cx, const char *message, JSErrorReport *report)
ReportJSONError(JSContext *cx, const char *message, JSErrorReport *report)
{
ContextPrivate *p = static_cast<ContextPrivate *>(JS_GetContextPrivate(cx));
// Although messageArgs[1] and messageArgs[2] are jschar*, we cast them to char*
// here because JSONParser::error() stores char* strings in them.
js_strncpy(p->line, report->messageArgs[1], js_strlen(report->messageArgs[1]));
js_strncpy(p->column, report->messageArgs[2], js_strlen(report->messageArgs[2]));
if (report->errorNumber == JSMSG_JSON_BAD_PARSE)
p->expectedErrorCount++;
else

View File

@ -13,6 +13,7 @@
#include "jsarray.h"
#include "jscompartment.h"
#include "jsnum.h"
#include "jsprf.h"
#include "vm/StringBuffer.h"
@ -56,11 +57,43 @@ JSONParser::trace(JSTracer *trc)
}
}
void
JSONParser::getTextPosition(uint32_t *column, uint32_t *line)
{
StableCharPtr ptr = begin;
uint32_t col = 1;
uint32_t row = 1;
for (; ptr < current; ptr++) {
if (*ptr == '\n' || *ptr == '\r') {
++row;
col = 1;
// \r\n is treated as a single newline.
if (ptr + 1 < current && *ptr == '\r' && *(ptr + 1) == '\n')
++ptr;
} else {
++col;
}
}
*column = col;
*line = row;
}
void
JSONParser::error(const char *msg)
{
if (errorHandling == RaiseError)
JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_JSON_BAD_PARSE, msg);
if (errorHandling == RaiseError) {
uint32_t column = 1, line = 1;
getTextPosition(&column, &line);
const size_t MaxWidth = sizeof("4294967295");
char columnNumber[MaxWidth];
JS_snprintf(columnNumber, sizeof columnNumber, "%lu", column);
char lineNumber[MaxWidth];
JS_snprintf(lineNumber, sizeof lineNumber, "%lu", line);
JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_JSON_BAD_PARSE,
msg, lineNumber, columnNumber);
}
}
bool
@ -136,6 +169,7 @@ JSONParser::readString()
}
if (c != '\\') {
--current;
error("bad character in string literal");
return token(Error);
}
@ -154,25 +188,37 @@ JSONParser::readString()
case 't': c = '\t'; break;
case 'u':
if (end - current < 4) {
if (end - current < 4 ||
!(JS7_ISHEX(current[0]) &&
JS7_ISHEX(current[1]) &&
JS7_ISHEX(current[2]) &&
JS7_ISHEX(current[3])))
{
// Point to the first non-hexadecimal character (which may be
// missing).
if (current == end || !JS7_ISHEX(current[0]))
; // already at correct location
else if (current + 1 == end || !JS7_ISHEX(current[1]))
current += 1;
else if (current + 2 == end || !JS7_ISHEX(current[2]))
current += 2;
else if (current + 3 == end || !JS7_ISHEX(current[3]))
current += 3;
else
MOZ_ASSUME_UNREACHABLE("logic error determining first erroneous character");
error("bad Unicode escape");
return token(Error);
}
if (JS7_ISHEX(current[0]) &&
JS7_ISHEX(current[1]) &&
JS7_ISHEX(current[2]) &&
JS7_ISHEX(current[3]))
{
c = (JS7_UNHEX(current[0]) << 12)
| (JS7_UNHEX(current[1]) << 8)
| (JS7_UNHEX(current[2]) << 4)
| (JS7_UNHEX(current[3]));
current += 4;
break;
}
/* FALL THROUGH */
c = (JS7_UNHEX(current[0]) << 12)
| (JS7_UNHEX(current[1]) << 8)
| (JS7_UNHEX(current[2]) << 4)
| (JS7_UNHEX(current[3]));
current += 4;
break;
default:
current--;
error("bad escaped character");
return token(Error);
}
@ -740,6 +786,9 @@ JSONParser::parse(MutableHandleValue vp)
case ObjectClose:
case Colon:
case Comma:
// Move the current pointer backwards so that the position
// reported in the error message is correct.
--current;
error("unexpected character");
return errorReturn();

View File

@ -24,7 +24,7 @@ class MOZ_STACK_CLASS JSONParser : private AutoGCRooter
JSContext * const cx;
StableCharPtr current;
const StableCharPtr end;
const StableCharPtr begin, end;
Value v;
@ -112,6 +112,7 @@ class MOZ_STACK_CLASS JSONParser : private AutoGCRooter
: AutoGCRooter(cx, JSONPARSER),
cx(cx),
current(data),
begin(data),
end((data + length).get(), data.get(), length),
errorHandling(errorHandling),
stack(cx),
@ -200,6 +201,8 @@ class MOZ_STACK_CLASS JSONParser : private AutoGCRooter
bool finishObject(MutableHandleValue vp, PropertyVector &properties);
bool finishArray(MutableHandleValue vp, ElementVector &elements);
void getTextPosition(uint32_t *column, uint32_t *line);
friend void AutoGCRooter::trace(JSTracer *trc);
void trace(JSTracer *trc);

View File

@ -4175,6 +4175,18 @@ js_strlen(const jschar *s)
return (size_t)(t - s);
}
int32_t
js_strcmp(const jschar *lhs, const jschar *rhs)
{
while (true) {
if (*lhs != *rhs)
return int32_t(*lhs) - int32_t(*rhs);
if (*lhs == 0)
return 0;
++lhs, ++rhs;
}
}
jschar *
js_strdup(js::ThreadSafeContext *cx, const jschar *s)
{

View File

@ -226,6 +226,9 @@ StringHasPattern(const jschar *text, uint32_t textlen,
extern size_t
js_strlen(const jschar *s);
extern int32_t
js_strcmp(const jschar *lhs, const jschar *rhs);
extern jschar *
js_strchr_limit(const jschar *s, jschar c, const jschar *limit);