Sam McCall 658990772d [JSON] Add error reporting to fromJSON and ObjectMapper
Translating between JSON objects and C++ strutctures is common.
From experience in clangd, fromJSON/ObjectMapper work well and save a lot of
code, but aren't adopted elsewhere at least partly due to total lack of error
reporting beyond "ok"/"bad".

The recently-added error model should be rich enough for most applications.
It requires tracking the path within the root object and reporting local
errors at appropriate places.
To do this, we exploit the fact that the call graph of recursive
parse functions mirror the structure of the JSON itself.
The current path is represented as a linked list of segments, each of which is
on the stack as a parameter. Concretely, fromJSON now looks like:
  bool fromJSON(const Value&, T&, Path);

Beyond the signature change, this is reasonably unobtrusive: building
the path segments is mostly handled by ObjectMapper and the vector<T> fromJSON.
However the root caller of fromJSON must now create a Root object to
store the errors, which is a little clunky.

I've added high-level parse<T>(StringRef) -> Expected<T>, but it's not
general enough to be the primary interface I think (at least, not usable in
clangd).

All existing users (mostly just clangd) are updated in this patch,
making this change backwards-compatible is a bit hairy.

Differential Revision: https://reviews.llvm.org/D88103
2020-09-24 01:20:09 +02:00

551 lines
17 KiB
C++
Raw Blame History

//===-- JSONTest.cpp - JSON unit tests --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Testing/Support/Error.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace llvm {
namespace json {
namespace {
std::string s(const Value &E) { return llvm::formatv("{0}", E).str(); }
std::string sp(const Value &E) { return llvm::formatv("{0:2}", E).str(); }
TEST(JSONTest, Types) {
EXPECT_EQ("true", s(true));
EXPECT_EQ("null", s(nullptr));
EXPECT_EQ("2.5", s(2.5));
EXPECT_EQ(R"("foo")", s("foo"));
EXPECT_EQ("[1,2,3]", s({1, 2, 3}));
EXPECT_EQ(R"({"x":10,"y":20})", s(Object{{"x", 10}, {"y", 20}}));
#ifdef NDEBUG
EXPECT_EQ(R"("<EFBFBD><EFBFBD>")", s("\xC0\x80"));
EXPECT_EQ(R"({"<EFBFBD><EFBFBD>":0})", s(Object{{"\xC0\x80", 0}}));
#else
EXPECT_DEATH(s("\xC0\x80"), "Invalid UTF-8");
EXPECT_DEATH(s(Object{{"\xC0\x80", 0}}), "Invalid UTF-8");
#endif
}
TEST(JSONTest, Constructors) {
// Lots of edge cases around empty and singleton init lists.
EXPECT_EQ("[[[3]]]", s({{{3}}}));
EXPECT_EQ("[[[]]]", s({{{}}}));
EXPECT_EQ("[[{}]]", s({{Object{}}}));
EXPECT_EQ(R"({"A":{"B":{}}})", s(Object{{"A", Object{{"B", Object{}}}}}));
EXPECT_EQ(R"({"A":{"B":{"X":"Y"}}})",
s(Object{{"A", Object{{"B", Object{{"X", "Y"}}}}}}));
EXPECT_EQ("null", s(llvm::Optional<double>()));
EXPECT_EQ("2.5", s(llvm::Optional<double>(2.5)));
EXPECT_EQ("[[2.5,null]]", s(std::vector<std::vector<llvm::Optional<double>>>{
{2.5, llvm::None}}));
}
TEST(JSONTest, StringOwnership) {
char X[] = "Hello";
Value Alias = static_cast<const char *>(X);
X[1] = 'a';
EXPECT_EQ(R"("Hallo")", s(Alias));
std::string Y = "Hello";
Value Copy = Y;
Y[1] = 'a';
EXPECT_EQ(R"("Hello")", s(Copy));
}
TEST(JSONTest, CanonicalOutput) {
// Objects are sorted (but arrays aren't)!
EXPECT_EQ(R"({"a":1,"b":2,"c":3})", s(Object{{"a", 1}, {"c", 3}, {"b", 2}}));
EXPECT_EQ(R"(["a","c","b"])", s({"a", "c", "b"}));
EXPECT_EQ("3", s(3.0));
}
TEST(JSONTest, Escaping) {
std::string Test = {
0, // Strings may contain nulls.
'\b', '\f', // Have mnemonics, but we escape numerically.
'\r', '\n', '\t', // Escaped with mnemonics.
'S', '\"', '\\', // Printable ASCII characters.
'\x7f', // Delete is not escaped.
'\xce', '\x94', // Non-ASCII UTF-8 is not escaped.
};
std::string TestString = R"("\u0000\u0008\u000c\r\n\tS\"\\)"
"\x7f\xCE\x94\"";
EXPECT_EQ(TestString, s(Test));
EXPECT_EQ(R"({"object keys are\nescaped":true})",
s(Object{{"object keys are\nescaped", true}}));
}
TEST(JSONTest, PrettyPrinting) {
const char Str[] = R"({
"empty_array": [],
"empty_object": {},
"full_array": [
1,
null
],
"full_object": {
"nested_array": [
{
"property": "value"
}
]
}
})";
EXPECT_EQ(Str, sp(Object{
{"empty_object", Object{}},
{"empty_array", {}},
{"full_array", {1, nullptr}},
{"full_object",
Object{
{"nested_array",
{Object{
{"property", "value"},
}}},
}},
}));
}
TEST(JSONTest, Array) {
Array A{1, 2};
A.emplace_back(3);
A.emplace(++A.begin(), 0);
A.push_back(4);
A.insert(++++A.begin(), 99);
EXPECT_EQ(A.size(), 6u);
EXPECT_EQ(R"([1,0,99,2,3,4])", s(std::move(A)));
}
TEST(JSONTest, Object) {
Object O{{"a", 1}, {"b", 2}, {"c", 3}};
EXPECT_TRUE(O.try_emplace("d", 4).second);
EXPECT_FALSE(O.try_emplace("a", 4).second);
auto D = O.find("d");
EXPECT_FALSE(D == O.end());
auto E = O.find("e");
EXPECT_TRUE(E == O.end());
O.erase("b");
O.erase(D);
EXPECT_EQ(O.size(), 2u);
EXPECT_EQ(R"({"a":1,"c":3})", s(std::move(O)));
}
TEST(JSONTest, Parse) {
auto Compare = [](llvm::StringRef S, Value Expected) {
if (auto E = parse(S)) {
// Compare both string forms and with operator==, in case we have bugs.
EXPECT_EQ(*E, Expected);
EXPECT_EQ(sp(*E), sp(Expected));
} else {
handleAllErrors(E.takeError(), [S](const llvm::ErrorInfoBase &E) {
FAIL() << "Failed to parse JSON >>> " << S << " <<<: " << E.message();
});
}
};
Compare(R"(true)", true);
Compare(R"(false)", false);
Compare(R"(null)", nullptr);
Compare(R"(42)", 42);
Compare(R"(2.5)", 2.5);
Compare(R"(2e50)", 2e50);
Compare(R"(1.2e3456789)", std::numeric_limits<double>::infinity());
Compare(R"("foo")", "foo");
Compare(R"("\"\\\b\f\n\r\t")", "\"\\\b\f\n\r\t");
Compare(R"("\u0000")", llvm::StringRef("\0", 1));
Compare("\"\x7f\"", "\x7f");
Compare(R"("\ud801\udc37")", u8"\U00010437"); // UTF16 surrogate pair escape.
Compare("\"\xE2\x82\xAC\xF0\x9D\x84\x9E\"", u8"\u20ac\U0001d11e"); // UTF8
Compare(
R"("LoneLeading=\ud801, LoneTrailing=\udc01, LeadingLeadingTrailing=\ud801\ud801\udc37")",
u8"LoneLeading=\ufffd, LoneTrailing=\ufffd, "
u8"LeadingLeadingTrailing=\ufffd\U00010437"); // Invalid unicode.
Compare(R"({"":0,"":0})", Object{{"", 0}});
Compare(R"({"obj":{},"arr":[]})", Object{{"obj", Object{}}, {"arr", {}}});
Compare(R"({"\n":{"\u0000":[[[[]]]]}})",
Object{{"\n", Object{
{llvm::StringRef("\0", 1), {{{{}}}}},
}}});
Compare("\r[\n\t] ", {});
}
TEST(JSONTest, ParseErrors) {
auto ExpectErr = [](llvm::StringRef Msg, llvm::StringRef S) {
if (auto E = parse(S)) {
// Compare both string forms and with operator==, in case we have bugs.
FAIL() << "Parsed JSON >>> " << S << " <<< but wanted error: " << Msg;
} else {
handleAllErrors(E.takeError(), [S, Msg](const llvm::ErrorInfoBase &E) {
EXPECT_THAT(E.message(), testing::HasSubstr(std::string(Msg))) << S;
});
}
};
ExpectErr("Unexpected EOF", "");
ExpectErr("Unexpected EOF", "[");
ExpectErr("Text after end of document", "[][]");
ExpectErr("Invalid JSON value (false?)", "fuzzy");
ExpectErr("Expected , or ]", "[2?]");
ExpectErr("Expected object key", "{a:2}");
ExpectErr("Expected : after object key", R"({"a",2})");
ExpectErr("Expected , or } after object property", R"({"a":2 "b":3})");
ExpectErr("Invalid JSON value", R"([&%!])");
ExpectErr("Invalid JSON value (number?)", "1e1.0");
ExpectErr("Unterminated string", R"("abc\"def)");
ExpectErr("Control character in string", "\"abc\ndef\"");
ExpectErr("Invalid escape sequence", R"("\030")");
ExpectErr("Invalid \\u escape sequence", R"("\usuck")");
ExpectErr("[3:3, byte=19]", R"({
"valid": 1,
invalid: 2
})");
ExpectErr("Invalid UTF-8 sequence", "\"\xC0\x80\""); // WTF-8 null
}
// Direct tests of isUTF8 and fixUTF8. Internal uses are also tested elsewhere.
TEST(JSONTest, UTF8) {
for (const char *Valid : {
"this is ASCII text",
"thïs tëxt häs BMP chäräctërs",
"𐌶𐌰L𐌾𐍈 C𐍈𐌼𐌴𐍃",
}) {
EXPECT_TRUE(isUTF8(Valid)) << Valid;
EXPECT_EQ(fixUTF8(Valid), Valid);
}
for (auto Invalid : std::vector<std::pair<const char *, const char *>>{
{"lone trailing \x81\x82 bytes", "lone trailing <20><> bytes"},
{"missing trailing \xD0 bytes", "missing trailing <20> bytes"},
{"truncated character \xD0", "truncated character <20>"},
{"not \xC1\x80 the \xE0\x9f\xBF shortest \xF0\x83\x83\x83 encoding",
"not <20><> the <20><><EFBFBD> shortest <20><><EFBFBD><EFBFBD> encoding"},
{"too \xF9\x80\x80\x80\x80 long", "too <20><><EFBFBD><EFBFBD><EFBFBD> long"},
{"surrogate \xED\xA0\x80 invalid \xF4\x90\x80\x80",
"surrogate <20><><EFBFBD> invalid <20><><EFBFBD><EFBFBD>"}}) {
EXPECT_FALSE(isUTF8(Invalid.first)) << Invalid.first;
EXPECT_EQ(fixUTF8(Invalid.first), Invalid.second);
}
}
TEST(JSONTest, Inspection) {
llvm::Expected<Value> Doc = parse(R"(
{
"null": null,
"boolean": false,
"number": 2.78,
"string": "json",
"array": [null, true, 3.14, "hello", [1,2,3], {"time": "arrow"}],
"object": {"fruit": "banana"}
}
)");
EXPECT_TRUE(!!Doc);
Object *O = Doc->getAsObject();
ASSERT_TRUE(O);
EXPECT_FALSE(O->getNull("missing"));
EXPECT_FALSE(O->getNull("boolean"));
EXPECT_TRUE(O->getNull("null"));
EXPECT_EQ(O->getNumber("number"), llvm::Optional<double>(2.78));
EXPECT_FALSE(O->getInteger("number"));
EXPECT_EQ(O->getString("string"), llvm::Optional<llvm::StringRef>("json"));
ASSERT_FALSE(O->getObject("missing"));
ASSERT_FALSE(O->getObject("array"));
ASSERT_TRUE(O->getObject("object"));
EXPECT_EQ(*O->getObject("object"), (Object{{"fruit", "banana"}}));
Array *A = O->getArray("array");
ASSERT_TRUE(A);
EXPECT_EQ((*A)[1].getAsBoolean(), llvm::Optional<bool>(true));
ASSERT_TRUE((*A)[4].getAsArray());
EXPECT_EQ(*(*A)[4].getAsArray(), (Array{1, 2, 3}));
EXPECT_EQ((*(*A)[4].getAsArray())[1].getAsInteger(),
llvm::Optional<int64_t>(2));
int I = 0;
for (Value &E : *A) {
if (I++ == 5) {
ASSERT_TRUE(E.getAsObject());
EXPECT_EQ(E.getAsObject()->getString("time"),
llvm::Optional<llvm::StringRef>("arrow"));
} else
EXPECT_FALSE(E.getAsObject());
}
}
// Verify special integer handling - we try to preserve exact int64 values.
TEST(JSONTest, Integers) {
struct {
const char *Desc;
Value Val;
const char *Str;
llvm::Optional<int64_t> AsInt;
llvm::Optional<double> AsNumber;
} TestCases[] = {
{
"Non-integer. Stored as double, not convertible.",
double{1.5},
"1.5",
llvm::None,
1.5,
},
{
"Integer, not exact double. Stored as int64, convertible.",
int64_t{0x4000000000000001},
"4611686018427387905",
int64_t{0x4000000000000001},
double{0x4000000000000000},
},
{
"Negative integer, not exact double. Stored as int64, convertible.",
int64_t{-0x4000000000000001},
"-4611686018427387905",
int64_t{-0x4000000000000001},
double{-0x4000000000000000},
},
// PR46470,
// https://developercommunity.visualstudio.com/content/problem/1093399/incorrect-result-when-printing-6917529027641081856.html
#if !defined(_MSC_VER) || _MSC_VER < 1926
{
"Dynamically exact integer. Stored as double, convertible.",
double{0x6000000000000000},
"6.9175290276410819e+18",
int64_t{0x6000000000000000},
double{0x6000000000000000},
},
#endif
{
"Dynamically integer, >64 bits. Stored as double, not convertible.",
1.5 * double{0x8000000000000000},
"1.3835058055282164e+19",
llvm::None,
1.5 * double{0x8000000000000000},
},
};
for (const auto &T : TestCases) {
EXPECT_EQ(T.Str, s(T.Val)) << T.Desc;
llvm::Expected<Value> Doc = parse(T.Str);
EXPECT_TRUE(!!Doc) << T.Desc;
EXPECT_EQ(Doc->getAsInteger(), T.AsInt) << T.Desc;
EXPECT_EQ(Doc->getAsNumber(), T.AsNumber) << T.Desc;
EXPECT_EQ(T.Val, *Doc) << T.Desc;
EXPECT_EQ(T.Str, s(*Doc)) << T.Desc;
}
}
// Sample struct with typical JSON-mapping rules.
struct CustomStruct {
CustomStruct() : B(false) {}
CustomStruct(std::string S, llvm::Optional<int> I, bool B)
: S(S), I(I), B(B) {}
std::string S;
llvm::Optional<int> I;
bool B;
};
inline bool operator==(const CustomStruct &L, const CustomStruct &R) {
return L.S == R.S && L.I == R.I && L.B == R.B;
}
inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
const CustomStruct &S) {
return OS << "(" << S.S << ", " << (S.I ? std::to_string(*S.I) : "None")
<< ", " << S.B << ")";
}
bool fromJSON(const Value &E, CustomStruct &R, Path P) {
ObjectMapper O(E, P);
if (!O || !O.map("str", R.S) || !O.map("int", R.I))
return false;
O.map("bool", R.B);
return true;
}
static std::string errorContext(const Value &V, const Path::Root &R) {
std::string Context;
llvm::raw_string_ostream OS(Context);
R.printErrorContext(V, OS);
return OS.str();
}
TEST(JSONTest, Deserialize) {
std::map<std::string, std::vector<CustomStruct>> R;
CustomStruct ExpectedStruct = {"foo", 42, true};
std::map<std::string, std::vector<CustomStruct>> Expected;
Value J = Object{
{"foo",
Array{
Object{
{"str", "foo"},
{"int", 42},
{"bool", true},
{"unknown", "ignored"},
},
Object{{"str", "bar"}},
Object{
{"str", "baz"}, {"bool", "string"}, // OK, deserialize ignores.
},
}}};
Expected["foo"] = {
CustomStruct("foo", 42, true),
CustomStruct("bar", llvm::None, false),
CustomStruct("baz", llvm::None, false),
};
Path::Root Root("CustomStruct");
ASSERT_TRUE(fromJSON(J, R, Root));
EXPECT_EQ(R, Expected);
(*J.getAsObject()->getArray("foo"))[0] = 123;
ASSERT_FALSE(fromJSON(J, R, Root));
EXPECT_EQ("expected object at CustomStruct.foo[0]",
toString(Root.getError()));
const char *ExpectedDump = R"({
"foo": [
/* error: expected object */
123,
{ ... },
{ ... }
]
})";
EXPECT_EQ(ExpectedDump, errorContext(J, Root));
CustomStruct V;
EXPECT_FALSE(fromJSON(nullptr, V, Root));
EXPECT_EQ("expected object when parsing CustomStruct",
toString(Root.getError()));
EXPECT_FALSE(fromJSON(Object{}, V, Root));
EXPECT_EQ("missing value at CustomStruct.str", toString(Root.getError()));
EXPECT_FALSE(fromJSON(Object{{"str", 1}}, V, Root));
EXPECT_EQ("expected string at CustomStruct.str", toString(Root.getError()));
// Optional<T> must parse as the correct type if present.
EXPECT_FALSE(fromJSON(Object{{"str", "1"}, {"int", "string"}}, V, Root));
EXPECT_EQ("expected integer at CustomStruct.int", toString(Root.getError()));
}
TEST(JSONTest, ParseDeserialize) {
auto E = parse<std::vector<CustomStruct>>(R"json(
[{"str": "foo", "int": 42}, {"int": 42}]
)json");
EXPECT_THAT_EXPECTED(E, FailedWithMessage("missing value at (root)[1].str"));
E = parse<std::vector<CustomStruct>>(R"json(
[{"str": "foo", "int": 42}, {"str": "bar"}
)json");
EXPECT_THAT_EXPECTED(
E,
FailedWithMessage("[3:2, byte=50]: Expected , or ] after array element"));
E = parse<std::vector<CustomStruct>>(R"json(
[{"str": "foo", "int": 42}]
)json");
EXPECT_THAT_EXPECTED(E, Succeeded());
EXPECT_THAT(*E, testing::SizeIs(1));
}
TEST(JSONTest, Stream) {
auto StreamStuff = [](unsigned Indent) {
std::string S;
llvm::raw_string_ostream OS(S);
OStream J(OS, Indent);
J.comment("top*/level");
J.object([&] {
J.attributeArray("foo", [&] {
J.value(nullptr);
J.comment("element");
J.value(42.5);
J.arrayBegin();
J.value(43);
J.arrayEnd();
});
J.comment("attribute");
J.attributeBegin("bar");
J.comment("attribute value");
J.objectBegin();
J.objectEnd();
J.attributeEnd();
J.attribute("baz", "xyz");
});
return OS.str();
};
const char *Plain =
R"(/*top* /level*/{"foo":[null,/*element*/42.5,[43]],/*attribute*/"bar":/*attribute value*/{},"baz":"xyz"})";
EXPECT_EQ(Plain, StreamStuff(0));
const char *Pretty = R"(/* top* /level */
{
"foo": [
null,
/* element */
42.5,
[
43
]
],
/* attribute */
"bar": /* attribute value */ {},
"baz": "xyz"
})";
EXPECT_EQ(Pretty, StreamStuff(2));
}
TEST(JSONTest, Path) {
Path::Root R("foo");
Path P = R, A = P.field("a"), B = P.field("b");
P.report("oh no");
EXPECT_THAT_ERROR(R.getError(), FailedWithMessage("oh no when parsing foo"));
A.index(1).field("c").index(2).report("boom");
EXPECT_THAT_ERROR(R.getError(), FailedWithMessage("boom at foo.a[1].c[2]"));
B.field("d").field("e").report("bam");
EXPECT_THAT_ERROR(R.getError(), FailedWithMessage("bam at foo.b.d.e"));
Value V = Object{
{"a", Array{42}},
{"b",
Object{{"d",
Object{
{"e", Array{1, Object{{"x", "y"}}}},
{"f", "a moderately long string: 48 characters in total"},
}}}},
};
const char *Expected = R"({
"a": [ ... ],
"b": {
"d": {
"e": /* error: bam */ [
1,
{ ... }
],
"f": "a moderately long string: 48 characte..."
}
}
})";
EXPECT_EQ(Expected, errorContext(V, R));
}
} // namespace
} // namespace json
} // namespace llvm