Bug 1377007 - Implementation of the Token Reader dedicated to testing;r=arai,jorendorff

This patch ports to SpiderMonkey the tokenizer currently implemented
in the external binjs-ref tool.

While this tokenizer will clearly not be the tokenizer eventually
shipped to end-users (whether in Firefox or in binjs-ref), the plan is
to keep it both in binjs-ref and in SpiderMonkey (Nightly only) as a
tool for helping test the higher layers of Binjs.

MozReview-Commit-ID: 1i6XnVIf8p5

--HG--
extra : rebase_source : b8179766ff14dca6d4677931b0f490ac2b8385b4
This commit is contained in:
David Teller 2017-09-13 15:30:19 +02:00
parent 64691d17fb
commit 3ed292278c
14 changed files with 1418 additions and 0 deletions

View File

@ -0,0 +1,529 @@
#include "frontend/BinTokenReaderTester.h"
#include "mozilla/EndianUtils.h"
#include <stdint.h>
#include "gc/Zone.h"
namespace js {
namespace frontend {
using BinFields = BinTokenReaderTester::BinFields;
using AutoList = BinTokenReaderTester::AutoList;
using AutoTaggedTuple = BinTokenReaderTester::AutoTaggedTuple;
using AutoTuple = BinTokenReaderTester::AutoTuple;
// Build a reader over the `length` bytes starting at `start`.
//
// The buffer is borrowed, not copied: only pointers into it are stored.
// Every member is initialized, including `poisoned_`, so that no later read
// of the object observes an indeterminate value.
BinTokenReaderTester::BinTokenReaderTester(JSContext* cx, const uint8_t* start, const size_t length)
    : cx_(cx)
    , poisoned_(false)
    , start_(start)
    , current_(start)
    , stop_(start + length)
    , latestKnownGoodPos_(0)
{ }
// Build a reader over the contents of `chars`.
//
// The vector's storage is borrowed, not copied: only its `begin()`/`end()`
// pointers are stored. Every member is initialized, including `poisoned_`,
// so that no later read of the object observes an indeterminate value.
BinTokenReaderTester::BinTokenReaderTester(JSContext* cx, const Vector<uint8_t>& chars)
    : cx_(cx)
    , poisoned_(false)
    , start_(chars.begin())
    , current_(chars.begin())
    , stop_(chars.end())
    , latestKnownGoodPos_(0)
{ }
bool
BinTokenReaderTester::raiseError(const char* description)
{
MOZ_ASSERT(!cx_->isExceptionPending());
TokenPos pos;
latestTokenPos(pos);
JS_ReportErrorASCII(cx_, "BinAST parsing error: %s at offsets %u => %u",
description, pos.begin, pos.end);
return false;
}
// Copy the next `len` bytes of the stream into `bytes` and advance the
// cursor past them.
//
// Returns `false` (after raising an error) if fewer than `len` bytes remain;
// in that case nothing is consumed. Must not be called while an exception is
// pending, and `len` must be non-zero.
bool
BinTokenReaderTester::readBuf(uint8_t* bytes, uint32_t len)
{
    MOZ_ASSERT(!cx_->isExceptionPending());
    MOZ_ASSERT(len > 0);

    // Compare `len` against the number of remaining bytes instead of forming
    // `current_ + len`: the latter may point beyond the buffer (or wrap
    // around) before it has been validated.
    if (current_ > stop_ || len > static_cast<size_t>(stop_ - current_))
        return raiseError("Buffer exceeds length");

    for (uint32_t i = 0; i < len; ++i)
        *bytes++ = *current_++;

    return true;
}
// Read exactly one byte into `*byte`; a thin wrapper around `readBuf`.
bool
BinTokenReaderTester::readByte(uint8_t* byte)
{
    const uint32_t singleByte = 1;
    return readBuf(byte, singleByte);
}
// Nullable booleans are encoded on a single byte:
//
// 0 => false
// 1 => true
// 2 => null
//
// Any other byte value is a format error.
bool
BinTokenReaderTester::readMaybeBool(Maybe<bool>& result)
{
    updateLatestKnownGood();

    uint8_t encoded;
    if (!readByte(&encoded))
        return false;

    if (encoded == 0) {
        result = Some(false);
        return true;
    }
    if (encoded == 1) {
        result = Some(true);
        return true;
    }
    if (encoded == 2) {
        result = Nothing();
        return true;
    }

    return raiseError("Invalid boolean value");
}
bool
BinTokenReaderTester::readBool(bool& out)
{
Maybe<bool> result;
if (!readMaybeBool(result))
return false;
if (result.isNothing())
return raiseError("Empty boolean value");
out = *result;
return true;
}
// Nullable doubles are encoded on 8 bytes, little-endian:
//
// 0x7FF0000000000001 (signaling NaN) => null
// any other 64 bit sequence => IEEE-754 64-bit floating point number
bool
BinTokenReaderTester::readMaybeDouble(Maybe<double>& result)
{
    updateLatestKnownGood();

    uint8_t raw[8];
    static_assert(sizeof(raw) == sizeof(double),
                  "a double must be exactly 8 bytes");
    if (!readBuf(raw, ArrayLength(raw)))
        return false;

    // Decode little-endian.
    const uint64_t bits = LittleEndian::readUint64(raw);
    if (bits == 0x7FF0000000000001) {
        result = Nothing();
        return true;
    }

    // Canonicalize NaN so that no other NaN bit pattern is handed to callers.
    result = Some(CanonicalizeNaN(BitwiseCast<double>(bits)));
    return true;
}
bool
BinTokenReaderTester::readDouble(double& out)
{
Maybe<double> result;
if (!readMaybeDouble(result))
return false;
if (result.isNothing())
return raiseError("Empty double value");
out = *result;
return true;
}
// Internal uint32_t (used for lengths and counts).
//
// Encoded as 4 bytes, little-endian. `*result` is written only on success.
bool
BinTokenReaderTester::readInternalUint32(uint32_t* result)
{
    uint8_t raw[4];
    static_assert(sizeof(raw) == sizeof(uint32_t),
                  "a uint32_t must be exactly 4 bytes");

    const bool gotBytes = readBuf(raw, sizeof(raw));
    if (gotBytes) {
        // Decode little-endian.
        *result = LittleEndian::readUint32(raw);
    }
    return gotBytes;
}
// Nullable strings:
// - "<string>" (not counted in byte length)
// - byte length (not counted in byte length)
// - bytes (UTF-8)
// - "</string>" (not counted in byte length)
//
// The special sequence of bytes `[255, 0]` (which is an invalid UTF-8 sequence)
// is reserved to `null`.
//
// On success, `out` is `Nothing()` for the null string, otherwise it holds a
// copy of the bytes (NOT validated as UTF-8). The "</string>" footer is
// consumed in both cases, so the cursor always ends up after the whole
// construct.
bool
BinTokenReaderTester::readMaybeChars(Maybe<Chars>& out)
{
    updateLatestKnownGood();

    if (!readConst("<string>"))
        return false;

    // 1. Read byteLength
    uint32_t byteLen;
    if (!readInternalUint32(&byteLen))
        return false;

    // 2. Reject if we can't read. Both checks work on integers: forming
    // `current_ + byteLen` before validating it could produce an out-of-range
    // pointer.
    if (byteLen > UINTPTR_MAX - reinterpret_cast<uintptr_t>(current_)) // Check for overflows
        return raiseError("Arithmetics overflow: string is too long");

    if (byteLen > static_cast<size_t>(stop_ - current_))
        return raiseError("Not enough bytes to read chars");

    if (byteLen == 2 && current_[0] == 255 && current_[1] == 0) {
        // 3. Special case: null string (no allocation).
        out = Nothing();
    } else {
        // 4. Other strings (bytes are copied)
        out.emplace(cx_);
        if (!out->resize(byteLen)) {
            ReportOutOfMemory(cx_);
            return false;
        }
        PodCopy(out->begin(), current_, byteLen);
    }

    current_ += byteLen;

    // 5. The footer follows the payload, whether or not the string is null.
    return readConst("</string>");
}
// Read a string that is not allowed to be `null`: a `null` encoding is
// reported as an error. On success the bytes are moved into `out`.
bool
BinTokenReaderTester::readChars(Chars& out)
{
    Maybe<Chars> maybe;
    if (!readMaybeChars(maybe))
        return false;

    if (!maybe.isSome())
        return raiseError("Empty string");

    out = Move(*maybe);
    return true;
}
// Check whether the next bytes of the stream are exactly `value` (without
// its implicit trailing NUL).
//
// On a match, the bytes are consumed, the "latest known good" position is
// updated and `true` is returned. Otherwise returns `false` with no side
// effect; no error is raised.
template <size_t N>
bool
BinTokenReaderTester::matchConst(const char (&value)[N])
{
    MOZ_ASSERT(N > 0);
    MOZ_ASSERT(value[N - 1] == 0);
    MOZ_ASSERT(!cx_->isExceptionPending());

    const size_t expectedLen = N - 1; // Skip the implicit NUL.

    // Compare lengths as integers: `current_ + N - 1` could point beyond the
    // buffer before it has been validated.
    if (current_ > stop_ || expectedLen > static_cast<size_t>(stop_ - current_))
        return false;

    // Perform lookup, without side-effects.
    if (!std::equal(current_, current_ + expectedLen, value))
        return false;

    // Looks like we have a match. Now perform side-effects
    current_ += expectedLen;
    updateLatestKnownGood();
    return true;
}
// Untagged tuple:
// - "<tuple>";
// - contents (specified by the higher-level grammar);
// - "</tuple>"
//
// Consumes the "<tuple>" header and, on success, arms `guard`.
bool
BinTokenReaderTester::enterUntaggedTuple(AutoTuple& guard)
{
    const bool sawHeader = readConst("<tuple>");
    if (sawHeader)
        guard.init();
    return sawHeader;
}
// Consume the literal `value` (without its implicit trailing NUL) from the
// stream, raising an error if the next bytes are anything else.
template <size_t N>
bool
BinTokenReaderTester::readConst(const char (&value)[N])
{
    updateLatestKnownGood();

    if (matchConst(value))
        return true;

    return raiseError("Could not find expected literal");
}
// Tagged tuples:
// - "<tuple>"
// - "<head>"
// - the name of the tuple's kind, followed by \0 (matched against the name
//   of every known `BinKind`);
// - uint32_t number of fields (see `readInternalUint32()`);
// - `number of fields` field names, each followed by \0 (matched against the
//   name of every known `BinField`); duplicates are rejected;
// - "</head>"
// - content (specified by the higher-level grammar);
// - "</tuple>"
//
// On success, `tag` receives the kind, `fields` is cleared and refilled with
// the fields in stream order, and `guard` is armed. On failure, returns
// `false`; `fields` may have been partially filled.
bool
BinTokenReaderTester::enterTaggedTuple(BinKind& tag, BinFields& fields, AutoTaggedTuple& guard)
{
// Header
if (!readConst("<tuple>"))
return false;
if (!readConst("<head>"))
return false;
// Try each known kind name in turn; `matchConst` consumes bytes only on a
// match, so failed attempts have no side effect.
// This would probably be much faster with a HashTable, but we don't
// really care about the speed of BinTokenReaderTester.
do {
#define FIND_MATCH(CONSTRUCTOR, NAME) \
if (matchConst(#NAME "\0")) { \
tag = BinKind::CONSTRUCTOR; \
break; \
} // else
FOR_EACH_BIN_KIND(FIND_MATCH)
#undef FIND_MATCH
// else
return raiseError("Invalid tag");
} while(false);
// Now fields.
uint32_t fieldNum;
if (!readInternalUint32(&fieldNum))
return false;
fields.clear();
// Reserving up front is what makes `infallibleAppend` below legal.
// NOTE(review): if `reserve` already reports the failure on `cx_`, the
// "no exception pending" assertion at the top of `raiseError` would fire —
// confirm against the allocation policy of `BinFields`.
if (!fields.reserve(fieldNum))
return raiseError("Out of memory");
for (uint32_t i = 0; i < fieldNum; ++i) {
// Same linear lookup as for the kind, this time over field names.
// This would probably be much faster with a HashTable, but we don't
// really care about the speed of BinTokenReaderTester.
BinField field;
do {
#define FIND_MATCH(CONSTRUCTOR, NAME) \
if (matchConst(#NAME "\0")) { \
field = BinField::CONSTRUCTOR; \
break; \
} // else
FOR_EACH_BIN_FIELD(FIND_MATCH)
#undef FIND_MATCH
// else
return raiseError("Invalid field");
} while (false);
// Make sure that we do not have duplicate fields.
// Search is linear, but again, we don't really care
// in this implementation.
for (uint32_t j = 0; j < i; ++j) {
if (fields[j] == field) {
return raiseError("Duplicate field");
}
}
fields.infallibleAppend(field); // Already checked.
}
// End of header
if (!readConst("</head>"))
return false;
// Enter the body.
guard.init();
return true;
}
// List:
//
// - "<list>" (not counted in byte length);
// - uint32_t byte length (not counted in byte length);
// - uint32_t number of items;
// - contents (specified by higher-level grammar);
// - "</list>" (not counted in byte length)
//
// The total byte length of `number of items` + `contents` must be `byte length`.
//
// On success, `items` receives the number of items and `guard` is armed with
// the position at which the list contents must end.
bool
BinTokenReaderTester::enterList(uint32_t& items, AutoList& guard)
{
    if (!readConst("<list>"))
        return false;

    uint32_t byteLen;
    if (!readInternalUint32(&byteLen))
        return false;

    // Validate `byteLen` with integer arithmetics before forming the end
    // pointer: computing `current_ + byteLen` first could produce an
    // out-of-range pointer.
    if (byteLen > UINTPTR_MAX - reinterpret_cast<uintptr_t>(current_)) // Check for overflows
        return raiseError("Arithmetics overflow: list is too long");

    if (byteLen > static_cast<size_t>(this->stop_ - current_))
        return raiseError("Incorrect list length");

    const uint8_t* stop = current_ + byteLen;
    guard.init(stop);

    // The item count is part of the bytes covered by `byteLen`.
    return readInternalUint32(&items);
}
// Record the current offset (relative to the start of the buffer) as the
// latest position known to be good. The position never moves backwards.
void
BinTokenReaderTester::updateLatestKnownGood()
{
    MOZ_ASSERT(current_ >= start_);

    const size_t consumed = current_ - start_;
    MOZ_ASSERT(consumed >= latestKnownGoodPos_);

    latestKnownGoodPos_ = consumed;
}
// Offset, in bytes from the start of the buffer, of the latest known good
// position.
size_t
BinTokenReaderTester::offset() const
{
    const size_t knownGood = latestKnownGoodPos_;
    return knownGood;
}
// Fill `pos` with the span going from the latest known good position to the
// current position, both expressed as offsets from the start of the buffer.
void
BinTokenReaderTester::latestTokenPos(TokenPos& pos)
{
    const size_t first = latestKnownGoodPos_;
    const size_t last = current_ - start_;

    pos.begin = first;
    pos.end = last;
    MOZ_ASSERT(pos.end >= pos.begin);
}
void
BinTokenReaderTester::AutoBase::init()
{
initialized_ = true;
}
// Create an unarmed guard attached to `reader`.
//
// `initialized_` must start as `false`: the destructor and `done()` read it,
// and a guard may be destroyed without `init()` ever having been called
// (e.g. when the `enter*` call failed before arming it).
BinTokenReaderTester::AutoBase::AutoBase(BinTokenReaderTester& reader)
    : initialized_(false)
    , reader_(reader)
{ }
BinTokenReaderTester::AutoBase::~AutoBase()
{
    // `done()` disarms the guard. A guard that is still armed when it dies is
    // only acceptable if the caller bailed out because of an error, in which
    // case an exception must be pending on the context.
    MOZ_ASSERT_IF(this->initialized_, this->reader_.cx_->isExceptionPending());
}
// Verify that the reader's cursor is exactly at `expectedEnd`, raising an
// error otherwise.
bool
BinTokenReaderTester::AutoBase::checkPosition(const uint8_t* expectedEnd)
{
    if (reader_.current_ == expectedEnd)
        return true;

    return reader_.raiseError("Caller did not consume the expected set of bytes");
}
// Create an unarmed list guard attached to `reader`.
//
// `expectedEnd_` is given a definite value so that the guard never carries an
// indeterminate pointer; the real value is supplied by `init()`.
BinTokenReaderTester::AutoList::AutoList(BinTokenReaderTester& reader)
    : AutoBase(reader)
    , expectedEnd_(nullptr)
{ }
void
BinTokenReaderTester::AutoList::init(const uint8_t* expectedEnd)
{
AutoBase::init();
this->expectedEnd_ = expectedEnd;
}
// Disarm the guard and check that the list was read properly: the cursor
// must be exactly at the expected end of the contents, and the "</list>"
// footer must follow. Returns `false` without further checks if an error is
// already pending.
bool
BinTokenReaderTester::AutoList::done()
{
    MOZ_ASSERT(initialized_);
    initialized_ = false;

    // Already errored, no need to check further.
    if (reader_.cx_->isExceptionPending())
        return false;

    // Exact number of bytes consumed first, then the suffix.
    return checkPosition(expectedEnd_) && reader_.readConst("</list>");
}
// Create a tagged-tuple guard attached to `reader`; all state lives in the
// `AutoBase` part.
BinTokenReaderTester::AutoTaggedTuple::AutoTaggedTuple(BinTokenReaderTester& reader)
    : AutoBase(reader)
{
}
// Disarm the guard and check that the "</tuple>" footer comes next. Returns
// `false` without further checks if an error is already pending.
bool
BinTokenReaderTester::AutoTaggedTuple::done()
{
    MOZ_ASSERT(initialized_);
    initialized_ = false;

    // Already errored, no need to check further.
    if (reader_.cx_->isExceptionPending())
        return false;

    // Check suffix.
    return reader_.readConst("</tuple>");
}
// Create an untagged-tuple guard attached to `reader`; all state lives in the
// `AutoBase` part.
BinTokenReaderTester::AutoTuple::AutoTuple(BinTokenReaderTester& reader)
    : AutoBase(reader)
{
}
// Disarm the guard and check that the "</tuple>" footer comes next. Returns
// `false` without further checks if an error is already pending.
bool
BinTokenReaderTester::AutoTuple::done()
{
    MOZ_ASSERT(initialized_);
    initialized_ = false;

    // Already errored, no need to check further.
    if (reader_.cx_->isExceptionPending())
        return false;

    // Check suffix.
    return reader_.readConst("</tuple>");
}
} // namespace frontend
} // namespace js

View File

@ -0,0 +1,356 @@
#ifndef frontend_BinTokenReaderTester_h
#define frontend_BinTokenReaderTester_h
#include "mozilla/Maybe.h"
#include "frontend/BinToken.h"
#include "frontend/TokenStream.h"
#include "js/TypeDecls.h"
#if !defined(NIGHTLY_BUILD)
#error "BinTokenReaderTester.* is designed to help test implementations of successive versions of JS BinaryAST. It is available only on Nightly."
#endif // !defined(NIGHTLY_BUILD)
namespace js {
namespace frontend {
using namespace mozilla;
using namespace JS;
/**
* A token reader for a simple, alternative serialization format for BinAST.
*
* This serialization format, which is also supported by the reference
* implementation of the BinAST compression suite, is designed to be
* mostly human-readable and easy to check for all sorts of deserialization
* errors. While this format is NOT designed to be shipped to end-users, it
* is nevertheless a very useful tool for implementing and testing parsers.
*
* Both the format and the implementation are ridiculously inefficient:
*
* - the underlying format tags almost all its data with e.g. `<tuple>`, `</tuple>`
* to aid with detecting offset errors or format error;
* - the underlying format copies list of fields into every single node, instead
* of keeping them once in the header;
* - every kind/field extraction requires memory allocation and plenty of string
* comparisons;
* - ...
*
* This token reader is designed to be API-compatible with the standard, shipped,
* token reader. For these reasons:
*
* - it does not support any form of look ahead, push back;
* - it does not support any form of error recovery.
*/
class MOZ_STACK_CLASS BinTokenReaderTester
{
public:
// A list of fields, in the order in which they appear in the stream.
using BinFields = Vector<BinField, 8>;
// A bunch of characters. At this stage, there is no guarantee on whether
// they are valid UTF-8. Future versions may replace this by a slice into
// the buffer.
using Chars = Vector<uint8_t, 32>;
class AutoList;
class AutoTuple;
class AutoTaggedTuple;
public:
/**
* Construct a token reader over `length` bytes starting at `start`.
*
* Does NOT copy the buffer.
*/
BinTokenReaderTester(JSContext* cx, const uint8_t* start, const size_t length);
/**
* Construct a token reader over the contents of `chars`.
*
* Does NOT copy the buffer.
*/
BinTokenReaderTester(JSContext* cx, const Vector<uint8_t>& chars);
// --- Primitive values.
//
// Note that the underlying format allows for a `null` value for primitive
// values.
//
// Reading will return an error either in case of I/O error or in case of
// a format problem. Reading if an exception is pending is an error and
// will cause assertion failures. Do NOT attempt to read once an exception
// has been cleared: the token reader does NOT support recovery, by design.
/**
* Read a single `true | false | null` value.
*
* @param out Set to `Nothing` if the data specifies that the value is `null`.
* Otherwise, `Some(true)` or `Some(false)`.
*
* @return false If a boolean could not be read. In this case, an error
* has been raised.
*/
MOZ_MUST_USE bool readMaybeBool(Maybe<bool>& out);
// Same as `readMaybeBool`, but a `null` value is an error.
MOZ_MUST_USE bool readBool(bool& out);
/**
* Read a single `number | null` value.
*
* @param out Set to `Nothing` if the data specifies that the value is `null`.
* Otherwise, `Some(x)`, where `x` is a valid `double` (i.e. either a non-NaN
* or a canonical NaN).
*
* @return false If a double could not be read. In this case, an error
* has been raised.
*/
MOZ_MUST_USE bool readMaybeDouble(Maybe<double>& out);
// Same as `readMaybeDouble`, but a `null` value is an error.
MOZ_MUST_USE bool readDouble(double& out);
/**
* Read a single `string | null` value.
*
* @param out Set to `Nothing` if the data specifies that the value is `null`.
* Otherwise, `Some(x)`, where `x` is a `string`.
*
* WARNING: At this stage, the `string` encoding has NOT been validated.
*
* @return false If a string could not be read. In this case, an error
* has been raised.
*/
MOZ_MUST_USE bool readMaybeChars(Maybe<Chars>& out);
// Same as `readMaybeChars`, but a `null` value is an error.
MOZ_MUST_USE bool readChars(Chars& out);
// --- Composite values.
//
// The underlying format does NOT allow for a `null` composite value.
//
// Reading will return an error either in case of I/O error or in case of
// a format problem. Reading from a poisoned tokenizer is an error and
// will cause assertion failures.
/**
* Start reading a list.
*
* @param length (OUT) The number of elements in the list.
* @param guard (OUT) A guard, ensuring that we read the list correctly.
*
* The `guard` is dedicated to ensuring that reading the list has consumed
* exactly all the bytes from that list. The `guard` MUST therefore be
* destroyed at the point where the caller has reached the end of the list.
* If the caller has consumed too few/too many bytes, this will be reported
* in the call to `guard.done()`.
*
* @return false If the header of the list is invalid. In this case, an
* error has been raised.
*/
MOZ_MUST_USE bool enterList(uint32_t& length, AutoList& guard);
/**
* Start reading a tagged tuple.
*
* @param tag (OUT) The tag of the tuple.
* @param fields (OUT) The ORDERED list of fields encoded in this tuple.
* @param guard (OUT) A guard, ensuring that we read the tagged tuple correctly.
*
* The `guard` is dedicated to ensuring that, once the caller is done reading
* the tuple, the reader has reached the footer of that tuple. The `guard`
* MUST therefore be destroyed at the point where the caller has reached the
* end of the tuple. If the caller has consumed too few/too many bytes, this
* will be reported in the call to `guard.done()`.
*
* @return false If the header of the tuple is invalid. In this case, an
* error has been raised.
*/
MOZ_MUST_USE bool enterTaggedTuple(BinKind& tag, BinTokenReaderTester::BinFields& fields, AutoTaggedTuple& guard);
/**
* Start reading an untagged tuple.
*
* @param guard (OUT) A guard, ensuring that we read the tuple correctly.
*
* The `guard` is dedicated to ensuring that, once the caller is done reading
* the tuple, the reader has reached the footer of that tuple. The `guard`
* MUST therefore be destroyed at the point where the caller has reached the
* end of the tuple. If the caller has consumed too few/too many bytes, this
* will be reported in the call to `guard.done()`.
*
* @return false If the header of the tuple is invalid. In this case, an
* error has been raised.
*/
MOZ_MUST_USE bool enterUntaggedTuple(AutoTuple& guard);
/**
* Return the position of the latest token, as a pair of byte offsets from
* the start of the buffer.
*/
void latestTokenPos(TokenPos& out);
// Byte offset (from the start of the buffer) of the latest known good position.
size_t offset() const;
/**
* Raise an error.
*
* Once `raiseError` has been called, the tokenizer is poisoned.
*
* @return Always `false`, so that callers may `return raiseError(...)`.
*/
MOZ_MUST_USE bool raiseError(const char* description);
/**
* Poison this tokenizer.
*
* NOTE(review): only the declaration is visible here; confirm that a
* definition exists before calling this.
*/
void poison();
private:
/**
* Read a single byte.
*/
MOZ_MUST_USE bool readByte(uint8_t* byte);
/**
* Read several bytes.
*
* If there is not enough data, or if the tokenizer has previously been
* poisoned, return `false` and report an exception.
*
* NOTE(review): the implementation asserts that no exception is pending
* rather than checking a poisoned flag — confirm which contract is intended.
*/
MOZ_MUST_USE bool readBuf(uint8_t* bytes, uint32_t len);
/**
* Read a single uint32_t (4 bytes, little-endian).
*/
MOZ_MUST_USE bool readInternalUint32(uint32_t*);
/**
* Read a sequence of chars, ensuring that they match an expected
* sequence of chars.
*
* @param value The sequence of chars to expect, NUL-terminated. The NUL
* is not expected in the stream.
*
* @return false If the stream does not contain `value` at the current
* position. In this case, an error has been raised.
*/
template <size_t N>
MOZ_MUST_USE bool readConst(const char (&value)[N]);
/**
* Read a sequence of chars, consuming the bytes only if they match an expected
* sequence of chars.
*
* @param value The sequence of chars to expect, NUL-terminated. The NUL
* is not expected in the stream.
* @return true if `value` (minus NUL) represents the next few chars in the
* internal buffer, false otherwise. If `true`, the chars are consumed,
* otherwise there is no side-effect.
*/
template <size_t N>
MOZ_MUST_USE bool matchConst(const char (&value)[N]);
/**
* Update the "latest known good" position, which is used during error
* reporting.
*/
void updateLatestKnownGood();
private:
// The context on which errors are reported. Not owned.
JSContext* cx_;
// `true` if we have encountered an error. Errors are non recoverable.
// Attempting to read from a poisoned tokenizer will cause assertion errors.
// NOTE(review): none of the reader methods visible alongside this header
// read this flag; they check `cx_->isExceptionPending()` instead — confirm
// whether it is still needed.
bool poisoned_;
// The first byte of the buffer. Not owned.
const uint8_t* start_;
// The current position.
const uint8_t* current_;
// The last+1 byte of the buffer.
const uint8_t* stop_;
// Latest known good position, as a byte offset from `start_`. Used for
// error reporting.
size_t latestKnownGoodPos_;
// The reader is neither copyable nor movable.
BinTokenReaderTester(const BinTokenReaderTester&) = delete;
BinTokenReaderTester(BinTokenReaderTester&&) = delete;
BinTokenReaderTester& operator=(BinTokenReaderTester&) = delete;
public:
// The following classes are used whenever we encounter a tuple/tagged tuple/list
// to make sure that:
//
// - if the construct "knows" its byte length, we have exactly consumed all
// the bytes (otherwise, this means that the file is corrupted, perhaps on
// purpose, so we need to reject the stream);
// - if the construct has a footer, once we are done reading it, we have
// reached the footer (this is to aid with debugging).
//
// In either case, the caller MUST call method `done()` of the guard once
// it is done reading the tuple/tagged tuple/list, to report any pending error.
// Base class used by other Auto* classes.
class MOZ_STACK_CLASS AutoBase
{
protected:
explicit AutoBase(BinTokenReaderTester& reader);
// Asserts that `done()` has been called, unless an exception is pending.
~AutoBase();
// Raise an error if we are not in the expected position.
MOZ_MUST_USE bool checkPosition(const uint8_t* expectedPosition);
friend BinTokenReaderTester;
// Arm the guard. Called by the reader once the header has been read.
void init();
// Set to `true` if `init()` has been called. Reset to `false` once
// all conditions have been checked.
bool initialized_;
// The reader this guard is attached to.
BinTokenReaderTester& reader_;
};
// Guard class used to ensure that `enterList` is used properly.
class MOZ_STACK_CLASS AutoList : public AutoBase
{
public:
explicit AutoList(BinTokenReaderTester& reader);
// Check that we have properly read to the end of the list.
MOZ_MUST_USE bool done();
protected:
friend BinTokenReaderTester;
// Arm the guard, recording where the contents of the list must end.
void init(const uint8_t* expectedEnd);
private:
// Position at which the contents of the list must end. Set by `init()`.
const uint8_t* expectedEnd_;
};
// Guard class used to ensure that `enterTaggedTuple` is used properly.
class MOZ_STACK_CLASS AutoTaggedTuple : public AutoBase
{
public:
explicit AutoTaggedTuple(BinTokenReaderTester& reader);
// Check that we have properly read to the end of the tuple.
MOZ_MUST_USE bool done();
};
// Guard class used to ensure that `enterUntaggedTuple` is used properly.
class MOZ_STACK_CLASS AutoTuple : public AutoBase
{
public:
explicit AutoTuple(BinTokenReaderTester& reader);
// Check that we have properly read to the end of the tuple.
MOZ_MUST_USE bool done();
};
// Compare a `Chars` and a string literal (ONLY a string literal).
// The literal's implicit trailing NUL is not part of the comparison, but
// NULs embedded in the literal are.
template <size_t N>
static bool equals(const Chars& left, const char (&right)[N]) {
MOZ_ASSERT(N > 0);
MOZ_ASSERT(right[N - 1] == 0);
if (left.length() + 1 /* implicit NUL */ != N)
return false;
if (!std::equal(left.begin(), left.end(), right))
return false;
return true;
}
};
} // namespace frontend
} // namespace js
#endif // frontend_BinTokenReaderTester_h

View File

@ -0,0 +1 @@
<tuple></tuple>

View File

@ -137,6 +137,16 @@ if CONFIG['ENABLE_STREAMS']:
'testReadableStream.cpp',
]
if CONFIG['JS_BUILD_BINAST'] and CONFIG['JS_STANDALONE']:
# Standalone builds leave the source directory untouched,
# which lets us run tests with the data files intact.
# Otherwise, in the current state of the build system,
# we can't have data files in js/src tests.
UNIFIED_SOURCES += [
'testBinTokenReaderTester.cpp'
]
DEFINES['EXPORT_JS_API'] = True
LOCAL_INCLUDES += [

View File

@ -0,0 +1,197 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99:
*/
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/Vector.h"
#if defined(XP_UNIX)
#include <dirent.h>
#include <sys/stat.h>
#elif defined(XP_WIN)
#include <windows.h>
#endif
#include "frontend/BinSource.h"
#include "frontend/FullParseHandler.h"
#include "frontend/ParseContext.h"
#include "frontend/Parser.h"
#include "jsapi-tests/tests.h"
using UsedNameTracker = js::frontend::UsedNameTracker;
extern void readFull(const char* path, Vector<uint8_t>& buf);
// Replace the contents of `buf` with the contents of the file at `path`,
// appending each byte of the file as one `char16_t`. Crashes if the append
// fails.
void readFull(JSContext* cx, const char* path, Vector<char16_t>& buf) {
    buf.shrinkTo(0);

    // Read the raw bytes first, then append them to the char16_t vector.
    Vector<uint8_t> rawBytes(cx);
    readFull(path, rawBytes);

    const bool appended = buf.appendAll(rawBytes);
    if (!appended)
        MOZ_CRASH();
}
// For every `*.binjs` file of the test directory, parse both the binary file
// and its `*.js` text counterpart, then check that both parsers agree: either
// both reject the file, or both accept it and (in DEBUG builds) produce the
// same AST dump.
BEGIN_TEST(testBinASTReaderECMAScript2)
{
    const char BIN_SUFFIX[] = ".binjs";
    const char TXT_SUFFIX[] = ".js";

    CompileOptions options(cx);
    options.setIntroductionType("unit test parse")
           .setFileAndLine("<string>", 1);

    // `PATH` is always the directory prefix (with trailing separator): it is
    // prepended to each file name below.
#if defined(XP_UNIX)
    const char PATH[] = "jsapi-tests/binast/parser/tester/";

    // Read the list of files in the directory.
    DIR* dir = opendir(PATH);
    if (!dir)
        MOZ_CRASH();

    while (auto entry = readdir(dir)) {
        // Find files whose name ends with ".binjs".
        const char* d_name = entry->d_name;
#elif defined(XP_WIN)
    const char PATH[] = "jsapi-tests\\binast\\parser\\tester\\";
    // The wildcard pattern is kept separate from the directory prefix.
    const char PATTERN[] = "jsapi-tests\\binast\\parser\\tester\\*.binjs";

    WIN32_FIND_DATA FindFileData;
    HANDLE hFind = FindFirstFile(PATTERN, &FindFileData);
    for (bool found = (hFind != INVALID_HANDLE_VALUE);
         found;
         found = (FindNextFile(hFind, &FindFileData) != 0))
    {
        const char* d_name = FindFileData.cFileName;
#endif // defined(XP_UNIX) || defined(XP_WIN)

        const size_t namlen = strlen(d_name);
        if (namlen < sizeof(BIN_SUFFIX))
            continue;
        if (strncmp(d_name + namlen - (sizeof(BIN_SUFFIX) - 1), BIN_SUFFIX, sizeof(BIN_SUFFIX)) != 0)
            continue;

        // Find text file: same path, with BIN_SUFFIX replaced by TXT_SUFFIX.
        UniqueChars txtPath(static_cast<char*>(js_malloc(namlen + sizeof(PATH) + 1)));
        if (!txtPath)
            MOZ_CRASH("OOM");
        strncpy(txtPath.get(), PATH, sizeof(PATH));
        strncpy(txtPath.get() + sizeof(PATH) - 1, d_name, namlen);
        strncpy(txtPath.get() + sizeof(PATH) + namlen - sizeof(BIN_SUFFIX), TXT_SUFFIX, sizeof(TXT_SUFFIX));
        txtPath[sizeof(PATH) + namlen - sizeof(BIN_SUFFIX) + sizeof(TXT_SUFFIX) - 1] = 0;
        fprintf(stderr, "Testing %s\n", txtPath.get());

        // Read text file.
        Vector<char16_t> txtSource(cx);
        readFull(cx, txtPath.get(), txtSource);

        // Parse text file.
        UsedNameTracker txtUsedNames(cx);
        if (!txtUsedNames.init())
            MOZ_CRASH();
        js::frontend::Parser<js::frontend::FullParseHandler, char16_t> parser(cx, cx->tempLifoAlloc(), options, txtSource.begin(), txtSource.length(),
                                                  /* foldConstants = */ false, txtUsedNames, nullptr,
                                                  nullptr);
        if (!parser.checkOptions())
            MOZ_CRASH();

        auto txtParsed = parser.parse(); // Will be deallocated once `parser` goes out of scope.
        RootedValue txtExn(cx);
        if (!txtParsed) {
            // Save exception for more detailed error message, if necessary.
            if (!js::GetAndClearException(cx, &txtExn))
                MOZ_CRASH();
        }

        // Read binary file.
        UniqueChars binPath(static_cast<char*>(js_malloc(namlen + sizeof(PATH) + 1)));
        if (!binPath)
            MOZ_CRASH("OOM");
        strncpy(binPath.get(), PATH, sizeof(PATH));
        strncpy(binPath.get() + sizeof(PATH) - 1, d_name, namlen);
        binPath[namlen + sizeof(PATH) - 1] = 0;

        Vector<uint8_t> binSource(cx);
        readFull(binPath.get(), binSource);

        // Parse binary file.
        js::frontend::UsedNameTracker binUsedNames(cx);
        if (!binUsedNames.init())
            MOZ_CRASH();

        js::frontend::BinASTParser reader(cx, cx->tempLifoAlloc(), binUsedNames, options);

        auto binParsed = reader.parse(binSource); // Will be deallocated once `reader` goes out of scope.
        RootedValue binExn(cx);
        if (binParsed.isErr()) {
            // Save exception for more detailed error message, if necessary.
            if (!js::GetAndClearException(cx, &binExn))
                MOZ_CRASH();
        }

        // The binary parser should accept the file iff the text parser has.
        if (binParsed.isOk() && !txtParsed) {
            fprintf(stderr, "Text file parsing failed: ");

            js::ErrorReport report(cx);
            if (!report.init(cx, txtExn, js::ErrorReport::WithSideEffects))
                MOZ_CRASH();

            PrintError(cx, stderr, report.toStringResult(), report.report(), /* reportWarnings */ true);
            MOZ_CRASH("Binary parser accepted a file that text parser rejected");
        }

        if (binParsed.isErr() && txtParsed) {
            fprintf(stderr, "Binary file parsing failed: ");

            js::ErrorReport report(cx);
            if (!report.init(cx, binExn, js::ErrorReport::WithSideEffects))
                MOZ_CRASH();

            PrintError(cx, stderr, report.toStringResult(), report.report(), /* reportWarnings */ true);
            MOZ_CRASH("Binary parser rejected a file that text parser accepted");
        }

        if (binParsed.isErr()) {
            fprintf(stderr, "Binary parser and text parser agree that %s is invalid\n", txtPath.get());
            continue;
        }

#if defined(DEBUG) // Dumping an AST is only defined in DEBUG builds
        // Compare ASTs.
        Sprinter binPrinter(cx);
        if (!binPrinter.init())
            MOZ_CRASH();
        DumpParseTree(binParsed.unwrap(), binPrinter);

        Sprinter txtPrinter(cx);
        if (!txtPrinter.init())
            MOZ_CRASH();
        DumpParseTree(txtParsed, txtPrinter);

        if (strcmp(binPrinter.string(), txtPrinter.string()) != 0) {
            fprintf(stderr, "Got distinct ASTs when parsing %s:\n\tBINARY\n%s\n\n\tTEXT\n%s\n", txtPath.get(), binPrinter.string(), txtPrinter.string());
            MOZ_CRASH();
        }
        fprintf(stderr, "Got the same AST when parsing %s\n", txtPath.get());
#endif // defined(DEBUG)
    }

    // Release the directory enumeration handle.
#if defined(XP_UNIX)
    if (closedir(dir) != 0)
        MOZ_CRASH("Could not close directory");
#elif defined(XP_WIN)
    // `hFind` is invalid when no file matched; there is nothing to close then.
    if (hFind != INVALID_HANDLE_VALUE && !FindClose(hFind))
        MOZ_CRASH("Could not close Find");
#endif // defined(XP_UNIX) || defined(XP_WIN)

    return true;
}
END_TEST(testBinASTReaderECMAScript2)

View File

@ -0,0 +1,321 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99:
*/
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include <string.h>
#include <sys/stat.h>
#if defined (XP_WIN)
#include <windows.h>
#elif defined(XP_UNIX)
#include <fcntl.h>
#include <unistd.h>
#endif // defined (XP_WIN) || defined (XP_UNIX)
#include "frontend/BinTokenReaderTester.h"
#include "gc/Zone.h"
#include "js/Vector.h"
#include "jsapi-tests/tests.h"
using Tokenizer = js::frontend::BinTokenReaderTester;
using Chars = Tokenizer::Chars;
// Hack: These tests need access to resources, which are present in the source dir
// but not copied by our build system. To simplify things, we chdir to the source
// dir at the start of each test and return to the previous directory afterwards.
#if defined(XP_UNIX)
#include <sys/param.h>
static int gJsDirectory(0);
void enterJsDirectory() {
// Save current directory.
MOZ_ASSERT(gJsDirectory == 0);
gJsDirectory = open(".", O_RDONLY);
MOZ_ASSERT(gJsDirectory != 0, "Could not open directory '.'");
// Go to the directory provided by the test harness, if any.
const char* destination = getenv("CPP_UNIT_TESTS_DIR_JS_SRC");
if (destination) {
if (chdir(destination) == -1)
MOZ_CRASH_UNSAFE_PRINTF("Could not chdir to %s", destination);
}
}
// Return to the directory saved by `enterJsDirectory()` and release the
// descriptor that was kept open on it.
void exitJsDirectory() {
    MOZ_ASSERT(gJsDirectory);

    const int savedDirectory = gJsDirectory;
    if (fchdir(savedDirectory) == -1)
        MOZ_CRASH("Could not return to original directory");
    if (close(savedDirectory) != 0)
        MOZ_CRASH("Could not close js directory");

    // Mark that no directory is saved anymore.
    gJsDirectory = 0;
}
#else
char gJsDirectory[MAX_PATH] = { 0 };
void enterJsDirectory() {
// Save current directory.
MOZ_ASSERT(strlen(gJsDirectory) == 0);
auto result = GetCurrentDirectory(MAX_PATH, gJsDirectory);
if (result <= 0)
MOZ_CRASH("Could not get current directory");
if (result > MAX_PATH)
MOZ_CRASH_UNSAFE_PRINTF("Could not get current directory: needed %ld bytes, got %ld\n", result, MAX_PATH);
// Find destination directory, if any.
char destination[MAX_PATH];
if (!GetEnvironmentVariable("CPP_UNIT_TESTS_DIR_JS_SRC", destination, MAX_PATH)) {
if (GetLastError() != ERROR_ENVVAR_NOT_FOUND)
MOZ_CRASH("Could not get CPP_UNIT_TESTS_DIR_JS_SRC");
else
return;
}
// Go to the directory.
if (SetCurrentDirectory(destination) != 0)
MOZ_CRASH_UNSAFE_PRINTF("Could not chdir to %s", destination);
}
// Return to the directory saved by `enterJsDirectory()`.
void exitJsDirectory() {
    MOZ_ASSERT(strlen(gJsDirectory) > 0);
    // `SetCurrentDirectory` returns zero on FAILURE, nonzero on success.
    if (!SetCurrentDirectory(gJsDirectory))
        MOZ_CRASH("Could not return to original directory");
    // Mark that no directory is saved anymore.
    gJsDirectory[0] = 0;
}
#endif // defined(XP_UNIX) || defined(XP_WIN)
// Replace the contents of `buf` with the full contents of the file at `path`.
//
// `path` is resolved relative to the JS source directory (see
// `enterJsDirectory()`); the original working directory is restored before
// returning. Any failure crashes the test.
void readFull(const char* path, js::Vector<uint8_t>& buf) {
    enterJsDirectory();
    buf.shrinkTo(0);

    // Binary mode: the files hold raw bytes, which must not go through
    // text-mode translation.
    FILE* in = fopen(path, "rb");
    if (!in)
        MOZ_CRASH_UNSAFE_PRINTF("Could not open %s: %s", path, strerror(errno));

    struct stat info;
    if (stat(path, &info) < 0)
        MOZ_CRASH_UNSAFE_PRINTF("Could not get stat on %s", path);

    if (!buf.growBy(info.st_size))
        MOZ_CRASH("OOM");

    // `fread` returns a `size_t`; keep it in that type.
    const size_t bytesRead = fread(buf.begin(), 1, info.st_size, in);
    if (fclose(in) != 0)
        MOZ_CRASH("Could not close input file");
    if (bytesRead != static_cast<size_t>(info.st_size))
        MOZ_CRASH("Read error");

    exitJsDirectory();
}
// Reading a simple string.
BEGIN_TEST(testBinTokenReaderTesterSimpleString)
{
    js::Vector<uint8_t> contents(cx);
    readFull("jsapi-tests/binast/tokenizer/tester/test-simple-string.binjs", contents);
    Tokenizer tokenizer(cx, contents);

    Maybe<Chars> found;
    CHECK(tokenizer.readMaybeChars(found));

    // A `null` string must fail the test rather than be dereferenced.
    CHECK(found.isSome());
    CHECK(Tokenizer::equals(*found, "simple string")); // FIXME: Find a way to make CHECK_EQUAL use `Tokenizer::equals`.

    return true;
}
END_TEST(testBinTokenReaderTesterSimpleString)
// Reading a string with embedded 0.
//
// Loads the test file, reads one string from it and checks it against a
// literal that itself contains embedded NUL characters.
BEGIN_TEST(testBinTokenReaderTesterStringWithEscapes)
{
    js::Vector<uint8_t> contents(cx);
    readFull("jsapi-tests/binast/tokenizer/tester/test-string-with-escapes.binjs", contents);
    Tokenizer tokenizer(cx, contents);

    Maybe<Chars> found;
    CHECK(tokenizer.readMaybeChars(found));
    // Report a test failure, rather than dereferencing an empty Maybe, if the
    // tokenizer succeeded without producing a string.
    CHECK(found.isSome());
    CHECK(Tokenizer::equals(*found, "string with escapes \0\1\0")); // FIXME: Find a way to make CHECK_EQUAL use `Tokenizer::equals`.

    return true;
}
END_TEST(testBinTokenReaderTesterStringWithEscapes)
// An untagged tuple without any element: entering it must succeed and the
// guard must then report the tuple as completed.
BEGIN_TEST(testBinTokenReaderTesterEmptyUntaggedTuple)
{
    js::Vector<uint8_t> fileBytes(cx);
    readFull("jsapi-tests/binast/tokenizer/tester/test-empty-untagged-tuple.binjs", fileBytes);
    Tokenizer tokenizer(cx, fileBytes);

    {
        // The guard lives exactly as long as this scope.
        Tokenizer::AutoTuple guard(tokenizer);

        CHECK(tokenizer.enterUntaggedTuple(guard));
        CHECK(guard.done());
    }

    return true;
}
END_TEST(testBinTokenReaderTesterEmptyUntaggedTuple)
// Reading an untagged tuple with two strings.
//
// Enters the tuple, reads two strings and checks that they are "foo" then
// "bar", then checks that the tuple is completed.
BEGIN_TEST(testBinTokenReaderTesterTwoStringsInTuple)
{
    js::Vector<uint8_t> contents(cx);
    readFull("jsapi-tests/binast/tokenizer/tester/test-trivial-untagged-tuple.binjs", contents);
    Tokenizer tokenizer(cx, contents);

    {
        Tokenizer::AutoTuple guard(tokenizer);
        CHECK(tokenizer.enterUntaggedTuple(guard));

        Maybe<Chars> found_0;
        CHECK(tokenizer.readMaybeChars(found_0));
        // Fail the test instead of dereferencing an empty Maybe.
        CHECK(found_0.isSome());
        CHECK(Tokenizer::equals(*found_0, "foo")); // FIXME: Find a way to make CHECK_EQUAL use `Tokenizer::equals`.

        Maybe<Chars> found_1;
        CHECK(tokenizer.readMaybeChars(found_1));
        // Fail the test instead of dereferencing an empty Maybe.
        CHECK(found_1.isSome());
        CHECK(Tokenizer::equals(*found_1, "bar")); // FIXME: Find a way to make CHECK_EQUAL use `Tokenizer::equals`.

        CHECK(guard.done());
    }

    return true;
}
END_TEST(testBinTokenReaderTesterTwoStringsInTuple)
// Reading a tagged tuple `Pattern { id: "foo", value: 3.1415}`
//
// Enters the tagged tuple, checks its tag, then reads the two fields in
// whichever order the file lists them and checks their values.
BEGIN_TEST(testBinTokenReaderTesterSimpleTaggedTuple)
{
    js::Vector<uint8_t> contents(cx);
    readFull("jsapi-tests/binast/tokenizer/tester/test-simple-tagged-tuple.binjs", contents);
    Tokenizer tokenizer(cx, contents);

    {
        js::frontend::BinKind tag;
        Tokenizer::BinFields fields(cx);
        Tokenizer::AutoTaggedTuple guard(tokenizer);
        CHECK(tokenizer.enterTaggedTuple(tag, fields, guard));

        CHECK(tag == js::frontend::BinKind::Pattern);
        // Both field names must be present before `fields[0]` and `fields[1]`
        // are accessed below.
        CHECK(fields.length() == 2);

        Maybe<Chars> found_id;
        const double EXPECTED_value = 3.1415;
        Maybe<double> found_value;

        // Order of fields is not deterministic in the encoder (we could make
        // it deterministic for the test, though, since we already know the binary).
        if (fields[0] == js::frontend::BinField::Id) {
            CHECK(fields[1] == js::frontend::BinField::Value);
            CHECK(tokenizer.readMaybeChars(found_id));
            CHECK(tokenizer.readMaybeDouble(found_value));
        } else if (fields[0] == js::frontend::BinField::Value) {
            CHECK(fields[1] == js::frontend::BinField::Id);
            CHECK(tokenizer.readMaybeDouble(found_value));
            CHECK(tokenizer.readMaybeChars(found_id));
        } else {
            CHECK(false);
        }

        // Fail the test instead of dereferencing an empty Maybe.
        CHECK(found_value.isSome());
        CHECK(found_id.isSome());

        CHECK(EXPECTED_value == *found_value); // Apparently, CHECK_EQUAL doesn't work on `double`.
        CHECK(Tokenizer::equals(*found_id, "foo"));
        CHECK(guard.done());
    }

    return true;
}
END_TEST(testBinTokenReaderTesterSimpleTaggedTuple)
// A list without any element: entering it must succeed, the reported length
// must be 0 and the guard must then report the list as completed.
BEGIN_TEST(testBinTokenReaderTesterEmptyList)
{
    js::Vector<uint8_t> fileBytes(cx);
    readFull("jsapi-tests/binast/tokenizer/tester/test-empty-list.binjs", fileBytes);
    Tokenizer tokenizer(cx, fileBytes);

    {
        // `length` is filled in by `enterList`.
        uint32_t length;
        Tokenizer::AutoList guard(tokenizer);

        CHECK(tokenizer.enterList(length, guard));
        CHECK(length == 0);
        CHECK(guard.done());
    }

    return true;
}
END_TEST(testBinTokenReaderTesterEmptyList)
// Reading `["foo", "bar"]`
//
// Enters the list, checks that it announces two elements, reads both strings
// and checks their values, then checks that the list is completed.
BEGIN_TEST(testBinTokenReaderTesterSimpleList)
{
    js::Vector<uint8_t> contents(cx);
    readFull("jsapi-tests/binast/tokenizer/tester/test-trivial-list.binjs", contents);
    Tokenizer tokenizer(cx, contents);

    {
        uint32_t length;
        Tokenizer::AutoList guard(tokenizer);
        CHECK(tokenizer.enterList(length, guard));

        CHECK(length == 2);

        Maybe<Chars> found_0;
        CHECK(tokenizer.readMaybeChars(found_0));
        // Fail the test instead of dereferencing an empty Maybe.
        CHECK(found_0.isSome());
        CHECK(Tokenizer::equals(*found_0, "foo"));

        Maybe<Chars> found_1;
        CHECK(tokenizer.readMaybeChars(found_1));
        // Fail the test instead of dereferencing an empty Maybe.
        CHECK(found_1.isSome());
        CHECK(Tokenizer::equals(*found_1, "bar"));

        CHECK(guard.done());
    }

    return true;
}
END_TEST(testBinTokenReaderTesterSimpleList)
// Reading `[["foo", "bar"]]`
//
// Enters the outer list (one element), then the inner list (two elements),
// reads both strings and checks their values, then checks that the inner and
// the outer lists are completed, in that order.
BEGIN_TEST(testBinTokenReaderTesterNestedList)
{
    js::Vector<uint8_t> contents(cx);
    readFull("jsapi-tests/binast/tokenizer/tester/test-nested-lists.binjs", contents);
    Tokenizer tokenizer(cx, contents);

    {
        uint32_t outerLength;
        Tokenizer::AutoList outerGuard(tokenizer);
        CHECK(tokenizer.enterList(outerLength, outerGuard));
        CHECK(outerLength == 1);

        {
            uint32_t innerLength;
            Tokenizer::AutoList innerGuard(tokenizer);
            CHECK(tokenizer.enterList(innerLength, innerGuard));
            CHECK(innerLength == 2);

            Maybe<Chars> found_0;
            CHECK(tokenizer.readMaybeChars(found_0));
            // Fail the test instead of dereferencing an empty Maybe.
            CHECK(found_0.isSome());
            CHECK(Tokenizer::equals(*found_0, "foo"));

            Maybe<Chars> found_1;
            CHECK(tokenizer.readMaybeChars(found_1));
            // Fail the test instead of dereferencing an empty Maybe.
            CHECK(found_1.isSome());
            CHECK(Tokenizer::equals(*found_1, "bar"));

            CHECK(innerGuard.done());
        }

        CHECK(outerGuard.done());
    }

    return true;
}
END_TEST(testBinTokenReaderTesterNestedList)

View File

@ -644,6 +644,10 @@ if CONFIG['NIGHTLY_BUILD']:
# The BinAST sources below are added to the build only when the
# JS_BUILD_BINAST configuration flag is set.
if CONFIG['JS_BUILD_BINAST']:
# Using SOURCES as UNIFIED_SOURCES causes mysterious bugs on 32-bit platforms.
# These parts of BinAST are designed only to test evolutions of the
# specification.
SOURCES += ['frontend/BinTokenReaderTester.cpp']
# These parts of BinAST should eventually move to release.
SOURCES += ['frontend/BinToken.cpp']
# Wasm code should use WASM_HUGE_MEMORY instead of JS_CODEGEN_X64