Bug 1666274 - Add WellKnownParserAtoms_ROM table r=djvj

Use constexpr initialization to bake ParserAtomEntries for tiny well-known
atoms into the executable. This is done in a new type with a constexpr
constructor that computes the correct atom data. Some StaticStrings helper
methods must be moved to the header in order to compute constexpr values.

Since some CommonPropertyNames are already encoded in the tiny atom tables,
we must take care not to generate a duplicate atom. To achieve this we split
the FOR_EACH_COMMON_PROPERTYNAME list into two. The parser atoms for these
named tiny strings use a new initTinyStringAlias method to avoid duplicates.

Differential Revision: https://phabricator.services.mozilla.com/D91087
This commit is contained in:
Ted Campbell 2020-09-23 19:47:18 +00:00
parent 325b5b3b35
commit bd0e9ab215
5 changed files with 185 additions and 155 deletions

View File

@ -504,13 +504,11 @@ bool WellKnownParserAtoms::initSingle(JSContext* cx, const ParserName** name,
MOZ_ASSERT(FindSmallestEncoding(UTF8Chars(str, len)) ==
JS::SmallestEncoding::ASCII);
// If we already reserved a tiny name, reuse the allocation but still point
// the fixed `name` reference at it.
if (const ParserAtom* tiny = lookupTiny(str, len)) {
MOZ_ASSERT(len == 1 || len == 2);
*name = tiny->asName();
return true;
}
// Strings matched by lookupTiny are stored in a static table, and aliases
// should only be added using initTinyStringAlias.
MOZ_ASSERT(lookupTiny(str, len) == nullptr,
"Well-known atom matches a tiny StaticString. Did you add it to "
"the wrong CommonPropertyNames.h list?");
InflatedChar16Sequence<Latin1Char> seq(
reinterpret_cast<const Latin1Char*>(str), len);
@ -535,70 +533,50 @@ bool WellKnownParserAtoms::initSingle(JSContext* cx, const ParserName** name,
return true;
}
bool WellKnownParserAtoms::initStaticStrings(JSContext* cx) {
// Create known ParserAtoms for length-1 Latin1 strings.
static_assert(WellKnownParserAtoms::ASCII_STATIC_LIMIT <=
StaticStrings::UNIT_STATIC_LIMIT);
constexpr size_t NUM_LENGTH1 = WellKnownParserAtoms::ASCII_STATIC_LIMIT;
for (size_t i = 0; i < NUM_LENGTH1; ++i) {
JS::AutoCheckCannotGC nogc;
JSAtom* atom = cx->staticStrings().getUnit(char16_t(i));
bool WellKnownParserAtoms::initTinyStringAlias(JSContext* cx,
const ParserName** name,
const char* str) {
MOZ_ASSERT(name != nullptr);
constexpr size_t len = 1;
MOZ_ASSERT(atom->length() == len);
unsigned int len = strlen(str);
InflatedChar16Sequence<Latin1Char> seq(atom->latin1Chars(nogc), len);
SpecificParserAtomLookup<Latin1Char> lookup(seq);
HashNumber hash = lookup.hash();
// Well-known atoms are all currently ASCII with length <= MaxWellKnownLength.
MOZ_ASSERT(len <= MaxWellKnownLength);
MOZ_ASSERT(FindSmallestEncoding(UTF8Chars(str, len)) ==
JS::SmallestEncoding::ASCII);
auto maybeEntry = ParserAtomEntry::allocate<Latin1Char>(cx, seq, len, hash);
if (maybeEntry.isErr()) {
return false;
}
length1StaticTable_[i] = maybeEntry.unwrap();
length1StaticTable_[i]->setStaticParserString1(StaticParserString1(i));
}
// Create known ParserAtoms for length-2 alpha-num strings.
constexpr size_t NUM_LENGTH2 = NUM_SMALL_CHARS * NUM_SMALL_CHARS;
for (size_t i = 0; i < NUM_LENGTH2; ++i) {
JS::AutoCheckCannotGC nogc;
JSAtom* atom = cx->staticStrings().getLength2FromIndex(i);
constexpr size_t len = 2;
MOZ_ASSERT(atom->length() == len);
InflatedChar16Sequence<Latin1Char> seq(atom->latin1Chars(nogc), len);
SpecificParserAtomLookup<Latin1Char> lookup(seq);
HashNumber hash = lookup.hash();
auto maybeEntry = ParserAtomEntry::allocate<Latin1Char>(cx, seq, len, hash);
if (maybeEntry.isErr()) {
return false;
}
length2StaticTable_[i] = maybeEntry.unwrap();
length2StaticTable_[i]->setStaticParserString2(StaticParserString2(i));
}
// NOTE: If this assert fails, you may need to change which list the name
// belongs to in CommonPropertyNames.h.
const ParserAtom* tiny = lookupTiny(str, len);
MOZ_ASSERT(tiny, "Tiny common name was not found");
// Set alias to existing atom.
*name = tiny->asName();
return true;
}
bool WellKnownParserAtoms::init(JSContext* cx) {
// Initialize the tiny strings before common names since there are some short
// common names.
if (!initStaticStrings(cx)) {
return false;
}
// NOTE: Well-known tiny strings (with length <= 2) are stored in the
// WellKnownParserAtoms_ROM table. This uses static constexpr initialization
// so we don't need to do anything here.
// Tiny strings with a common name need a named alias to an entry in the
// WellKnownParserAtoms_ROM.
#define COMMON_NAME_INIT_(idpart, id, text) \
if (!initTinyStringAlias(cx, &(id), text)) { \
return false; \
}
FOR_EACH_TINY_PROPERTYNAME(COMMON_NAME_INIT_)
#undef COMMON_NAME_INIT_
// Initialize well-known ParserAtoms that use hash set lookup. These also
// point the compile-time names to their own atoms.
#define COMMON_NAME_INIT_(idpart, id, text) \
if (!initSingle(cx, &(id), text, WellKnownAtomId::id)) { \
return false; \
}
FOR_EACH_COMMON_PROPERTYNAME(COMMON_NAME_INIT_)
FOR_EACH_NONTINY_COMMON_PROPERTYNAME(COMMON_NAME_INIT_)
#undef COMMON_NAME_INIT_
#define COMMON_NAME_INIT_(name, clasp) \
if (!initSingle(cx, &(name), #name, WellKnownAtomId::name)) { \
return false; \

View File

@ -68,6 +68,7 @@ enum class StaticParserString2 : uint16_t;
class alignas(alignof(uint32_t)) ParserAtomEntry {
friend class ParserAtomsTable;
friend class WellKnownParserAtoms;
friend class WellKnownParserAtoms_ROM;
static const uint16_t MAX_LATIN1_CHAR = 0xff;
@ -293,6 +294,80 @@ struct ParserAtomLookupHasher {
}
};
// Table of ParserAtom entries for the empty string and for tiny strings of
// length 1 and 2. The constructor is constexpr and fills in the hash, length,
// static-string index and character storage of every entry, so an instance
// can be produced by static constexpr initialization.
class WellKnownParserAtoms_ROM {
 public:
  static const size_t ASCII_STATIC_LIMIT = 128U;
  static const size_t NUM_SMALL_CHARS = StaticStrings::NUM_SMALL_CHARS;
  static const size_t NUM_LENGTH2_ENTRIES = NUM_SMALL_CHARS * NUM_SMALL_CHARS;

  StaticParserAtomEntry<0> emptyAtom;
  StaticParserAtomEntry<1> length1Table[ASCII_STATIC_LIMIT];
  StaticParserAtomEntry<2> length2Table[NUM_LENGTH2_ENTRIES];

  constexpr WellKnownParserAtoms_ROM() {
    // The empty atom carries no characters, only its hash and well-known id.
    emptyAtom.setHashAndLength(mozilla::HashString(u""), 0);
    emptyAtom.setWellKnownAtomId(WellKnownAtomId::empty);

    // One single-character entry per code unit below ASCII_STATIC_LIMIT.
    for (size_t code = 0; code < ASCII_STATIC_LIMIT; ++code) {
      constexpr size_t kLength = 1;
      char16_t text[] = {static_cast<char16_t>(code),
                         /* null-terminator */ 0};
      auto& entry = length1Table[code];
      entry.setHashAndLength(mozilla::HashString(text), kLength);
      entry.setStaticParserString1(StaticParserString1(code));
      entry.storage()[0] = text[0];
    }

    // One two-character entry per pair of small chars. The table index is
    // split into a high part (index >> 6) and a low part (index & 0x3F).
    // NOTE(review): this split presumes the same packing as
    // StaticStrings::getLength2Index — confirm if NUM_SMALL_CHARS changes.
    for (size_t pair = 0; pair < NUM_LENGTH2_ENTRIES; ++pair) {
      constexpr size_t kLength = 2;
      char16_t text[] = {StaticStrings::fromSmallChar(pair >> 6),
                         StaticStrings::fromSmallChar(pair & 0x003F),
                         /* null-terminator */ 0};
      auto& entry = length2Table[pair];
      entry.setHashAndLength(mozilla::HashString(text), kLength);
      entry.setStaticParserString2(StaticParserString2(pair));
      entry.storage()[0] = text[0];
      entry.storage()[1] = text[1];
    }
  }

  // Fast-path tiny strings since they are abundant in minified code.
  //
  // Returns the table entry for `chars` when `length` is 0, 1 or 2 and the
  // characters are covered by the tables; returns nullptr otherwise.
  template <typename CharsT>
  const ParserAtom* lookupTiny(CharsT chars, size_t length) const {
    static_assert(std::is_same_v<CharsT, const Latin1Char*> ||
                      std::is_same_v<CharsT, const char16_t*> ||
                      std::is_same_v<CharsT, const char*> ||
                      std::is_same_v<CharsT, char16_t*> ||
                      std::is_same_v<CharsT, LittleEndianChars>,
                  "This assert mostly explicitly documents the calling types, "
                  "and forces that to be updated if new types show up.");

    if (length == 0) {
      return emptyAtom.asAtom();
    }

    if (length == 1) {
      // Only code units below ASCII_STATIC_LIMIT have a length-1 entry.
      if (char16_t(chars[0]) < ASCII_STATIC_LIMIT) {
        return length1Table[static_cast<size_t>(chars[0])].asAtom();
      }
      return nullptr;
    }

    if (length == 2) {
      // Both characters must be representable as small chars.
      const bool bothSmall = StaticStrings::fitsInSmallChar(chars[0]) &&
                             StaticStrings::fitsInSmallChar(chars[1]);
      if (bothSmall) {
        return length2Table[StaticStrings::getLength2Index(chars[0], chars[1])]
            .asAtom();
      }
      return nullptr;
    }

    // No match on tiny Atoms
    return nullptr;
  }
};
/**
* WellKnownParserAtoms reserves a set of common ParserAtoms on the JSRuntime
* in a read-only format to be used by parser. These reserved atoms can be
@ -315,32 +390,21 @@ class WellKnownParserAtoms {
JS_FOR_EACH_PROTOTYPE(PROPERTYNAME_FIELD_)
#undef PROPERTYNAME_FIELD_
private:
// Common tiny strings (such as identifiers in minified code) have ParserAtoms
// generated into constexpr tables.
static constexpr WellKnownParserAtoms_ROM rom_ = {};
// Common property and prototype names are tracked in a hash table. This table
// does not contain keys for any items already in a direct-indexing table above.
using EntrySet = HashSet<UniquePtr<ParserAtomEntry>, ParserAtomLookupHasher,
js::SystemAllocPolicy>;
EntrySet wellKnownSet_;
static const size_t ASCII_STATIC_LIMIT = 128U;
static const size_t NUM_SMALL_CHARS = StaticStrings::NUM_SMALL_CHARS;
UniquePtr<ParserAtomEntry> length1StaticTable_[ASCII_STATIC_LIMIT] = {};
UniquePtr<ParserAtomEntry>
length2StaticTable_[NUM_SMALL_CHARS * NUM_SMALL_CHARS] = {};
bool initTinyStringAlias(JSContext* cx, const ParserName** name,
const char* str);
bool initSingle(JSContext* cx, const ParserName** name, const char* str,
WellKnownAtomId kind);
bool initStaticStrings(JSContext* cx);
const ParserAtom* getLength1String(char16_t ch) const {
MOZ_ASSERT(ch < ASCII_STATIC_LIMIT);
size_t index = static_cast<size_t>(ch);
return length1StaticTable_[index]->asAtom();
}
const ParserAtom* getLength2String(char16_t ch0, char16_t ch1) const {
size_t index = StaticStrings::getLength2Index(ch0, ch1);
return length2StaticTable_[index]->asAtom();
}
public:
WellKnownParserAtoms() = default;
@ -353,30 +417,9 @@ class WellKnownParserAtoms {
const ParserAtom* lookupChar16Seq(
const SpecificParserAtomLookup<CharT>& lookup) const;
// Fast-path tiny strings since they are abundant in minified code.
template <typename CharT>
const ParserAtom* lookupTiny(const CharT* charPtr, uint32_t length) const {
switch (length) {
case 0:
return empty;
case 1: {
if (char16_t(charPtr[0]) < ASCII_STATIC_LIMIT) {
return getLength1String(charPtr[0]);
}
break;
}
case 2:
if (StaticStrings::fitsInSmallChar(charPtr[0]) &&
StaticStrings::fitsInSmallChar(charPtr[1])) {
return getLength2String(charPtr[0], charPtr[1]);
}
break;
}
// No match on tiny Atoms
return nullptr;
template <typename CharsT>
const ParserAtom* lookupTiny(CharsT chars, size_t length) const {
return rom_.lookupTiny(chars, length);
}
};

View File

@ -11,7 +11,23 @@
#include "js/ProtoKey.h"
#define FOR_EACH_COMMON_PROPERTYNAME(MACRO) \
// The following common atoms are reserved by the js::StaticStrings /
// WellKnownParserAtoms::lookupTiny mechanisms. We still use a named reference
// for the parser and VM to use.
//
// Each entry has the form MACRO(idpart, id, text). Every `text` in this list
// is at most two characters long (including the empty string).
#define FOR_EACH_TINY_PROPERTYNAME(MACRO) \
MACRO(as, as, "as") \
MACRO(by, by, "by") \
MACRO(comma, comma, ",") \
MACRO(do, do_, "do") \
MACRO(empty, empty, "") \
MACRO(futexOK, futexOK, "ok") \
MACRO(if, if_, "if") \
MACRO(in, in, "in") \
MACRO(js, js, "js") \
MACRO(of, of, "of") \
MACRO(star, star, "*")
#define FOR_EACH_NONTINY_COMMON_PROPERTYNAME(MACRO) \
MACRO(abort, abort, "abort") \
MACRO(add, add, "add") \
MACRO(allowContentIter, allowContentIter, "allowContentIter") \
@ -29,7 +45,6 @@
MACRO(ArrayToLocaleString, ArrayToLocaleString, "ArrayToLocaleString") \
MACRO(ArrayType, ArrayType, "ArrayType") \
MACRO(ArrayValues, ArrayValues, "$ArrayValues") \
MACRO(as, as, "as") \
MACRO(Async, Async, "Async") \
MACRO(AsyncFromSyncIterator, AsyncFromSyncIterator, \
"Async-from-Sync Iterator") \
@ -50,7 +65,6 @@
MACRO(breakdown, breakdown, "breakdown") \
MACRO(buffer, buffer, "buffer") \
MACRO(builder, builder, "builder") \
MACRO(by, by, "by") \
MACRO(byob, byob, "byob") \
MACRO(byteAlignment, byteAlignment, "byteAlignment") \
MACRO(byteLength, byteLength, "byteLength") \
@ -73,7 +87,6 @@
MACRO(collation, collation, "collation") \
MACRO(collections, collections, "collections") \
MACRO(columnNumber, columnNumber, "columnNumber") \
MACRO(comma, comma, ",") \
MACRO(compare, compare, "compare") \
MACRO(configurable, configurable, "configurable") \
MACRO(const, const_, "const") \
@ -113,7 +126,6 @@
MACRO(deleteProperty, deleteProperty, "deleteProperty") \
MACRO(direction, direction, "direction") \
MACRO(displayURL, displayURL, "displayURL") \
MACRO(do, do_, "do") \
MACRO(domNode, domNode, "domNode") \
MACRO(done, done, "done") \
MACRO(dotAll, dotAll, "dotAll") \
@ -127,7 +139,6 @@
MACRO(element, element, "element") \
MACRO(elementType, elementType, "elementType") \
MACRO(else, else_, "else") \
MACRO(empty, empty, "") \
MACRO(emptyRegExp, emptyRegExp, "(?:)") \
MACRO(encodeURI, encodeURI, "encodeURI") \
MACRO(encodeURIComponent, encodeURIComponent, "encodeURIComponent") \
@ -174,7 +185,6 @@
MACRO(from, from, "from") \
MACRO(fulfilled, fulfilled, "fulfilled") \
MACRO(futexNotEqual, futexNotEqual, "not-equal") \
MACRO(futexOK, futexOK, "ok") \
MACRO(futexTimedOut, futexTimedOut, "timed-out") \
MACRO(gcCycleNumber, gcCycleNumber, "gcCycleNumber") \
MACRO(Generator, Generator, "Generator") \
@ -213,12 +223,10 @@
MACRO(highWaterMark, highWaterMark, "highWaterMark") \
MACRO(hour, hour, "hour") \
MACRO(hourCycle, hourCycle, "hourCycle") \
MACRO(if, if_, "if") \
MACRO(ignoreCase, ignoreCase, "ignoreCase") \
MACRO(ignorePunctuation, ignorePunctuation, "ignorePunctuation") \
MACRO(implements, implements, "implements") \
MACRO(import, import, "import") \
MACRO(in, in, "in") \
MACRO(includes, includes, "includes") \
MACRO(incumbentGlobal, incumbentGlobal, "incumbentGlobal") \
MACRO(index, index, "index") \
@ -259,7 +267,6 @@
MACRO(IterableToList, IterableToList, "IterableToList") \
MACRO(iterate, iterate, "iterate") \
MACRO(join, join, "join") \
MACRO(js, js, "js") \
MACRO(keys, keys, "keys") \
MACRO(label, label, "label") \
MACRO(language, language, "language") \
@ -336,7 +343,6 @@
MACRO(objectString, objectString, "[object String]") \
MACRO(objectSymbol, objectSymbol, "[object Symbol]") \
MACRO(objectUndefined, objectUndefined, "[object Undefined]") \
MACRO(of, of, "of") \
MACRO(offset, offset, "offset") \
MACRO(optimizedOut, optimizedOut, "optimizedOut") \
MACRO(other, other, "other") \
@ -407,7 +413,6 @@
MACRO(source, source, "source") \
MACRO(SpeciesConstructor, SpeciesConstructor, "SpeciesConstructor") \
MACRO(stack, stack, "stack") \
MACRO(star, star, "*") \
MACRO(starNamespaceStar, starNamespaceStar, "*namespace*") \
MACRO(start, start, "start") \
MACRO(startRange, startRange, "startRange") \
@ -522,4 +527,8 @@
MACRO(defineDataPropertyIntrinsic, defineDataPropertyIntrinsic, \
"_DefineDataProperty")
// The complete list of common property names: expands MACRO over the
// non-tiny list first and then over the tiny list.
#define FOR_EACH_COMMON_PROPERTYNAME(MACRO) \
FOR_EACH_NONTINY_COMMON_PROPERTYNAME(MACRO) \
FOR_EACH_TINY_PROPERTYNAME(MACRO)
#endif /* vm_CommonPropertyNames_h */

View File

@ -1239,47 +1239,6 @@ template bool JSLinearString::isIndexSlow(const Latin1Char* s, size_t length,
template bool JSLinearString::isIndexSlow(const char16_t* s, size_t length,
uint32_t* indexp);
/*
 * Convert a small-char index back into the Latin1 character it encodes.
 * Indices 0-9 are the numerals, 10-35 the lowercase letters, 36-61 the
 * uppercase letters and 62 is '$'. Every other index yields '_'.
 */
constexpr Latin1Char StaticStrings::fromSmallChar(SmallChar c) {
  return c < 10    ? c + '0'
         : c < 36  ? c + 'a' - 10
         : c < 62  ? c + 'A' - 36
         : c == 62 ? '$'
                   : '_';
}
/*
 * Convert a character code into its small-char index: numerals map to 0-9,
 * lowercase letters to 10-35, uppercase letters to 36-61, '$' to 62 and '_'
 * to 63. Any other code yields INVALID_SMALL_CHAR.
 */
constexpr StaticStrings::SmallChar StaticStrings::toSmallChar(uint32_t c) {
  // The two punctuation characters have fixed slots after the alphanumerics.
  switch (c) {
    case '$':
      return 62;
    case '_':
      return 63;
    default:
      break;
  }

  if (mozilla::IsAsciiDigit(c)) {
    return c - '0';
  }
  if (mozilla::IsAsciiLowercaseAlpha(c)) {
    return c - 'a' + 10;
  }
  if (mozilla::IsAsciiUppercaseAlpha(c)) {
    return c - 'A' + 36;
  }
  return StaticStrings::INVALID_SMALL_CHAR;
}
constexpr StaticStrings::SmallCharArray StaticStrings::createSmallCharArray() {
SmallCharArray array{};
for (size_t i = 0; i < SMALL_CHAR_LIMIT; i++) {

View File

@ -49,7 +49,7 @@ namespace frontend {
class ParserAtom;
class ParserAtomEntry;
class WellKnownParserAtoms;
class WellKnownParserAtoms_ROM;
} // namespace frontend
@ -1272,7 +1272,7 @@ class StaticStrings {
// NOTE: The WellKnownParserAtoms rely on these tables and may need to be
// updated if these tables are changed.
friend class js::frontend::ParserAtomEntry;
friend class js::frontend::WellKnownParserAtoms;
friend class js::frontend::WellKnownParserAtoms_ROM;
private:
/* Bigger chars cannot be in a length-2 string. */
@ -1425,6 +1425,47 @@ class StaticStrings {
}
};
/*
 * Convert a small-char index back into the Latin1 character it encodes.
 * Indices 0-9 are the numerals, 10-35 the lowercase letters, 36-61 the
 * uppercase letters and 62 is '$'. Every other index yields '_'.
 */
constexpr Latin1Char StaticStrings::fromSmallChar(SmallChar c) {
  return c < 10    ? c + '0'
         : c < 36  ? c + 'a' - 10
         : c < 62  ? c + 'A' - 36
         : c == 62 ? '$'
                   : '_';
}
/*
 * Convert a character code into its small-char index: numerals map to 0-9,
 * lowercase letters to 10-35, uppercase letters to 36-61, '$' to 62 and '_'
 * to 63. Any other code yields INVALID_SMALL_CHAR.
 */
constexpr StaticStrings::SmallChar StaticStrings::toSmallChar(uint32_t c) {
  // The two punctuation characters have fixed slots after the alphanumerics.
  switch (c) {
    case '$':
      return 62;
    case '_':
      return 63;
    default:
      break;
  }

  if (mozilla::IsAsciiDigit(c)) {
    return c - '0';
  }
  if (mozilla::IsAsciiLowercaseAlpha(c)) {
    return c - 'a' + 10;
  }
  if (mozilla::IsAsciiUppercaseAlpha(c)) {
    return c - 'A' + 36;
  }
  return StaticStrings::INVALID_SMALL_CHAR;
}
/*
* Represents an atomized string which does not contain an index (that is, an
* unsigned 32-bit value). Thus for any PropertyName propname,