diff --git a/js/src/frontend/ParserAtom.cpp b/js/src/frontend/ParserAtom.cpp index 60b3a4d202fb..a829aaae8f89 100644 --- a/js/src/frontend/ParserAtom.cpp +++ b/js/src/frontend/ParserAtom.cpp @@ -504,13 +504,11 @@ bool WellKnownParserAtoms::initSingle(JSContext* cx, const ParserName** name, MOZ_ASSERT(FindSmallestEncoding(UTF8Chars(str, len)) == JS::SmallestEncoding::ASCII); - // If we already reserved a tiny name, reuse the allocation but still point - // the fixed `name` reference at it. - if (const ParserAtom* tiny = lookupTiny(str, len)) { - MOZ_ASSERT(len == 1 || len == 2); - *name = tiny->asName(); - return true; - } + // Strings matched by lookupTiny are stored in static table and aliases should + // only be added using initTinyStringAlias. + MOZ_ASSERT(lookupTiny(str, len) == nullptr, + "Well-known atom matches a tiny StaticString. Did you add it to " + "the wrong CommonPropertyNames.h list?"); InflatedChar16Sequence seq( reinterpret_cast(str), len); @@ -535,70 +533,50 @@ bool WellKnownParserAtoms::initSingle(JSContext* cx, const ParserName** name, return true; } -bool WellKnownParserAtoms::initStaticStrings(JSContext* cx) { - // Create known ParserAtoms for length-1 Latin1 strings. - static_assert(WellKnownParserAtoms::ASCII_STATIC_LIMIT <= - StaticStrings::UNIT_STATIC_LIMIT); - constexpr size_t NUM_LENGTH1 = WellKnownParserAtoms::ASCII_STATIC_LIMIT; - for (size_t i = 0; i < NUM_LENGTH1; ++i) { - JS::AutoCheckCannotGC nogc; - JSAtom* atom = cx->staticStrings().getUnit(char16_t(i)); +bool WellKnownParserAtoms::initTinyStringAlias(JSContext* cx, + const ParserName** name, + const char* str) { + MOZ_ASSERT(name != nullptr); - constexpr size_t len = 1; - MOZ_ASSERT(atom->length() == len); + unsigned int len = strlen(str); - InflatedChar16Sequence seq(atom->latin1Chars(nogc), len); - SpecificParserAtomLookup lookup(seq); - HashNumber hash = lookup.hash(); + // Well-known atoms are all currently ASCII with length <= MaxWellKnownLength. 
+ MOZ_ASSERT(len <= MaxWellKnownLength); + MOZ_ASSERT(FindSmallestEncoding(UTF8Chars(str, len)) == + JS::SmallestEncoding::ASCII); - auto maybeEntry = ParserAtomEntry::allocate(cx, seq, len, hash); - if (maybeEntry.isErr()) { - return false; - } - - length1StaticTable_[i] = maybeEntry.unwrap(); - length1StaticTable_[i]->setStaticParserString1(StaticParserString1(i)); - } - - // Create known ParserAtoms for length-2 alpha-num strings. - constexpr size_t NUM_LENGTH2 = NUM_SMALL_CHARS * NUM_SMALL_CHARS; - for (size_t i = 0; i < NUM_LENGTH2; ++i) { - JS::AutoCheckCannotGC nogc; - JSAtom* atom = cx->staticStrings().getLength2FromIndex(i); - - constexpr size_t len = 2; - MOZ_ASSERT(atom->length() == len); - - InflatedChar16Sequence seq(atom->latin1Chars(nogc), len); - SpecificParserAtomLookup lookup(seq); - HashNumber hash = lookup.hash(); - - auto maybeEntry = ParserAtomEntry::allocate(cx, seq, len, hash); - if (maybeEntry.isErr()) { - return false; - } - - length2StaticTable_[i] = maybeEntry.unwrap(); - length2StaticTable_[i]->setStaticParserString2(StaticParserString2(i)); - } + // NOTE: If this assert fails, you may need to change which list it belongs + // to in CommonPropertyNames.h. + const ParserAtom* tiny = lookupTiny(str, len); + MOZ_ASSERT(tiny, "Tiny common name was not found"); + // Set alias to existing atom. + *name = tiny->asName(); return true; } bool WellKnownParserAtoms::init(JSContext* cx) { - // Initialize the tiny strings before common names since there are some short - // common names. - if (!initStaticStrings(cx)) { - return false; - } + // NOTE: Well-known tiny strings (with length <= 2) are stored in the + // WellKnownParserAtoms_ROM table. This uses static constexpr initialization + // so we don't need to do anything here. + // Tiny strings with a common name need a named alias to an entry in the + // WellKnownParserAtoms_ROM. 
+#define COMMON_NAME_INIT_(idpart, id, text) \ + if (!initTinyStringAlias(cx, &(id), text)) { \ + return false; \ + } + FOR_EACH_TINY_PROPERTYNAME(COMMON_NAME_INIT_) +#undef COMMON_NAME_INIT_ + + // Initialize well-known ParserAtoms that use hash set lookup. These also + // point the compile-time names to their own atoms. #define COMMON_NAME_INIT_(idpart, id, text) \ if (!initSingle(cx, &(id), text, WellKnownAtomId::id)) { \ return false; \ } - FOR_EACH_COMMON_PROPERTYNAME(COMMON_NAME_INIT_) + FOR_EACH_NONTINY_COMMON_PROPERTYNAME(COMMON_NAME_INIT_) #undef COMMON_NAME_INIT_ - #define COMMON_NAME_INIT_(name, clasp) \ if (!initSingle(cx, &(name), #name, WellKnownAtomId::name)) { \ return false; \ diff --git a/js/src/frontend/ParserAtom.h b/js/src/frontend/ParserAtom.h index 1ebc4072d8f3..f99348aa81d0 100644 --- a/js/src/frontend/ParserAtom.h +++ b/js/src/frontend/ParserAtom.h @@ -68,6 +68,7 @@ enum class StaticParserString2 : uint16_t; class alignas(alignof(uint32_t)) ParserAtomEntry { friend class ParserAtomsTable; friend class WellKnownParserAtoms; + friend class WellKnownParserAtoms_ROM; static const uint16_t MAX_LATIN1_CHAR = 0xff; @@ -293,6 +294,80 @@ struct ParserAtomLookupHasher { } }; +class WellKnownParserAtoms_ROM { + public: + static const size_t ASCII_STATIC_LIMIT = 128U; + static const size_t NUM_SMALL_CHARS = StaticStrings::NUM_SMALL_CHARS; + static const size_t NUM_LENGTH2_ENTRIES = NUM_SMALL_CHARS * NUM_SMALL_CHARS; + + StaticParserAtomEntry<0> emptyAtom; + StaticParserAtomEntry<1> length1Table[ASCII_STATIC_LIMIT]; + StaticParserAtomEntry<2> length2Table[NUM_LENGTH2_ENTRIES]; + + constexpr WellKnownParserAtoms_ROM() { + // Empty atom + emptyAtom.setHashAndLength(mozilla::HashString(u""), 0); + emptyAtom.setWellKnownAtomId(WellKnownAtomId::empty); + + // Length-1 static atoms + for (size_t i = 0; i < ASCII_STATIC_LIMIT; ++i) { + constexpr size_t len = 1; + char16_t buf[] = {static_cast(i), + /* null-terminator */ 0}; 
length1Table[i].setHashAndLength(mozilla::HashString(buf), len); + length1Table[i].setStaticParserString1(StaticParserString1(i)); + length1Table[i].storage()[0] = buf[0]; + } + + // Length-2 static atoms + for (size_t i = 0; i < NUM_LENGTH2_ENTRIES; ++i) { + constexpr size_t len = 2; + char16_t buf[] = {StaticStrings::fromSmallChar(i >> 6), + StaticStrings::fromSmallChar(i & 0x003F), + /* null-terminator */ 0}; + length2Table[i].setHashAndLength(mozilla::HashString(buf), len); + length2Table[i].setStaticParserString2(StaticParserString2(i)); + length2Table[i].storage()[0] = buf[0]; + length2Table[i].storage()[1] = buf[1]; + } + } + + // Fast-path tiny strings since they are abundant in minified code. + template + const ParserAtom* lookupTiny(CharsT chars, size_t length) const { + static_assert(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v, + "This assert mostly explicitly documents the calling types, " + "and forces that to be updated if new types show up."); + switch (length) { + case 0: + return emptyAtom.asAtom(); + + case 1: { + if (char16_t(chars[0]) < ASCII_STATIC_LIMIT) { + size_t index = static_cast(chars[0]); + return length1Table[index].asAtom(); + } + break; + } + + case 2: + if (StaticStrings::fitsInSmallChar(chars[0]) && + StaticStrings::fitsInSmallChar(chars[1])) { + size_t index = StaticStrings::getLength2Index(chars[0], chars[1]); + return length2Table[index].asAtom(); + } + break; + } + + // No match on tiny Atoms + return nullptr; + } +}; + /** * WellKnownParserAtoms reserves a set of common ParserAtoms on the JSRuntime * in a read-only format to be used by parser. These reserved atoms can be @@ -315,32 +390,21 @@ class WellKnownParserAtoms { JS_FOR_EACH_PROTOTYPE(PROPERTYNAME_FIELD_) #undef PROPERTYNAME_FIELD_ - private: + // Common tiny strings (such as identifiers in minified code) have ParserAtoms + // generated into constexpr tables. 
+ static constexpr WellKnownParserAtoms_ROM rom_ = {}; + + // Common property and prototype names are tracked in a hash table. This table + // does not hold keys for any items already in a direct-indexing table above. using EntrySet = HashSet, ParserAtomLookupHasher, js::SystemAllocPolicy>; EntrySet wellKnownSet_; - static const size_t ASCII_STATIC_LIMIT = 128U; - static const size_t NUM_SMALL_CHARS = StaticStrings::NUM_SMALL_CHARS; - UniquePtr length1StaticTable_[ASCII_STATIC_LIMIT] = {}; - UniquePtr - length2StaticTable_[NUM_SMALL_CHARS * NUM_SMALL_CHARS] = {}; - + bool initTinyStringAlias(JSContext* cx, const ParserName** name, + const char* str); bool initSingle(JSContext* cx, const ParserName** name, const char* str, WellKnownAtomId kind); - bool initStaticStrings(JSContext* cx); - - const ParserAtom* getLength1String(char16_t ch) const { - MOZ_ASSERT(ch < ASCII_STATIC_LIMIT); - size_t index = static_cast(ch); - return length1StaticTable_[index]->asAtom(); - } - const ParserAtom* getLength2String(char16_t ch0, char16_t ch1) const { - size_t index = StaticStrings::getLength2Index(ch0, ch1); - return length2StaticTable_[index]->asAtom(); - } - public: WellKnownParserAtoms() = default; @@ -353,30 +417,9 @@ class WellKnownParserAtoms { const ParserAtom* lookupChar16Seq( const SpecificParserAtomLookup& lookup) const; - // Fast-path tiny strings since they are abundant in minified code. 
- template - const ParserAtom* lookupTiny(const CharT* charPtr, uint32_t length) const { - switch (length) { - case 0: - return empty; - - case 1: { - if (char16_t(charPtr[0]) < ASCII_STATIC_LIMIT) { - return getLength1String(charPtr[0]); - } - break; - } - - case 2: - if (StaticStrings::fitsInSmallChar(charPtr[0]) && - StaticStrings::fitsInSmallChar(charPtr[1])) { - return getLength2String(charPtr[0], charPtr[1]); - } - break; - } - - // No match on tiny Atoms - return nullptr; + template + const ParserAtom* lookupTiny(CharsT chars, size_t length) const { + return rom_.lookupTiny(chars, length); } }; diff --git a/js/src/vm/CommonPropertyNames.h b/js/src/vm/CommonPropertyNames.h index 0ef6be7e2d04..50dd8b1cec8e 100644 --- a/js/src/vm/CommonPropertyNames.h +++ b/js/src/vm/CommonPropertyNames.h @@ -11,7 +11,23 @@ #include "js/ProtoKey.h" -#define FOR_EACH_COMMON_PROPERTYNAME(MACRO) \ +// The following common atoms are reserved by the js::StaticStrings / +// WellKnownParserAtoms::lookupTiny mechanisms. We still use a named reference +// for the parser and VM to use. 
+#define FOR_EACH_TINY_PROPERTYNAME(MACRO) \ + MACRO(as, as, "as") \ + MACRO(by, by, "by") \ + MACRO(comma, comma, ",") \ + MACRO(do, do_, "do") \ + MACRO(empty, empty, "") \ + MACRO(futexOK, futexOK, "ok") \ + MACRO(if, if_, "if") \ + MACRO(in, in, "in") \ + MACRO(js, js, "js") \ + MACRO(of, of, "of") \ + MACRO(star, star, "*") + +#define FOR_EACH_NONTINY_COMMON_PROPERTYNAME(MACRO) \ MACRO(abort, abort, "abort") \ MACRO(add, add, "add") \ MACRO(allowContentIter, allowContentIter, "allowContentIter") \ @@ -29,7 +45,6 @@ MACRO(ArrayToLocaleString, ArrayToLocaleString, "ArrayToLocaleString") \ MACRO(ArrayType, ArrayType, "ArrayType") \ MACRO(ArrayValues, ArrayValues, "$ArrayValues") \ - MACRO(as, as, "as") \ MACRO(Async, Async, "Async") \ MACRO(AsyncFromSyncIterator, AsyncFromSyncIterator, \ "Async-from-Sync Iterator") \ @@ -50,7 +65,6 @@ MACRO(breakdown, breakdown, "breakdown") \ MACRO(buffer, buffer, "buffer") \ MACRO(builder, builder, "builder") \ - MACRO(by, by, "by") \ MACRO(byob, byob, "byob") \ MACRO(byteAlignment, byteAlignment, "byteAlignment") \ MACRO(byteLength, byteLength, "byteLength") \ @@ -73,7 +87,6 @@ MACRO(collation, collation, "collation") \ MACRO(collections, collections, "collections") \ MACRO(columnNumber, columnNumber, "columnNumber") \ - MACRO(comma, comma, ",") \ MACRO(compare, compare, "compare") \ MACRO(configurable, configurable, "configurable") \ MACRO(const, const_, "const") \ @@ -113,7 +126,6 @@ MACRO(deleteProperty, deleteProperty, "deleteProperty") \ MACRO(direction, direction, "direction") \ MACRO(displayURL, displayURL, "displayURL") \ - MACRO(do, do_, "do") \ MACRO(domNode, domNode, "domNode") \ MACRO(done, done, "done") \ MACRO(dotAll, dotAll, "dotAll") \ @@ -127,7 +139,6 @@ MACRO(element, element, "element") \ MACRO(elementType, elementType, "elementType") \ MACRO(else, else_, "else") \ - MACRO(empty, empty, "") \ MACRO(emptyRegExp, emptyRegExp, "(?:)") \ MACRO(encodeURI, encodeURI, "encodeURI") \ MACRO(encodeURIComponent, 
encodeURIComponent, "encodeURIComponent") \ @@ -174,7 +185,6 @@ MACRO(from, from, "from") \ MACRO(fulfilled, fulfilled, "fulfilled") \ MACRO(futexNotEqual, futexNotEqual, "not-equal") \ - MACRO(futexOK, futexOK, "ok") \ MACRO(futexTimedOut, futexTimedOut, "timed-out") \ MACRO(gcCycleNumber, gcCycleNumber, "gcCycleNumber") \ MACRO(Generator, Generator, "Generator") \ @@ -213,12 +223,10 @@ MACRO(highWaterMark, highWaterMark, "highWaterMark") \ MACRO(hour, hour, "hour") \ MACRO(hourCycle, hourCycle, "hourCycle") \ - MACRO(if, if_, "if") \ MACRO(ignoreCase, ignoreCase, "ignoreCase") \ MACRO(ignorePunctuation, ignorePunctuation, "ignorePunctuation") \ MACRO(implements, implements, "implements") \ MACRO(import, import, "import") \ - MACRO(in, in, "in") \ MACRO(includes, includes, "includes") \ MACRO(incumbentGlobal, incumbentGlobal, "incumbentGlobal") \ MACRO(index, index, "index") \ @@ -259,7 +267,6 @@ MACRO(IterableToList, IterableToList, "IterableToList") \ MACRO(iterate, iterate, "iterate") \ MACRO(join, join, "join") \ - MACRO(js, js, "js") \ MACRO(keys, keys, "keys") \ MACRO(label, label, "label") \ MACRO(language, language, "language") \ @@ -336,7 +343,6 @@ MACRO(objectString, objectString, "[object String]") \ MACRO(objectSymbol, objectSymbol, "[object Symbol]") \ MACRO(objectUndefined, objectUndefined, "[object Undefined]") \ - MACRO(of, of, "of") \ MACRO(offset, offset, "offset") \ MACRO(optimizedOut, optimizedOut, "optimizedOut") \ MACRO(other, other, "other") \ @@ -407,7 +413,6 @@ MACRO(source, source, "source") \ MACRO(SpeciesConstructor, SpeciesConstructor, "SpeciesConstructor") \ MACRO(stack, stack, "stack") \ - MACRO(star, star, "*") \ MACRO(starNamespaceStar, starNamespaceStar, "*namespace*") \ MACRO(start, start, "start") \ MACRO(startRange, startRange, "startRange") \ @@ -522,4 +527,8 @@ MACRO(defineDataPropertyIntrinsic, defineDataPropertyIntrinsic, \ "_DefineDataProperty") +#define FOR_EACH_COMMON_PROPERTYNAME(MACRO) \ + 
FOR_EACH_NONTINY_COMMON_PROPERTYNAME(MACRO) \ + FOR_EACH_TINY_PROPERTYNAME(MACRO) + #endif /* vm_CommonPropertyNames_h */ diff --git a/js/src/vm/StringType.cpp b/js/src/vm/StringType.cpp index ea9df66b4e35..9f89979c5596 100644 --- a/js/src/vm/StringType.cpp +++ b/js/src/vm/StringType.cpp @@ -1239,47 +1239,6 @@ template bool JSLinearString::isIndexSlow(const Latin1Char* s, size_t length, template bool JSLinearString::isIndexSlow(const char16_t* s, size_t length, uint32_t* indexp); -/* - * Declare length-2 strings. We only store strings where both characters are - * alphanumeric. The lower 10 short chars are the numerals, the next 26 are - * the lowercase letters, and the next 26 are the uppercase letters. - */ - -constexpr Latin1Char StaticStrings::fromSmallChar(SmallChar c) { - if (c < 10) { - return c + '0'; - } - if (c < 36) { - return c + 'a' - 10; - } - if (c < 62) { - return c + 'A' - 36; - } - if (c == 62) { - return '$'; - } - return '_'; -} - -constexpr StaticStrings::SmallChar StaticStrings::toSmallChar(uint32_t c) { - if (mozilla::IsAsciiDigit(c)) { - return c - '0'; - } - if (mozilla::IsAsciiLowercaseAlpha(c)) { - return c - 'a' + 10; - } - if (mozilla::IsAsciiUppercaseAlpha(c)) { - return c - 'A' + 36; - } - if (c == '$') { - return 62; - } - if (c == '_') { - return 63; - } - return StaticStrings::INVALID_SMALL_CHAR; -} - constexpr StaticStrings::SmallCharArray StaticStrings::createSmallCharArray() { SmallCharArray array{}; for (size_t i = 0; i < SMALL_CHAR_LIMIT; i++) { diff --git a/js/src/vm/StringType.h b/js/src/vm/StringType.h index 5b1d68882ba5..0cdf8f38e4d0 100644 --- a/js/src/vm/StringType.h +++ b/js/src/vm/StringType.h @@ -49,7 +49,7 @@ namespace frontend { class ParserAtom; class ParserAtomEntry; -class WellKnownParserAtoms; +class WellKnownParserAtoms_ROM; } // namespace frontend @@ -1272,7 +1272,7 @@ class StaticStrings { // NOTE: The WellKnownParserAtoms rely on these tables and may need to be // update if these tables are changed. 
friend class js::frontend::ParserAtomEntry; - friend class js::frontend::WellKnownParserAtoms; + friend class js::frontend::WellKnownParserAtoms_ROM; private: /* Bigger chars cannot be in a length-2 string. */ @@ -1425,6 +1425,47 @@ class StaticStrings { } }; +/* + * Declare length-2 strings. We only store strings where both characters are + * alphanumeric. The lower 10 short chars are the numerals, the next 26 are + * the lowercase letters, and the next 26 are the uppercase letters. + */ + +constexpr Latin1Char StaticStrings::fromSmallChar(SmallChar c) { + if (c < 10) { + return c + '0'; + } + if (c < 36) { + return c + 'a' - 10; + } + if (c < 62) { + return c + 'A' - 36; + } + if (c == 62) { + return '$'; + } + return '_'; +} + +constexpr StaticStrings::SmallChar StaticStrings::toSmallChar(uint32_t c) { + if (mozilla::IsAsciiDigit(c)) { + return c - '0'; + } + if (mozilla::IsAsciiLowercaseAlpha(c)) { + return c - 'a' + 10; + } + if (mozilla::IsAsciiUppercaseAlpha(c)) { + return c - 'A' + 36; + } + if (c == '$') { + return 62; + } + if (c == '_') { + return 63; + } + return StaticStrings::INVALID_SMALL_CHAR; +} + /* * Represents an atomized string which does not contain an index (that is, an * unsigned 32-bit value). Thus for any PropertyName propname,