/*
 * Copyright (C) 2009-2019 Apple Inc. All rights reserved.
 * Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// NOTE(review): the source of this file had every angle-bracket span stripped
// (template parameter lists, template arguments, and <...> include names were
// lost). They have been reconstructed below from the visible usages; the
// reconstructed spans are the template headers, the <...> includes, and
// explicit template arguments such as isSafeStringCharacter<...> — confirm
// against upstream WebKit if exact fidelity matters.

#include "config.h"
#include "LiteralParser.h"

#include "CodeBlock.h"
#include "JSArray.h"
#include "JSCInlines.h"
#include "Lexer.h"
#include "ObjectConstructor.h"
#include <wtf/ASCIICType.h>
#include <wtf/dtoa.h>
#include <wtf/text/StringConcatenate.h>

namespace JSC {

// Returns true for the four insignificant-whitespace characters permitted by JSON.
template <typename CharType>
static ALWAYS_INLINE bool isJSONWhiteSpace(const CharType& c)
{
    // The JSON RFC 4627 defines a list of allowed characters to be considered
    // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar).
    return c == ' ' || c == 0x9 || c == 0xA || c == 0xD;
}

// Attempts to parse the source as a sequence of JSONP-style assignments
// ("a.b[0] = {...};", "var x = [...]", "f({...})"). On success, appends one
// JSONPData (path + parsed value) per statement to |results| and returns true.
// Returns false (without throwing) whenever the source does not fit the
// restricted JSONP grammar, so the caller can fall back to a full parse.
template <typename CharType>
bool LiteralParser<CharType>::tryJSONPParse(Vector<JSONPData>& results, bool needsFullSourceInfo)
{
    VM& vm = m_globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);
    if (m_lexer.next() != TokIdentifier)
        return false;
    do {
        Vector<JSONPPathEntry> path;
        // Unguarded next to start off the lexer
        Identifier name = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
        JSONPPathEntry entry;
        if (name == vm.propertyNames->varKeyword) {
            if (m_lexer.next() != TokIdentifier)
                return false;
            entry.m_type = JSONPPathEntryTypeDeclareVar;
            entry.m_pathEntryName = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
            path.append(entry);
        } else {
            entry.m_type = JSONPPathEntryTypeDot;
            entry.m_pathEntryName = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
            path.append(entry);
        }

        // Reserved words can never be JSONP path roots.
        if (isLexerKeyword(entry.m_pathEntryName))
            return false;

        TokenType tokenType = m_lexer.next();
        if (entry.m_type == JSONPPathEntryTypeDeclareVar && tokenType != TokAssign)
            return false;

        // Walk the property path ("a.b[3].c...") until the '=' (or a call '(').
        while (tokenType != TokAssign) {
            switch (tokenType) {
            case TokLBracket: {
                entry.m_type = JSONPPathEntryTypeLookup;
                if (m_lexer.next() != TokNumber)
                    return false;
                double doubleIndex = m_lexer.currentToken()->numberToken;
                int index = (int)doubleIndex;
                // Only exact non-negative int32 indices are allowed.
                if (index != doubleIndex || index < 0)
                    return false;
                entry.m_pathIndex = index;
                if (m_lexer.next() != TokRBracket)
                    return false;
                break;
            }
            case TokDot: {
                entry.m_type = JSONPPathEntryTypeDot;
                if (m_lexer.next() != TokIdentifier)
                    return false;
                entry.m_pathEntryName = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
                break;
            }
            case TokLParen: {
                // A call is only allowed as the last dot-entry, and not when
                // full source info is required.
                if (path.last().m_type != JSONPPathEntryTypeDot || needsFullSourceInfo)
                    return false;
                path.last().m_type = JSONPPathEntryTypeCall;
                entry = path.last();
                goto startJSON;
            }
            default:
                return false;
            }
            path.append(entry);
            tokenType = m_lexer.next();
        }
    startJSON:
        m_lexer.next();
        results.append(JSONPData());
        JSValue startParseExpressionValue = parse(StartParseExpression);
        RETURN_IF_EXCEPTION(scope, false);
        results.last().m_value.set(vm, startParseExpressionValue);
        if (!results.last().m_value)
            return false;
        results.last().m_path.swap(path);
        if (entry.m_type == JSONPPathEntryTypeCall) {
            if (m_lexer.currentToken()->type != TokRParen)
                return false;
            m_lexer.next();
        }
        if (m_lexer.currentToken()->type != TokSemi)
            break;
        m_lexer.next();
    } while (m_lexer.currentToken()->type == TokIdentifier);
    return m_lexer.currentToken()->type == TokEnd;
}

// Interns an 8-bit identifier, caching by first character: single-character
// identifiers in m_shortIdentifiers, longer ones in m_recentIdentifiers
// (a one-entry-per-leading-character cache).
template <typename CharType>
ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const LChar* characters, size_t length)
{
    VM& vm = m_globalObject->vm();
    if (!length)
        return vm.propertyNames->emptyIdentifier;
    if (characters[0] >= MaximumCachableCharacter)
        return Identifier::fromString(vm, characters, length);

    if (length == 1) {
        if (!m_shortIdentifiers[characters[0]].isNull())
            return m_shortIdentifiers[characters[0]];
        m_shortIdentifiers[characters[0]] = Identifier::fromString(vm, characters, length);
        return m_shortIdentifiers[characters[0]];
    }

    if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length))
        return m_recentIdentifiers[characters[0]];
    m_recentIdentifiers[characters[0]] = Identifier::fromString(vm, characters, length);
    return m_recentIdentifiers[characters[0]];
}

// 16-bit flavor of makeIdentifier; identical caching strategy.
template <typename CharType>
ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const UChar* characters, size_t length)
{
    VM& vm = m_globalObject->vm();
    if (!length)
        return vm.propertyNames->emptyIdentifier;
    if (characters[0] >= MaximumCachableCharacter)
        return Identifier::fromString(vm, characters, length);

    if (length == 1) {
        if (!m_shortIdentifiers[characters[0]].isNull())
            return m_shortIdentifiers[characters[0]];
        m_shortIdentifiers[characters[0]] = Identifier::fromString(vm, characters, length);
        return m_shortIdentifiers[characters[0]];
    }

    if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length))
        return m_recentIdentifiers[characters[0]];
    m_recentIdentifiers[characters[0]] = Identifier::fromString(vm, characters, length);
    return m_recentIdentifiers[characters[0]];
}

// Maps each of the 256 Latin-1 code points to the token type its occurrence
// can start. Whitespace is skipped before lookup, so ' ' maps to TokError.
// 256 Latin-1 codes
static constexpr const TokenType TokenTypesOfLatin1Characters[256] = {
    // 0x00-0x1F: C0 control characters never start a token.
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    // 0x20-0x2F: SP ! " # $ % & ' ( ) * + , - . /
    TokError, TokError, TokString, TokError, TokIdentifier, TokError, TokError, TokString,
    TokLParen, TokRParen, TokError, TokError, TokComma, TokNumber, TokDot, TokError,
    // 0x30-0x3F: 0-9 : ; < = > ?
    TokNumber, TokNumber, TokNumber, TokNumber, TokNumber, TokNumber, TokNumber, TokNumber,
    TokNumber, TokNumber, TokColon, TokSemi, TokError, TokAssign, TokError, TokError,
    // 0x40-0x5F: @ A-Z [ \ ] ^ _
    TokError, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier,
    TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier,
    TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier,
    TokIdentifier, TokIdentifier, TokIdentifier, TokLBracket, TokError, TokRBracket, TokError, TokIdentifier,
    // 0x60-0x7F: ` a-z { | } ~ DEL
    TokError, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier,
    TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier,
    TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier, TokIdentifier,
    TokIdentifier, TokIdentifier, TokIdentifier, TokLBrace, TokError, TokRBrace, TokError, TokError,
    // 0x80-0xFF: non-ASCII Latin-1 code points never start a token.
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError,
    TokError, TokError, TokError, TokError, TokError, TokError, TokError, TokError
};

// Produces the next token from the input into |token|. Skips JSON whitespace,
// dispatches on the first character via TokenTypesOfLatin1Characters, and
// special-cases the keywords true/false/null so they never reach the
// (slower) identifier path.
template <typename CharType>
ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lex(LiteralParserToken<CharType>& token)
{
#if ASSERT_ENABLED
    m_currentTokenID++;
#endif

    while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr))
        ++m_ptr;

    ASSERT(m_ptr <= m_end);
    if (m_ptr == m_end) {
        token.type = TokEnd;
        token.start = token.end = m_ptr;
        return TokEnd;
    }
    ASSERT(m_ptr < m_end);
    token.type = TokError;
    token.start = m_ptr;
    CharType character = *m_ptr;
    if (LIKELY(isLatin1(character))) {
        TokenType tokenType = TokenTypesOfLatin1Characters[character];
        switch (tokenType) {
        case TokString:
            if (character == '\'' && m_mode == StrictJSON) {
                m_lexErrorMessage = "Single quotes (\') are not allowed in JSON"_s;
                return TokError;
            }
            return lexString(token, character);

        case TokIdentifier: {
            switch (character) {
            case 't':
                if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
                    m_ptr += 4;
                    token.type = TokTrue;
                    token.end = m_ptr;
                    return TokTrue;
                }
                break;
            case 'f':
                if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
                    m_ptr += 5;
                    token.type = TokFalse;
                    token.end = m_ptr;
                    return TokFalse;
                }
                break;
            case 'n':
                if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
                    m_ptr += 4;
                    token.type = TokNull;
                    token.end = m_ptr;
                    return TokNull;
                }
                break;
            }
            return lexIdentifier(token);
        }

        case TokNumber:
            return lexNumber(token);

        case TokError:
            break;

        default:
            // Single-character punctuation tokens.
            ASSERT(tokenType == TokLBracket || tokenType == TokRBracket || tokenType == TokLBrace
                || tokenType == TokRBrace || tokenType == TokColon || tokenType == TokLParen
                || tokenType == TokRParen || tokenType == TokComma || tokenType == TokDot
                || tokenType == TokAssign || tokenType == TokSemi);
            token.type = tokenType;
            token.end = ++m_ptr;
            return tokenType;
        }
    }
    m_lexErrorMessage = makeString("Unrecognized token '", StringView { m_ptr, 1 }, '\'');
    return TokError;
}

// 8-bit identifier scan: ASCII alphanumerics plus '_' and '$'.
template <>
ALWAYS_INLINE TokenType LiteralParser<LChar>::Lexer::lexIdentifier(LiteralParserToken<LChar>& token)
{
    while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$'))
        m_ptr++;
    token.stringIs8Bit = 1;
    token.stringToken8 = token.start;
    token.stringLength = m_ptr - token.start;
    token.type = TokIdentifier;
    token.end = m_ptr;
    return TokIdentifier;
}

// 16-bit identifier scan: additionally permits ZWNJ (U+200C) and ZWJ (U+200D).
template <>
ALWAYS_INLINE TokenType LiteralParser<UChar>::Lexer::lexIdentifier(LiteralParserToken<UChar>& token)
{
    while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$' || *m_ptr == 0x200C || *m_ptr == 0x200D))
        m_ptr++;
    token.stringIs8Bit = 0;
    token.stringToken16 = token.start;
    token.stringLength = m_ptr - token.start;
    token.type = TokIdentifier;
    token.end = m_ptr;
    return TokIdentifier;
}

// Advances to the next token and returns its type.
template <typename CharType>
TokenType LiteralParser<CharType>::Lexer::next()
{
    TokenType result = lex(m_currentToken);
    ASSERT(m_currentToken.type == result);
    return result;
}

// Stores a raw string pointer into the token, tagging its width.
template <typename CharType>
ALWAYS_INLINE void setParserTokenString(LiteralParserToken<CharType>& token, const CharType* string);

template <>
ALWAYS_INLINE void setParserTokenString<LChar>(LiteralParserToken<LChar>& token, const LChar* string)
{
    token.stringIs8Bit = 1;
    token.stringToken8 = string;
}

template <>
ALWAYS_INLINE void setParserTokenString<UChar>(LiteralParserToken<UChar>& token, const UChar* string)
{
    token.stringIs8Bit = 0;
    token.stringToken16 = string;
}

enum class SafeStringCharacterSet { Strict, NonStrict };

// True when |c| may appear in a string body without escaping (i.e. the fast
// path can just advance past it). NonStrict additionally tolerates '\t'.
template <SafeStringCharacterSet set>
static ALWAYS_INLINE bool isSafeStringCharacter(LChar c, LChar terminator)
{
    return (c >= ' ' && c != '\\' && c != terminator) || (c == '\t' && set != SafeStringCharacterSet::Strict);
}

template <SafeStringCharacterSet set>
static ALWAYS_INLINE bool isSafeStringCharacter(UChar c, UChar terminator)
{
    return (c >= ' ' && (set == SafeStringCharacterSet::Strict || isLatin1(c)) && c != '\\' && c != terminator) || (c == '\t' && set != SafeStringCharacterSet::Strict);
}

// Fast-path string lexing: if the whole string body consists of "safe"
// characters (no escapes), the token simply references the source buffer.
// Anything else falls through to lexStringSlow.
template <typename CharType>
ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lexString(LiteralParserToken<CharType>& token, CharType terminator)
{
    ++m_ptr;
    const CharType* runStart = m_ptr;

    if (m_mode == StrictJSON) {
        while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator))
            ++m_ptr;
    } else {
        while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator))
            ++m_ptr;
    }

    if (LIKELY(m_ptr < m_end && *m_ptr == terminator)) {
        setParserTokenString<CharType>(token, runStart);
        token.stringLength = m_ptr - runStart;
        token.type = TokString;
        token.end = ++m_ptr;
        return TokString;
    }
    return lexStringSlow(token, runStart, terminator);
}

// Slow-path string lexing: accumulates the decoded string into m_builder,
// handling backslash escapes (\" \\ \/ \b \f \n \r \t \uNNNN, and \' outside
// StrictJSON). If no escape was actually consumed, the token still references
// the source buffer directly.
template <typename CharType>
TokenType LiteralParser<CharType>::Lexer::lexStringSlow(LiteralParserToken<CharType>& token, const CharType* runStart, CharType terminator)
{
    m_builder.clear();
    goto slowPathBegin;
    do {
        runStart = m_ptr;
        if (m_mode == StrictJSON) {
            while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator))
                ++m_ptr;
        } else {
            while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator))
                ++m_ptr;
        }

        if (!m_builder.isEmpty())
            m_builder.appendCharacters(runStart, m_ptr - runStart);

slowPathBegin:
        if ((m_mode != NonStrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
            if (m_builder.isEmpty() && runStart < m_ptr)
                m_builder.appendCharacters(runStart, m_ptr - runStart);
            ++m_ptr;
            if (m_ptr >= m_end) {
                m_lexErrorMessage = "Unterminated string"_s;
                return TokError;
            }
            switch (*m_ptr) {
            case '"':
                m_builder.append('"');
                m_ptr++;
                break;
            case '\\':
                m_builder.append('\\');
                m_ptr++;
                break;
            case '/':
                m_builder.append('/');
                m_ptr++;
                break;
            case 'b':
                m_builder.append('\b');
                m_ptr++;
                break;
            case 'f':
                m_builder.append('\f');
                m_ptr++;
                break;
            case 'n':
                m_builder.append('\n');
                m_ptr++;
                break;
            case 'r':
                m_builder.append('\r');
                m_ptr++;
                break;
            case 't':
                m_builder.append('\t');
                m_ptr++;
                break;

            case 'u':
                if ((m_end - m_ptr) < 5) {
                    m_lexErrorMessage = "\\u must be followed by 4 hex digits"_s;
                    return TokError;
                } // uNNNN == 5 characters
                for (int i = 1; i < 5; i++) {
                    if (!isASCIIHexDigit(m_ptr[i])) {
                        m_lexErrorMessage = makeString("\"\\", StringView { m_ptr, 5 }, "\" is not a valid unicode escape");
                        return TokError;
                    }
                }
                m_builder.append(JSC::Lexer<CharType>::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
                m_ptr += 5;
                break;

            default:
                if (*m_ptr == '\'' && m_mode != StrictJSON) {
                    m_builder.append('\'');
                    m_ptr++;
                    break;
                }
                m_lexErrorMessage = makeString("Invalid escape character ", StringView { m_ptr, 1 });
                return TokError;
            }
        }
    } while ((m_mode != NonStrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != terminator);

    if (m_ptr >= m_end || *m_ptr != terminator) {
        m_lexErrorMessage = "Unterminated string"_s;
        return TokError;
    }

    if (m_builder.isEmpty()) {
        // No escapes were consumed; reference the source directly.
        setParserTokenString<CharType>(token, runStart);
        token.stringLength = m_ptr - runStart;
    } else {
        if (m_builder.is8Bit()) {
            token.stringIs8Bit = 1;
            token.stringToken8 = m_builder.characters8();
        } else {
            token.stringIs8Bit = 0;
            token.stringToken16 = m_builder.characters16();
        }
        token.stringLength = m_builder.length();
    }
    token.type = TokString;
    token.end = ++m_ptr;
    return TokString;
}

// Lexes a JSON number. Short integers (at most 9 digits, no fraction or
// exponent) are converted inline without calling parseDouble.
template <typename CharType>
TokenType LiteralParser<CharType>::Lexer::lexNumber(LiteralParserToken<CharType>& token)
{
    // ES5 and json.org define numbers as
    // number
    //     int
    //     int frac? exp?
    //
    // int
    //     -? 0
    //     -? digit1-9 digits?
    //
    // digits
    //     digit digits?
    //
    // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
    if (m_ptr < m_end && *m_ptr == '-') // -?
        ++m_ptr;

    // (0 | [1-9][0-9]*)
    if (m_ptr < m_end && *m_ptr == '0') // 0
        ++m_ptr;
    else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
        ++m_ptr;
        // [0-9]*
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    } else {
        m_lexErrorMessage = "Invalid number"_s;
        return TokError;
    }

    // ('.' [0-9]+)?
    const int NumberOfDigitsForSafeInt32 = 9; // The numbers from -99999999 to 999999999 are always in range of Int32.
    if (m_ptr < m_end && *m_ptr == '.') {
        ++m_ptr;
        // [0-9]+
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
            m_lexErrorMessage = "Invalid digits after decimal point"_s;
            return TokError;
        }

        ++m_ptr;
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    } else if (m_ptr < m_end && (*m_ptr != 'e' && *m_ptr != 'E') && (m_ptr - token.start) <= NumberOfDigitsForSafeInt32) {
        // Fast path: small integer, convert digits manually.
        int32_t result = 0;
        token.type = TokNumber;
        token.end = m_ptr;
        const CharType* digit = token.start;
        bool negative = false;
        if (*digit == '-') {
            negative = true;
            digit++;
        }

        ASSERT((m_ptr - digit) <= NumberOfDigitsForSafeInt32);
        while (digit < m_ptr)
            result = result * 10 + (*digit++) - '0';

        if (!negative)
            token.numberToken = result;
        else {
            // "-0" must produce negative zero, which -result would not.
            if (!result)
                token.numberToken = -0.0;
            else
                token.numberToken = -result;
        }
        return TokNumber;
    }

    // ([eE][+-]? [0-9]+)?
    if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
        ++m_ptr;

        // [-+]?
        if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
            ++m_ptr;

        // [0-9]+
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
            m_lexErrorMessage = "Exponent symbols should be followed by an optional '+' or '-' and then by at least one number"_s;
            return TokError;
        }

        ++m_ptr;
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    }

    token.type = TokNumber;
    token.end = m_ptr;
    size_t parsedLength;
    token.numberToken = parseDouble(token.start, token.end - token.start, parsedLength);
    return TokNumber;
}

// Non-recursive state-machine parser for JSON(-ish) literals. Nesting is
// tracked with explicit stacks (stateStack / objectStack / identifierStack)
// instead of recursion, so arbitrarily deep input cannot overflow the
// C++ stack. Returns the parsed value, or JSValue() on error with
// m_parseErrorMessage set (or an exception pending on the throw scope).
template <typename CharType>
JSValue LiteralParser<CharType>::parse(ParserState initialState)
{
    VM& vm = m_globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);
    ParserState state = initialState;
    MarkedArgumentBuffer objectStack;
    JSValue lastValue;
    Vector<ParserState, 16, UnsafeVectorOverflow> stateStack;
    Vector<Identifier, 16, UnsafeVectorOverflow> identifierStack;
    HashSet<JSObject*> visitedUnderscoreProto;
    while (1) {
        switch (state) {
        startParseArray:
        case StartParseArray: {
            JSArray* array = constructEmptyArray(m_globalObject, nullptr);
            RETURN_IF_EXCEPTION(scope, JSValue());
            objectStack.appendWithCrashOnOverflow(array);
        }
        doParseArrayStartExpression:
        FALLTHROUGH;
        case DoParseArrayStartExpression: {
            TokenType lastToken = m_lexer.currentToken()->type;
            if (m_lexer.next() == TokRBracket) {
                // Trailing commas ("[1,]") are rejected.
                if (lastToken == TokComma) {
                    m_parseErrorMessage = "Unexpected comma at the end of array expression"_s;
                    return JSValue();
                }
                m_lexer.next();
                lastValue = objectStack.takeLast();
                break;
            }

            stateStack.append(DoParseArrayEndExpression);
            goto startParseExpression;
        }
        case DoParseArrayEndExpression: {
            JSArray* array = asArray(objectStack.last());
            array->putDirectIndex(m_globalObject, array->length(), lastValue);
            RETURN_IF_EXCEPTION(scope, JSValue());

            if (m_lexer.currentToken()->type == TokComma)
                goto doParseArrayStartExpression;

            if (m_lexer.currentToken()->type != TokRBracket) {
                m_parseErrorMessage = "Expected ']'"_s;
                return JSValue();
            }

            m_lexer.next();
            lastValue = objectStack.takeLast();
            break;
        }
        startParseObject:
        case StartParseObject: {
            JSObject* object = constructEmptyObject(m_globalObject);
            objectStack.appendWithCrashOnOverflow(object);

            TokenType type = m_lexer.next();
            if (type == TokString || (m_mode != StrictJSON && type == TokIdentifier)) {
                typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken();
                if (identifierToken->stringIs8Bit)
                    identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength));
                else
                    identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength));

                // Check for colon
                if (m_lexer.next() != TokColon) {
                    m_parseErrorMessage = "Expected ':' before value in object property definition"_s;
                    return JSValue();
                }

                m_lexer.next();
                stateStack.append(DoParseObjectEndExpression);
                goto startParseExpression;
            }
            if (type != TokRBrace) {
                m_parseErrorMessage = "Expected '}'"_s;
                return JSValue();
            }
            m_lexer.next();
            lastValue = objectStack.takeLast();
            break;
        }
        doParseObjectStartExpression:
        case DoParseObjectStartExpression: {
            TokenType type = m_lexer.next();
            if (type != TokString && (m_mode == StrictJSON || type != TokIdentifier)) {
                m_parseErrorMessage = "Property name must be a string literal"_s;
                return JSValue();
            }
            typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken();
            if (identifierToken->stringIs8Bit)
                identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength));
            else
                identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength));

            // Check for colon
            if (m_lexer.next() != TokColon) {
                m_parseErrorMessage = "Expected ':'"_s;
                return JSValue();
            }
            m_lexer.next();
            stateStack.append(DoParseObjectEndExpression);
            goto startParseExpression;
        }
        case DoParseObjectEndExpression: {
            JSObject* object = asObject(objectStack.last());
            Identifier ident = identifierStack.takeLast();
            if (m_mode != StrictJSON && ident == vm.propertyNames->underscoreProto) {
                // "__proto__" outside strict JSON goes through the ordinary
                // [[Put]] path, and may only be defined once per object.
                if (!visitedUnderscoreProto.add(object).isNewEntry) {
                    m_parseErrorMessage = "Attempted to redefine __proto__ property"_s;
                    return JSValue();
                }
                PutPropertySlot slot(object, m_nullOrCodeBlock ? m_nullOrCodeBlock->ownerExecutable()->isInStrictContext() : false);
                objectStack.last().put(m_globalObject, ident, lastValue, slot);
            } else {
                if (Optional<uint32_t> index = parseIndex(ident))
                    object->putDirectIndex(m_globalObject, index.value(), lastValue);
                else
                    object->putDirect(vm, ident, lastValue);
            }
            RETURN_IF_EXCEPTION(scope, JSValue());
            if (m_lexer.currentToken()->type == TokComma)
                goto doParseObjectStartExpression;
            if (m_lexer.currentToken()->type != TokRBrace) {
                m_parseErrorMessage = "Expected '}'"_s;
                return JSValue();
            }
            m_lexer.next();
            lastValue = objectStack.takeLast();
            break;
        }
        startParseExpression:
        case StartParseExpression: {
            switch (m_lexer.currentToken()->type) {
            case TokLBracket:
                goto startParseArray;
            case TokLBrace:
                goto startParseObject;
            case TokString: {
                typename Lexer::LiteralParserTokenPtr stringToken = m_lexer.currentToken();
                if (stringToken->stringIs8Bit)
                    lastValue = jsString(vm, makeIdentifier(stringToken->stringToken8, stringToken->stringLength).string());
                else
                    lastValue = jsString(vm, makeIdentifier(stringToken->stringToken16, stringToken->stringLength).string());
                m_lexer.next();
                break;
            }
            case TokNumber: {
                typename Lexer::LiteralParserTokenPtr numberToken = m_lexer.currentToken();
                lastValue = jsNumber(numberToken->numberToken);
                m_lexer.next();
                break;
            }
            case TokNull:
                m_lexer.next();
                lastValue = jsNull();
                break;

            case TokTrue:
                m_lexer.next();
                lastValue = jsBoolean(true);
                break;

            case TokFalse:
                m_lexer.next();
                lastValue = jsBoolean(false);
                break;
            case TokRBracket:
                m_parseErrorMessage = "Unexpected token ']'"_s;
                return JSValue();
            case TokRBrace:
                m_parseErrorMessage = "Unexpected token '}'"_s;
                return JSValue();
            case TokIdentifier: {
                typename Lexer::LiteralParserTokenPtr token = m_lexer.currentToken();
                // Error-message construction may itself fail on very long
                // identifiers (tryMakeString returns null); fall back to a
                // truncated form, then to a generic message.
                auto tryMakeErrorString = [=] (typename Lexer::LiteralParserTokenPtr token, unsigned length, bool addEllipsis) -> String {
                    if (token->stringIs8Bit)
                        return tryMakeString("Unexpected identifier \"", StringView { token->stringToken8, length }, addEllipsis ? "..." : "", '"');
                    return tryMakeString("Unexpected identifier \"", StringView { token->stringToken16, length }, addEllipsis ? "..." : "", '"');
                };

                String errorString = tryMakeErrorString(token, token->stringLength, false);
                if (!errorString) {
                    constexpr unsigned shortLength = 10;
                    if (token->stringLength > shortLength)
                        errorString = tryMakeErrorString(token, shortLength, true);
                    if (!errorString)
                        errorString = "Unexpected identifier";
                }

                m_parseErrorMessage = errorString;
                return JSValue();
            }
            case TokColon:
                m_parseErrorMessage = "Unexpected token ':'"_s;
                return JSValue();
            case TokLParen:
                m_parseErrorMessage = "Unexpected token '('"_s;
                return JSValue();
            case TokRParen:
                m_parseErrorMessage = "Unexpected token ')'"_s;
                return JSValue();
            case TokComma:
                m_parseErrorMessage = "Unexpected token ','"_s;
                return JSValue();
            case TokDot:
                m_parseErrorMessage = "Unexpected token '.'"_s;
                return JSValue();
            case TokAssign:
                m_parseErrorMessage = "Unexpected token '='"_s;
                return JSValue();
            case TokSemi:
                m_parseErrorMessage = "Unexpected token ';'"_s;
                return JSValue();
            case TokEnd:
                m_parseErrorMessage = "Unexpected EOF"_s;
                return JSValue();
            case TokError:
            default:
                // Error
                m_parseErrorMessage = "Could not parse value expression"_s;
                return JSValue();
            }
            break;
        }
        case StartParseStatement: {
            switch (m_lexer.currentToken()->type) {
            case TokLBracket:
            case TokNumber:
            case TokString:
                goto startParseExpression;

            case TokLParen: {
                m_lexer.next();
                stateStack.append(StartParseStatementEndStatement);
                goto startParseExpression;
            }
            case TokRBracket:
                m_parseErrorMessage = "Unexpected token ']'"_s;
                return JSValue();
            case TokLBrace:
                m_parseErrorMessage = "Unexpected token '{'"_s;
                return JSValue();
            case TokRBrace:
                m_parseErrorMessage = "Unexpected token '}'"_s;
                return JSValue();
            case TokIdentifier:
                m_parseErrorMessage = "Unexpected identifier"_s;
                return JSValue();
            case TokColon:
                m_parseErrorMessage = "Unexpected token ':'"_s;
                return JSValue();
            case TokRParen:
                m_parseErrorMessage = "Unexpected token ')'"_s;
                return JSValue();
            case TokComma:
                m_parseErrorMessage = "Unexpected token ','"_s;
                return JSValue();
            case TokTrue:
                m_parseErrorMessage = "Unexpected token 'true'"_s;
                return JSValue();
            case TokFalse:
                m_parseErrorMessage = "Unexpected token 'false'"_s;
                return JSValue();
            case TokNull:
                m_parseErrorMessage = "Unexpected token 'null'"_s;
                return JSValue();
            case TokEnd:
                m_parseErrorMessage = "Unexpected EOF"_s;
                return JSValue();
            case TokDot:
                m_parseErrorMessage = "Unexpected token '.'"_s;
                return JSValue();
            case TokAssign:
                m_parseErrorMessage = "Unexpected token '='"_s;
                return JSValue();
            case TokSemi:
                m_parseErrorMessage = "Unexpected token ';'"_s;
                return JSValue();
            case TokError:
            default:
                m_parseErrorMessage = "Could not parse statement"_s;
                return JSValue();
            }
        }
        case StartParseStatementEndStatement: {
            ASSERT(stateStack.isEmpty());
            if (m_lexer.currentToken()->type != TokRParen)
                return JSValue();
            if (m_lexer.next() == TokEnd)
                return lastValue;
            m_parseErrorMessage = "Unexpected content at end of JSON literal"_s;
            return JSValue();
        }
        default:
            RELEASE_ASSERT_NOT_REACHED();
        }
        if (stateStack.isEmpty())
            return lastValue;
        state = stateStack.takeLast();
        continue;
    }
}

// Instantiate the two flavors of LiteralParser we need instead of putting most of this file in LiteralParser.h
template class LiteralParser<LChar>;
template class LiteralParser<UChar>;

} // namespace JSC