mirror of
https://github.com/darlinghq/darling-JavaScriptCore.git
synced 2024-11-26 21:50:53 +00:00
2728 lines
93 KiB
C++
2728 lines
93 KiB
C++
/*
|
|
* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
|
|
* Copyright (C) 2006-2019 Apple Inc. All Rights Reserved.
|
|
* Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
|
|
* Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
|
|
* Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Library General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Library General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Library General Public License
|
|
* along with this library; see the file COPYING.LIB. If not, write to
|
|
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
* Boston, MA 02110-1301, USA.
|
|
*
|
|
*/
|
|
|
|
#include "config.h"
|
|
#include "Lexer.h"
|
|
|
|
#include "BuiltinNames.h"
|
|
#include "Identifier.h"
|
|
#include "KeywordLookup.h"
|
|
#include "Lexer.lut.h"
|
|
#include "ParseInt.h"
|
|
#include <limits.h>
|
|
#include <string.h>
|
|
#include <wtf/Assertions.h>
|
|
#include <wtf/HexNumber.h>
|
|
#include <wtf/Variant.h>
|
|
#include <wtf/dtoa.h>
|
|
|
|
namespace JSC {
|
|
|
|
bool isLexerKeyword(const Identifier& identifier)
|
|
{
|
|
return JSC::mainTable.entry(identifier);
|
|
}
|
|
|
|
// Classification stored for every Latin-1 code in typesOfLatin1Characters.
//
// The enumerator order is significant: isIdentPart treats every value that is
// less than or equal to CharacterOtherIdentifierPart as a character that may
// appear inside an identifier, so new enumerators must not be inserted before it.
enum CharacterType {
    // Types for the main switch.

    // These three are fixed at the front; they are also what isIdentStart and
    // isIdentPart use to recognize ASCII alpha and alphanumeric characters.
    CharacterIdentifierStart,
    CharacterZero,
    CharacterNumber,

    // Single-byte characters grandfathered into Other_ID_Continue, which is
    // just U+00B7 MIDDLE DOT.
    // (http://unicode.org/reports/tr31/#Backward_Compatibility)
    CharacterOtherIdentifierPart,

    // Everything below cannot be part of an identifier.
    CharacterInvalid, CharacterLineTerminator,

    // Punctuators and delimiters.
    CharacterExclamationMark,
    CharacterOpenParen, CharacterCloseParen,
    CharacterOpenBracket, CharacterCloseBracket,
    CharacterComma, CharacterColon, CharacterQuestion, CharacterTilde,
    CharacterQuote, CharacterBackQuote,
    CharacterDot, CharacterSlash, CharacterBackSlash, CharacterSemicolon,
    CharacterOpenBrace, CharacterCloseBrace,

    // Operator characters.
    CharacterAdd, CharacterSub, CharacterMultiply, CharacterModulo,
    CharacterAnd, CharacterXor, CharacterOr,
    CharacterLess, CharacterGreater, CharacterEqual,

    // Other types.
    CharacterWhiteSpace,
    CharacterHash,
    CharacterPrivateIdentifierStart
};
|
|
|
|
// 256 Latin-1 codes
|
|
static constexpr const unsigned short typesOfLatin1Characters[256] = {
|
|
/* 0 - Null */ CharacterInvalid,
|
|
/* 1 - Start of Heading */ CharacterInvalid,
|
|
/* 2 - Start of Text */ CharacterInvalid,
|
|
/* 3 - End of Text */ CharacterInvalid,
|
|
/* 4 - End of Transm. */ CharacterInvalid,
|
|
/* 5 - Enquiry */ CharacterInvalid,
|
|
/* 6 - Acknowledgment */ CharacterInvalid,
|
|
/* 7 - Bell */ CharacterInvalid,
|
|
/* 8 - Back Space */ CharacterInvalid,
|
|
/* 9 - Horizontal Tab */ CharacterWhiteSpace,
|
|
/* 10 - Line Feed */ CharacterLineTerminator,
|
|
/* 11 - Vertical Tab */ CharacterWhiteSpace,
|
|
/* 12 - Form Feed */ CharacterWhiteSpace,
|
|
/* 13 - Carriage Return */ CharacterLineTerminator,
|
|
/* 14 - Shift Out */ CharacterInvalid,
|
|
/* 15 - Shift In */ CharacterInvalid,
|
|
/* 16 - Data Line Escape */ CharacterInvalid,
|
|
/* 17 - Device Control 1 */ CharacterInvalid,
|
|
/* 18 - Device Control 2 */ CharacterInvalid,
|
|
/* 19 - Device Control 3 */ CharacterInvalid,
|
|
/* 20 - Device Control 4 */ CharacterInvalid,
|
|
/* 21 - Negative Ack. */ CharacterInvalid,
|
|
/* 22 - Synchronous Idle */ CharacterInvalid,
|
|
/* 23 - End of Transmit */ CharacterInvalid,
|
|
/* 24 - Cancel */ CharacterInvalid,
|
|
/* 25 - End of Medium */ CharacterInvalid,
|
|
/* 26 - Substitute */ CharacterInvalid,
|
|
/* 27 - Escape */ CharacterInvalid,
|
|
/* 28 - File Separator */ CharacterInvalid,
|
|
/* 29 - Group Separator */ CharacterInvalid,
|
|
/* 30 - Record Separator */ CharacterInvalid,
|
|
/* 31 - Unit Separator */ CharacterInvalid,
|
|
/* 32 - Space */ CharacterWhiteSpace,
|
|
/* 33 - ! */ CharacterExclamationMark,
|
|
/* 34 - " */ CharacterQuote,
|
|
/* 35 - # */ CharacterHash,
|
|
/* 36 - $ */ CharacterIdentifierStart,
|
|
/* 37 - % */ CharacterModulo,
|
|
/* 38 - & */ CharacterAnd,
|
|
/* 39 - ' */ CharacterQuote,
|
|
/* 40 - ( */ CharacterOpenParen,
|
|
/* 41 - ) */ CharacterCloseParen,
|
|
/* 42 - * */ CharacterMultiply,
|
|
/* 43 - + */ CharacterAdd,
|
|
/* 44 - , */ CharacterComma,
|
|
/* 45 - - */ CharacterSub,
|
|
/* 46 - . */ CharacterDot,
|
|
/* 47 - / */ CharacterSlash,
|
|
/* 48 - 0 */ CharacterZero,
|
|
/* 49 - 1 */ CharacterNumber,
|
|
/* 50 - 2 */ CharacterNumber,
|
|
/* 51 - 3 */ CharacterNumber,
|
|
/* 52 - 4 */ CharacterNumber,
|
|
/* 53 - 5 */ CharacterNumber,
|
|
/* 54 - 6 */ CharacterNumber,
|
|
/* 55 - 7 */ CharacterNumber,
|
|
/* 56 - 8 */ CharacterNumber,
|
|
/* 57 - 9 */ CharacterNumber,
|
|
/* 58 - : */ CharacterColon,
|
|
/* 59 - ; */ CharacterSemicolon,
|
|
/* 60 - < */ CharacterLess,
|
|
/* 61 - = */ CharacterEqual,
|
|
/* 62 - > */ CharacterGreater,
|
|
/* 63 - ? */ CharacterQuestion,
|
|
/* 64 - @ */ CharacterPrivateIdentifierStart,
|
|
/* 65 - A */ CharacterIdentifierStart,
|
|
/* 66 - B */ CharacterIdentifierStart,
|
|
/* 67 - C */ CharacterIdentifierStart,
|
|
/* 68 - D */ CharacterIdentifierStart,
|
|
/* 69 - E */ CharacterIdentifierStart,
|
|
/* 70 - F */ CharacterIdentifierStart,
|
|
/* 71 - G */ CharacterIdentifierStart,
|
|
/* 72 - H */ CharacterIdentifierStart,
|
|
/* 73 - I */ CharacterIdentifierStart,
|
|
/* 74 - J */ CharacterIdentifierStart,
|
|
/* 75 - K */ CharacterIdentifierStart,
|
|
/* 76 - L */ CharacterIdentifierStart,
|
|
/* 77 - M */ CharacterIdentifierStart,
|
|
/* 78 - N */ CharacterIdentifierStart,
|
|
/* 79 - O */ CharacterIdentifierStart,
|
|
/* 80 - P */ CharacterIdentifierStart,
|
|
/* 81 - Q */ CharacterIdentifierStart,
|
|
/* 82 - R */ CharacterIdentifierStart,
|
|
/* 83 - S */ CharacterIdentifierStart,
|
|
/* 84 - T */ CharacterIdentifierStart,
|
|
/* 85 - U */ CharacterIdentifierStart,
|
|
/* 86 - V */ CharacterIdentifierStart,
|
|
/* 87 - W */ CharacterIdentifierStart,
|
|
/* 88 - X */ CharacterIdentifierStart,
|
|
/* 89 - Y */ CharacterIdentifierStart,
|
|
/* 90 - Z */ CharacterIdentifierStart,
|
|
/* 91 - [ */ CharacterOpenBracket,
|
|
/* 92 - \ */ CharacterBackSlash,
|
|
/* 93 - ] */ CharacterCloseBracket,
|
|
/* 94 - ^ */ CharacterXor,
|
|
/* 95 - _ */ CharacterIdentifierStart,
|
|
/* 96 - ` */ CharacterBackQuote,
|
|
/* 97 - a */ CharacterIdentifierStart,
|
|
/* 98 - b */ CharacterIdentifierStart,
|
|
/* 99 - c */ CharacterIdentifierStart,
|
|
/* 100 - d */ CharacterIdentifierStart,
|
|
/* 101 - e */ CharacterIdentifierStart,
|
|
/* 102 - f */ CharacterIdentifierStart,
|
|
/* 103 - g */ CharacterIdentifierStart,
|
|
/* 104 - h */ CharacterIdentifierStart,
|
|
/* 105 - i */ CharacterIdentifierStart,
|
|
/* 106 - j */ CharacterIdentifierStart,
|
|
/* 107 - k */ CharacterIdentifierStart,
|
|
/* 108 - l */ CharacterIdentifierStart,
|
|
/* 109 - m */ CharacterIdentifierStart,
|
|
/* 110 - n */ CharacterIdentifierStart,
|
|
/* 111 - o */ CharacterIdentifierStart,
|
|
/* 112 - p */ CharacterIdentifierStart,
|
|
/* 113 - q */ CharacterIdentifierStart,
|
|
/* 114 - r */ CharacterIdentifierStart,
|
|
/* 115 - s */ CharacterIdentifierStart,
|
|
/* 116 - t */ CharacterIdentifierStart,
|
|
/* 117 - u */ CharacterIdentifierStart,
|
|
/* 118 - v */ CharacterIdentifierStart,
|
|
/* 119 - w */ CharacterIdentifierStart,
|
|
/* 120 - x */ CharacterIdentifierStart,
|
|
/* 121 - y */ CharacterIdentifierStart,
|
|
/* 122 - z */ CharacterIdentifierStart,
|
|
/* 123 - { */ CharacterOpenBrace,
|
|
/* 124 - | */ CharacterOr,
|
|
/* 125 - } */ CharacterCloseBrace,
|
|
/* 126 - ~ */ CharacterTilde,
|
|
/* 127 - Delete */ CharacterInvalid,
|
|
/* 128 - Cc category */ CharacterInvalid,
|
|
/* 129 - Cc category */ CharacterInvalid,
|
|
/* 130 - Cc category */ CharacterInvalid,
|
|
/* 131 - Cc category */ CharacterInvalid,
|
|
/* 132 - Cc category */ CharacterInvalid,
|
|
/* 133 - Cc category */ CharacterInvalid,
|
|
/* 134 - Cc category */ CharacterInvalid,
|
|
/* 135 - Cc category */ CharacterInvalid,
|
|
/* 136 - Cc category */ CharacterInvalid,
|
|
/* 137 - Cc category */ CharacterInvalid,
|
|
/* 138 - Cc category */ CharacterInvalid,
|
|
/* 139 - Cc category */ CharacterInvalid,
|
|
/* 140 - Cc category */ CharacterInvalid,
|
|
/* 141 - Cc category */ CharacterInvalid,
|
|
/* 142 - Cc category */ CharacterInvalid,
|
|
/* 143 - Cc category */ CharacterInvalid,
|
|
/* 144 - Cc category */ CharacterInvalid,
|
|
/* 145 - Cc category */ CharacterInvalid,
|
|
/* 146 - Cc category */ CharacterInvalid,
|
|
/* 147 - Cc category */ CharacterInvalid,
|
|
/* 148 - Cc category */ CharacterInvalid,
|
|
/* 149 - Cc category */ CharacterInvalid,
|
|
/* 150 - Cc category */ CharacterInvalid,
|
|
/* 151 - Cc category */ CharacterInvalid,
|
|
/* 152 - Cc category */ CharacterInvalid,
|
|
/* 153 - Cc category */ CharacterInvalid,
|
|
/* 154 - Cc category */ CharacterInvalid,
|
|
/* 155 - Cc category */ CharacterInvalid,
|
|
/* 156 - Cc category */ CharacterInvalid,
|
|
/* 157 - Cc category */ CharacterInvalid,
|
|
/* 158 - Cc category */ CharacterInvalid,
|
|
/* 159 - Cc category */ CharacterInvalid,
|
|
/* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
|
|
/* 161 - Po category */ CharacterInvalid,
|
|
/* 162 - Sc category */ CharacterInvalid,
|
|
/* 163 - Sc category */ CharacterInvalid,
|
|
/* 164 - Sc category */ CharacterInvalid,
|
|
/* 165 - Sc category */ CharacterInvalid,
|
|
/* 166 - So category */ CharacterInvalid,
|
|
/* 167 - So category */ CharacterInvalid,
|
|
/* 168 - Sk category */ CharacterInvalid,
|
|
/* 169 - So category */ CharacterInvalid,
|
|
/* 170 - Ll category */ CharacterIdentifierStart,
|
|
/* 171 - Pi category */ CharacterInvalid,
|
|
/* 172 - Sm category */ CharacterInvalid,
|
|
/* 173 - Cf category */ CharacterInvalid,
|
|
/* 174 - So category */ CharacterInvalid,
|
|
/* 175 - Sk category */ CharacterInvalid,
|
|
/* 176 - So category */ CharacterInvalid,
|
|
/* 177 - Sm category */ CharacterInvalid,
|
|
/* 178 - No category */ CharacterInvalid,
|
|
/* 179 - No category */ CharacterInvalid,
|
|
/* 180 - Sk category */ CharacterInvalid,
|
|
/* 181 - Ll category */ CharacterIdentifierStart,
|
|
/* 182 - So category */ CharacterInvalid,
|
|
/* 183 - Po category */ CharacterOtherIdentifierPart,
|
|
/* 184 - Sk category */ CharacterInvalid,
|
|
/* 185 - No category */ CharacterInvalid,
|
|
/* 186 - Ll category */ CharacterIdentifierStart,
|
|
/* 187 - Pf category */ CharacterInvalid,
|
|
/* 188 - No category */ CharacterInvalid,
|
|
/* 189 - No category */ CharacterInvalid,
|
|
/* 190 - No category */ CharacterInvalid,
|
|
/* 191 - Po category */ CharacterInvalid,
|
|
/* 192 - Lu category */ CharacterIdentifierStart,
|
|
/* 193 - Lu category */ CharacterIdentifierStart,
|
|
/* 194 - Lu category */ CharacterIdentifierStart,
|
|
/* 195 - Lu category */ CharacterIdentifierStart,
|
|
/* 196 - Lu category */ CharacterIdentifierStart,
|
|
/* 197 - Lu category */ CharacterIdentifierStart,
|
|
/* 198 - Lu category */ CharacterIdentifierStart,
|
|
/* 199 - Lu category */ CharacterIdentifierStart,
|
|
/* 200 - Lu category */ CharacterIdentifierStart,
|
|
/* 201 - Lu category */ CharacterIdentifierStart,
|
|
/* 202 - Lu category */ CharacterIdentifierStart,
|
|
/* 203 - Lu category */ CharacterIdentifierStart,
|
|
/* 204 - Lu category */ CharacterIdentifierStart,
|
|
/* 205 - Lu category */ CharacterIdentifierStart,
|
|
/* 206 - Lu category */ CharacterIdentifierStart,
|
|
/* 207 - Lu category */ CharacterIdentifierStart,
|
|
/* 208 - Lu category */ CharacterIdentifierStart,
|
|
/* 209 - Lu category */ CharacterIdentifierStart,
|
|
/* 210 - Lu category */ CharacterIdentifierStart,
|
|
/* 211 - Lu category */ CharacterIdentifierStart,
|
|
/* 212 - Lu category */ CharacterIdentifierStart,
|
|
/* 213 - Lu category */ CharacterIdentifierStart,
|
|
/* 214 - Lu category */ CharacterIdentifierStart,
|
|
/* 215 - Sm category */ CharacterInvalid,
|
|
/* 216 - Lu category */ CharacterIdentifierStart,
|
|
/* 217 - Lu category */ CharacterIdentifierStart,
|
|
/* 218 - Lu category */ CharacterIdentifierStart,
|
|
/* 219 - Lu category */ CharacterIdentifierStart,
|
|
/* 220 - Lu category */ CharacterIdentifierStart,
|
|
/* 221 - Lu category */ CharacterIdentifierStart,
|
|
/* 222 - Lu category */ CharacterIdentifierStart,
|
|
/* 223 - Ll category */ CharacterIdentifierStart,
|
|
/* 224 - Ll category */ CharacterIdentifierStart,
|
|
/* 225 - Ll category */ CharacterIdentifierStart,
|
|
/* 226 - Ll category */ CharacterIdentifierStart,
|
|
/* 227 - Ll category */ CharacterIdentifierStart,
|
|
/* 228 - Ll category */ CharacterIdentifierStart,
|
|
/* 229 - Ll category */ CharacterIdentifierStart,
|
|
/* 230 - Ll category */ CharacterIdentifierStart,
|
|
/* 231 - Ll category */ CharacterIdentifierStart,
|
|
/* 232 - Ll category */ CharacterIdentifierStart,
|
|
/* 233 - Ll category */ CharacterIdentifierStart,
|
|
/* 234 - Ll category */ CharacterIdentifierStart,
|
|
/* 235 - Ll category */ CharacterIdentifierStart,
|
|
/* 236 - Ll category */ CharacterIdentifierStart,
|
|
/* 237 - Ll category */ CharacterIdentifierStart,
|
|
/* 238 - Ll category */ CharacterIdentifierStart,
|
|
/* 239 - Ll category */ CharacterIdentifierStart,
|
|
/* 240 - Ll category */ CharacterIdentifierStart,
|
|
/* 241 - Ll category */ CharacterIdentifierStart,
|
|
/* 242 - Ll category */ CharacterIdentifierStart,
|
|
/* 243 - Ll category */ CharacterIdentifierStart,
|
|
/* 244 - Ll category */ CharacterIdentifierStart,
|
|
/* 245 - Ll category */ CharacterIdentifierStart,
|
|
/* 246 - Ll category */ CharacterIdentifierStart,
|
|
/* 247 - Sm category */ CharacterInvalid,
|
|
/* 248 - Ll category */ CharacterIdentifierStart,
|
|
/* 249 - Ll category */ CharacterIdentifierStart,
|
|
/* 250 - Ll category */ CharacterIdentifierStart,
|
|
/* 251 - Ll category */ CharacterIdentifierStart,
|
|
/* 252 - Ll category */ CharacterIdentifierStart,
|
|
/* 253 - Ll category */ CharacterIdentifierStart,
|
|
/* 254 - Ll category */ CharacterIdentifierStart,
|
|
/* 255 - Ll category */ CharacterIdentifierStart
|
|
};
|
|
|
|
// This table provides the character that results from \X where X is the index in the table beginning
|
|
// with SPACE. A table value of 0 means that more processing needs to be done.
|
|
static constexpr const LChar singleCharacterEscapeValuesForASCII[128] = {
|
|
/* 0 - Null */ 0,
|
|
/* 1 - Start of Heading */ 0,
|
|
/* 2 - Start of Text */ 0,
|
|
/* 3 - End of Text */ 0,
|
|
/* 4 - End of Transm. */ 0,
|
|
/* 5 - Enquiry */ 0,
|
|
/* 6 - Acknowledgment */ 0,
|
|
/* 7 - Bell */ 0,
|
|
/* 8 - Back Space */ 0,
|
|
/* 9 - Horizontal Tab */ 0,
|
|
/* 10 - Line Feed */ 0,
|
|
/* 11 - Vertical Tab */ 0,
|
|
/* 12 - Form Feed */ 0,
|
|
/* 13 - Carriage Return */ 0,
|
|
/* 14 - Shift Out */ 0,
|
|
/* 15 - Shift In */ 0,
|
|
/* 16 - Data Line Escape */ 0,
|
|
/* 17 - Device Control 1 */ 0,
|
|
/* 18 - Device Control 2 */ 0,
|
|
/* 19 - Device Control 3 */ 0,
|
|
/* 20 - Device Control 4 */ 0,
|
|
/* 21 - Negative Ack. */ 0,
|
|
/* 22 - Synchronous Idle */ 0,
|
|
/* 23 - End of Transmit */ 0,
|
|
/* 24 - Cancel */ 0,
|
|
/* 25 - End of Medium */ 0,
|
|
/* 26 - Substitute */ 0,
|
|
/* 27 - Escape */ 0,
|
|
/* 28 - File Separator */ 0,
|
|
/* 29 - Group Separator */ 0,
|
|
/* 30 - Record Separator */ 0,
|
|
/* 31 - Unit Separator */ 0,
|
|
/* 32 - Space */ ' ',
|
|
/* 33 - ! */ '!',
|
|
/* 34 - " */ '"',
|
|
/* 35 - # */ '#',
|
|
/* 36 - $ */ '$',
|
|
/* 37 - % */ '%',
|
|
/* 38 - & */ '&',
|
|
/* 39 - ' */ '\'',
|
|
/* 40 - ( */ '(',
|
|
/* 41 - ) */ ')',
|
|
/* 42 - * */ '*',
|
|
/* 43 - + */ '+',
|
|
/* 44 - , */ ',',
|
|
/* 45 - - */ '-',
|
|
/* 46 - . */ '.',
|
|
/* 47 - / */ '/',
|
|
/* 48 - 0 */ 0,
|
|
/* 49 - 1 */ 0,
|
|
/* 50 - 2 */ 0,
|
|
/* 51 - 3 */ 0,
|
|
/* 52 - 4 */ 0,
|
|
/* 53 - 5 */ 0,
|
|
/* 54 - 6 */ 0,
|
|
/* 55 - 7 */ 0,
|
|
/* 56 - 8 */ 0,
|
|
/* 57 - 9 */ 0,
|
|
/* 58 - : */ ':',
|
|
/* 59 - ; */ ';',
|
|
/* 60 - < */ '<',
|
|
/* 61 - = */ '=',
|
|
/* 62 - > */ '>',
|
|
/* 63 - ? */ '?',
|
|
/* 64 - @ */ '@',
|
|
/* 65 - A */ 'A',
|
|
/* 66 - B */ 'B',
|
|
/* 67 - C */ 'C',
|
|
/* 68 - D */ 'D',
|
|
/* 69 - E */ 'E',
|
|
/* 70 - F */ 'F',
|
|
/* 71 - G */ 'G',
|
|
/* 72 - H */ 'H',
|
|
/* 73 - I */ 'I',
|
|
/* 74 - J */ 'J',
|
|
/* 75 - K */ 'K',
|
|
/* 76 - L */ 'L',
|
|
/* 77 - M */ 'M',
|
|
/* 78 - N */ 'N',
|
|
/* 79 - O */ 'O',
|
|
/* 80 - P */ 'P',
|
|
/* 81 - Q */ 'Q',
|
|
/* 82 - R */ 'R',
|
|
/* 83 - S */ 'S',
|
|
/* 84 - T */ 'T',
|
|
/* 85 - U */ 'U',
|
|
/* 86 - V */ 'V',
|
|
/* 87 - W */ 'W',
|
|
/* 88 - X */ 'X',
|
|
/* 89 - Y */ 'Y',
|
|
/* 90 - Z */ 'Z',
|
|
/* 91 - [ */ '[',
|
|
/* 92 - \ */ '\\',
|
|
/* 93 - ] */ ']',
|
|
/* 94 - ^ */ '^',
|
|
/* 95 - _ */ '_',
|
|
/* 96 - ` */ '`',
|
|
/* 97 - a */ 'a',
|
|
/* 98 - b */ 0x08,
|
|
/* 99 - c */ 'c',
|
|
/* 100 - d */ 'd',
|
|
/* 101 - e */ 'e',
|
|
/* 102 - f */ 0x0C,
|
|
/* 103 - g */ 'g',
|
|
/* 104 - h */ 'h',
|
|
/* 105 - i */ 'i',
|
|
/* 106 - j */ 'j',
|
|
/* 107 - k */ 'k',
|
|
/* 108 - l */ 'l',
|
|
/* 109 - m */ 'm',
|
|
/* 110 - n */ 0x0A,
|
|
/* 111 - o */ 'o',
|
|
/* 112 - p */ 'p',
|
|
/* 113 - q */ 'q',
|
|
/* 114 - r */ 0x0D,
|
|
/* 115 - s */ 's',
|
|
/* 116 - t */ 0x09,
|
|
/* 117 - u */ 0,
|
|
/* 118 - v */ 0x0B,
|
|
/* 119 - w */ 'w',
|
|
/* 120 - x */ 0,
|
|
/* 121 - y */ 'y',
|
|
/* 122 - z */ 'z',
|
|
/* 123 - { */ '{',
|
|
/* 124 - | */ '|',
|
|
/* 125 - } */ '}',
|
|
/* 126 - ~ */ '~',
|
|
/* 127 - Delete */ 0
|
|
};
|
|
|
|
template <typename T>
|
|
Lexer<T>::Lexer(VM& vm, JSParserBuiltinMode builtinMode, JSParserScriptMode scriptMode)
|
|
: m_positionBeforeLastNewline(0,0,0)
|
|
, m_isReparsingFunction(false)
|
|
, m_vm(vm)
|
|
, m_parsingBuiltinFunction(builtinMode == JSParserBuiltinMode::Builtin)
|
|
, m_scriptMode(scriptMode)
|
|
{
|
|
}
|
|
|
|
// Chooses the token type for a numeric literal that was written in an
// integer-like form: INTEGER when |doubleValue| is exactly representable as an
// int64_t (and is not negative zero), DOUBLE otherwise.
static inline JSTokenType tokenTypeForIntegerLikeToken(double doubleValue)
{
    // 2^63; exactly representable as a double. Valid int64_t values lie in [-2^63, 2^63).
    constexpr double int64Limit = 9223372036854775808.0;

    // Negative zero has no integer representation.
    if (!doubleValue && std::signbit(doubleValue))
        return DOUBLE;

    // Converting NaN, an infinity, or a value outside the int64_t range with
    // static_cast is undefined behavior (and yields different results on
    // different CPUs), so reject those before casting. The negated form also
    // rejects NaN, for which every comparison is false.
    if (!(doubleValue >= -int64Limit && doubleValue < int64Limit))
        return DOUBLE;

    return static_cast<int64_t>(doubleValue) == doubleValue ? INTEGER : DOUBLE;
}
|
|
|
|
template <typename T>
|
|
Lexer<T>::~Lexer()
|
|
{
|
|
}
|
|
|
|
template <typename T>
|
|
String Lexer<T>::invalidCharacterMessage() const
|
|
{
|
|
switch (m_current) {
|
|
case 0:
|
|
return "Invalid character: '\\0'"_s;
|
|
case 10:
|
|
return "Invalid character: '\\n'"_s;
|
|
case 11:
|
|
return "Invalid character: '\\v'"_s;
|
|
case 13:
|
|
return "Invalid character: '\\r'"_s;
|
|
case 35:
|
|
return "Invalid character: '#'"_s;
|
|
case 64:
|
|
return "Invalid character: '@'"_s;
|
|
case 96:
|
|
return "Invalid character: '`'"_s;
|
|
default:
|
|
return makeString("Invalid character '\\u", hex(m_current, 4, Lowercase), '\'');
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
|
|
{
|
|
ASSERT(m_code <= m_codeEnd);
|
|
return m_code;
|
|
}
|
|
|
|
template <typename T>
|
|
void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
|
|
{
|
|
m_arena = &arena->identifierArena();
|
|
|
|
m_lineNumber = source.firstLine().oneBasedInt();
|
|
m_lastToken = -1;
|
|
|
|
StringView sourceString = source.provider()->source();
|
|
|
|
if (!sourceString.isNull())
|
|
setCodeStart(sourceString);
|
|
else
|
|
m_codeStart = nullptr;
|
|
|
|
m_source = &source;
|
|
m_sourceOffset = source.startOffset();
|
|
m_codeStartPlusOffset = m_codeStart + source.startOffset();
|
|
m_code = m_codeStartPlusOffset;
|
|
m_codeEnd = m_codeStart + source.endOffset();
|
|
m_error = false;
|
|
m_atLineStart = true;
|
|
m_lineStart = m_code;
|
|
m_lexErrorMessage = String();
|
|
m_sourceURLDirective = String();
|
|
m_sourceMappingURLDirective = String();
|
|
|
|
m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
|
|
m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
|
|
m_bufferForRawTemplateString16.reserveInitialCapacity(initialReadBufferCapacity);
|
|
|
|
if (LIKELY(m_code < m_codeEnd))
|
|
m_current = *m_code;
|
|
else
|
|
m_current = 0;
|
|
ASSERT(currentOffset() == source.startOffset());
|
|
}
|
|
|
|
template <typename T>
|
|
template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
|
|
{
|
|
m_code += shiftAmount;
|
|
ASSERT(currentOffset() >= currentLineStartOffset());
|
|
m_current = *m_code;
|
|
}
|
|
|
|
template <typename T>
|
|
ALWAYS_INLINE void Lexer<T>::shift()
|
|
{
|
|
// At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
|
|
m_current = 0;
|
|
++m_code;
|
|
if (LIKELY(m_code < m_codeEnd))
|
|
m_current = *m_code;
|
|
}
|
|
|
|
template <typename T>
|
|
ALWAYS_INLINE bool Lexer<T>::atEnd() const
|
|
{
|
|
ASSERT(!m_current || m_code < m_codeEnd);
|
|
return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
|
|
}
|
|
|
|
template <typename T>
|
|
ALWAYS_INLINE T Lexer<T>::peek(int offset) const
|
|
{
|
|
ASSERT(offset > 0 && offset < 5);
|
|
const T* code = m_code + offset;
|
|
return (code < m_codeEnd) ? *code : 0;
|
|
}
|
|
|
|
struct ParsedUnicodeEscapeValue {
|
|
ParsedUnicodeEscapeValue(UChar32 value)
|
|
: m_value(value)
|
|
{
|
|
ASSERT(isValid());
|
|
}
|
|
|
|
enum SpecialValueType { Incomplete = -2, Invalid = -1 };
|
|
ParsedUnicodeEscapeValue(SpecialValueType type)
|
|
: m_value(type)
|
|
{
|
|
}
|
|
|
|
bool isValid() const { return m_value >= 0; }
|
|
bool isIncomplete() const { return m_value == Incomplete; }
|
|
|
|
UChar32 value() const
|
|
{
|
|
ASSERT(isValid());
|
|
return m_value;
|
|
}
|
|
|
|
private:
|
|
UChar32 m_value;
|
|
};
|
|
|
|
template<typename CharacterType>
|
|
ParsedUnicodeEscapeValue Lexer<CharacterType>::parseUnicodeEscape()
|
|
{
|
|
if (m_current == '{') {
|
|
shift();
|
|
UChar32 codePoint = 0;
|
|
do {
|
|
if (!isASCIIHexDigit(m_current))
|
|
return m_current ? ParsedUnicodeEscapeValue::Invalid : ParsedUnicodeEscapeValue::Incomplete;
|
|
codePoint = (codePoint << 4) | toASCIIHexValue(m_current);
|
|
if (codePoint > UCHAR_MAX_VALUE) {
|
|
// For raw template literal syntax, we consume `NotEscapeSequence`.
|
|
// Here, we consume NotCodePoint's HexDigits.
|
|
//
|
|
// NotEscapeSequence ::
|
|
// u { [lookahread not one of HexDigit]
|
|
// u { NotCodePoint
|
|
// u { CodePoint [lookahead != }]
|
|
//
|
|
// NotCodePoint ::
|
|
// HexDigits but not if MV of HexDigits <= 0x10FFFF
|
|
//
|
|
// CodePoint ::
|
|
// HexDigits but not if MV of HexDigits > 0x10FFFF
|
|
shift();
|
|
while (isASCIIHexDigit(m_current))
|
|
shift();
|
|
|
|
return atEnd() ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
|
|
}
|
|
shift();
|
|
} while (m_current != '}');
|
|
shift();
|
|
return codePoint;
|
|
}
|
|
|
|
auto character2 = peek(1);
|
|
auto character3 = peek(2);
|
|
auto character4 = peek(3);
|
|
if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(character2) || !isASCIIHexDigit(character3) || !isASCIIHexDigit(character4))) {
|
|
auto result = (m_code + 4) >= m_codeEnd ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
|
|
|
|
// For raw template literal syntax, we consume `NotEscapeSequence`.
|
|
//
|
|
// NotEscapeSequence ::
|
|
// u [lookahead not one of HexDigit][lookahead != {]
|
|
// u HexDigit [lookahead not one of HexDigit]
|
|
// u HexDigit HexDigit [lookahead not one of HexDigit]
|
|
// u HexDigit HexDigit HexDigit [lookahead not one of HexDigit]
|
|
while (isASCIIHexDigit(m_current))
|
|
shift();
|
|
|
|
return result;
|
|
}
|
|
|
|
auto result = convertUnicode(m_current, character2, character3, character4);
|
|
shift();
|
|
shift();
|
|
shift();
|
|
shift();
|
|
return result;
|
|
}
|
|
|
|
template <typename T>
|
|
void Lexer<T>::shiftLineTerminator()
|
|
{
|
|
ASSERT(isLineTerminator(m_current));
|
|
|
|
m_positionBeforeLastNewline = currentPosition();
|
|
T prev = m_current;
|
|
shift();
|
|
|
|
if (prev == '\r' && m_current == '\n')
|
|
shift();
|
|
|
|
++m_lineNumber;
|
|
}
|
|
|
|
template <typename T>
|
|
ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
|
|
{
|
|
return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
|
|
}
|
|
|
|
template <typename T>
|
|
ALWAYS_INLINE void Lexer<T>::skipWhitespace()
|
|
{
|
|
while (isWhiteSpace(m_current))
|
|
shift();
|
|
}
|
|
|
|
// Identifier-start test for code points outside Latin-1: true when ICU reports
// the ID_Start binary property for |c|.
static bool isNonLatin1IdentStart(UChar32 c)
{
    auto hasIDStart = u_hasBinaryProperty(c, UCHAR_ID_START);
    return hasIDStart;
}
|
|
|
|
template<typename CharacterType>
|
|
static ALWAYS_INLINE bool isIdentStart(CharacterType c)
|
|
{
|
|
static_assert(std::is_same_v<CharacterType, LChar> || std::is_same_v<CharacterType, UChar32>, "Call isSingleCharacterIdentStart for UChars that don't need to check for surrogate pairs");
|
|
if (!isLatin1(c))
|
|
return isNonLatin1IdentStart(c);
|
|
return typesOfLatin1Characters[static_cast<LChar>(c)] == CharacterIdentifierStart;
|
|
}
|
|
|
|
// Identifier-start test for a lone UTF-16 code unit. Surrogates are never
// identifier starts on their own.
static ALWAYS_INLINE UNUSED_FUNCTION bool isSingleCharacterIdentStart(UChar c)
{
    if (UNLIKELY(!isLatin1(c)))
        return !U16_IS_SURROGATE(c) && isIdentStart(static_cast<UChar32>(c));
    return isIdentStart(static_cast<LChar>(c));
}
|
|
|
|
// True when |c| is neither an identifier start character nor a backslash
// (which could introduce an escape at the start of an identifier).
static ALWAYS_INLINE bool cannotBeIdentStart(LChar c)
{
    return !(isIdentStart(c) || c == '\\');
}
|
|
|
|
// UChar overload. Latin-1 values defer to the LChar overload; for any other
// code unit only white space and line terminators are reported as unable to
// start an identifier.
static ALWAYS_INLINE bool cannotBeIdentStart(UChar c)
{
    if (UNLIKELY(!isLatin1(c)))
        return Lexer<UChar>::isWhiteSpace(c) || Lexer<UChar>::isLineTerminator(c);
    return cannotBeIdentStart(static_cast<LChar>(c));
}
|
|
|
|
// Identifier-part test for code points outside Latin-1: true for anything with
// the ICU ID_Continue binary property, plus U+200C and U+200D.
static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c)
{
    if (u_hasBinaryProperty(c, UCHAR_ID_CONTINUE))
        return true;
    return c == 0x200C || c == 0x200D;
}
|
|
|
|
template<typename CharacterType>
|
|
static ALWAYS_INLINE bool isIdentPart(CharacterType c)
|
|
{
|
|
static_assert(std::is_same_v<CharacterType, LChar> || std::is_same_v<CharacterType, UChar32>, "Call isSingleCharacterIdentPart for UChars that don't need to check for surrogate pairs");
|
|
if (!isLatin1(c))
|
|
return isNonLatin1IdentPart(c);
|
|
|
|
// Character types are divided into two groups depending on whether they can be part of an
|
|
// identifier or not. Those whose type value is less or equal than CharacterOtherIdentifierPart can be
|
|
// part of an identifier. (See the CharacterType definition for more details.)
|
|
return typesOfLatin1Characters[static_cast<LChar>(c)] <= CharacterOtherIdentifierPart;
|
|
}
|
|
|
|
// Identifier-part test for a lone UTF-16 code unit. Surrogates are never
// identifier parts on their own.
static ALWAYS_INLINE bool isSingleCharacterIdentPart(UChar c)
{
    if (UNLIKELY(!isLatin1(c)))
        return !U16_IS_SURROGATE(c) && isIdentPart(static_cast<UChar32>(c));
    return isIdentPart(static_cast<LChar>(c));
}
|
|
|
|
// True when |c| is neither an identifier part character nor a backslash
// (which could introduce an escape inside an identifier).
static ALWAYS_INLINE bool cannotBeIdentPartOrEscapeStart(LChar c)
{
    return !(isIdentPart(c) || c == '\\');
}
|
|
|
|
// NOTE: This may give give false negatives (for non-ascii) but won't give false posititves.
|
|
// This means it can be used to detect the end of a keyword (all keywords are ascii)
|
|
static ALWAYS_INLINE bool cannotBeIdentPartOrEscapeStart(UChar c)
|
|
{
|
|
if (LIKELY(isLatin1(c)))
|
|
return cannotBeIdentPartOrEscapeStart(static_cast<LChar>(c));
|
|
return Lexer<UChar>::isWhiteSpace(c) || Lexer<UChar>::isLineTerminator(c);
|
|
}
|
|
|
|
|
|
template<>
|
|
ALWAYS_INLINE UChar32 Lexer<LChar>::currentCodePoint() const
|
|
{
|
|
return m_current;
|
|
}
|
|
|
|
template<>
|
|
ALWAYS_INLINE UChar32 Lexer<UChar>::currentCodePoint() const
|
|
{
|
|
ASSERT_WITH_MESSAGE(!isIdentStart(static_cast<UChar32>(U_SENTINEL)), "error values shouldn't appear as a valid identifier start code point");
|
|
if (!U16_IS_SURROGATE(m_current))
|
|
return m_current;
|
|
|
|
UChar trail = peek(1);
|
|
if (UNLIKELY(!U16_IS_LEAD(m_current) || !U16_IS_SURROGATE_TRAIL(trail)))
|
|
return U_SENTINEL;
|
|
|
|
UChar32 codePoint = U16_GET_SUPPLEMENTARY(m_current, trail);
|
|
return codePoint;
|
|
}
|
|
|
|
// True for an ASCII decimal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIDigit(character);
}
|
|
|
|
// True for an ASCII hexadecimal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIHexDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIHexDigit(character);
}
|
|
|
|
// True for an ASCII binary digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIBinaryDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIBinaryDigit(character);
}
|
|
|
|
// True for an ASCII octal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIOctalDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIOctalDigit(character);
}
|
|
|
|
// Returns the character produced by the single-character escape \c, looked up
// in singleCharacterEscapeValuesForASCII. Returns 0 when the table has no
// direct value for |c| or when |c| lies outside the table.
static inline LChar singleEscape(int c)
{
    // Check both bounds: |c| is a signed int, so a negative value must not be
    // allowed to index the table. The upper bound is derived from the table
    // itself so the two cannot drift apart.
    if (c < 0 || c >= static_cast<int>(WTF_ARRAY_LENGTH(singleCharacterEscapeValuesForASCII)))
        return 0;
    return singleCharacterEscapeValuesForASCII[c];
}
|
|
|
|
template <typename T>
|
|
inline void Lexer<T>::record8(int c)
|
|
{
|
|
ASSERT(isLatin1(c));
|
|
m_buffer8.append(static_cast<LChar>(c));
|
|
}
|
|
|
|
template <typename T>
|
|
inline void Lexer<T>::append8(const T* p, size_t length)
|
|
{
|
|
size_t currentSize = m_buffer8.size();
|
|
m_buffer8.grow(currentSize + length);
|
|
LChar* rawBuffer = m_buffer8.data() + currentSize;
|
|
|
|
for (size_t i = 0; i < length; i++) {
|
|
T c = p[i];
|
|
ASSERT(isLatin1(c));
|
|
rawBuffer[i] = c;
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
inline void Lexer<T>::append16(const LChar* p, size_t length)
|
|
{
|
|
size_t currentSize = m_buffer16.size();
|
|
m_buffer16.grow(currentSize + length);
|
|
UChar* rawBuffer = m_buffer16.data() + currentSize;
|
|
|
|
for (size_t i = 0; i < length; i++)
|
|
rawBuffer[i] = p[i];
|
|
}
|
|
|
|
template <typename T>
|
|
inline void Lexer<T>::record16(T c)
|
|
{
|
|
m_buffer16.append(c);
|
|
}
|
|
|
|
template <typename T>
|
|
inline void Lexer<T>::record16(int c)
|
|
{
|
|
ASSERT(c >= 0);
|
|
ASSERT(c <= static_cast<int>(USHRT_MAX));
|
|
m_buffer16.append(static_cast<UChar>(c));
|
|
}
|
|
|
|
// Appends |codePoint| to the 16-bit scratch buffer: one code unit for a BMP
// code point, otherwise a lead/trail surrogate pair.
template<typename CharacterType> inline void Lexer<CharacterType>::recordUnicodeCodePoint(UChar32 codePoint)
{
    ASSERT(codePoint >= 0);
    ASSERT(codePoint <= UCHAR_MAX_VALUE);

    if (U_IS_BMP(codePoint)) {
        record16(codePoint);
        return;
    }

    UChar surrogatePair[2] = { U16_LEAD(codePoint), U16_TRAIL(codePoint) };
    append16(surrogatePair, 2);
}
|
|
|
|
#if ASSERT_ENABLED
// Returns false for identifiers that are considered suspicious inside builtin
// functions: the public names "call" and "apply", "eval" and "Function".
// A null |ident| is treated as safe.
bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
{
    if (!ident)
        return true;

    /* Just block any use of suspicious identifiers. This is intended to
     * be used as a safety net while implementing builtins.
     */
    // FIXME: How can a debug-only assertion be a safety net?
    auto& builtinNames = vm.propertyNames->builtinNames();
    const bool isBlocked = *ident == builtinNames.callPublicName()
        || *ident == builtinNames.applyPublicName()
        || *ident == vm.propertyNames->eval
        || *ident == vm.propertyNames->Function;
    return !isBlocked;
}
#endif // ASSERT_ENABLED
|
|
|
|
template <>
|
|
template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode)
|
|
{
|
|
tokenData->escaped = false;
|
|
const ptrdiff_t remaining = m_codeEnd - m_code;
|
|
if ((remaining >= maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) {
|
|
JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
|
|
if (keyword != IDENT) {
|
|
ASSERT((!shouldCreateIdentifier) || tokenData->ident);
|
|
return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
|
|
}
|
|
}
|
|
|
|
bool isPrivateName = m_current == '#';
|
|
bool isBuiltinName = m_current == '@' && m_parsingBuiltinFunction;
|
|
bool isWellKnownSymbol = false;
|
|
if (isBuiltinName) {
|
|
ASSERT(m_parsingBuiltinFunction);
|
|
shift();
|
|
if (m_current == '@') {
|
|
isWellKnownSymbol = true;
|
|
shift();
|
|
}
|
|
}
|
|
|
|
const LChar* identifierStart = currentSourcePtr();
|
|
|
|
if (isPrivateName)
|
|
shift();
|
|
|
|
ASSERT(isIdentStart(m_current) || m_current == '\\');
|
|
while (isIdentPart(m_current))
|
|
shift();
|
|
|
|
if (UNLIKELY(m_current == '\\'))
|
|
return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode, identifierStart);
|
|
|
|
const Identifier* ident = nullptr;
|
|
|
|
if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
|
|
int identifierLength = currentSourcePtr() - identifierStart;
|
|
ident = makeIdentifier(identifierStart, identifierLength);
|
|
if (m_parsingBuiltinFunction && isBuiltinName) {
|
|
if (isWellKnownSymbol)
|
|
ident = &m_arena->makeIdentifier(m_vm, m_vm.propertyNames->builtinNames().lookUpWellKnownSymbol(identifierStart, identifierLength));
|
|
else
|
|
ident = &m_arena->makeIdentifier(m_vm, m_vm.propertyNames->builtinNames().lookUpPrivateName(identifierStart, identifierLength));
|
|
if (!ident)
|
|
return INVALID_PRIVATE_NAME_ERRORTOK;
|
|
} else {
|
|
ident = makeIdentifier(identifierStart, identifierLength);
|
|
if (m_parsingBuiltinFunction) {
|
|
if (!isSafeBuiltinIdentifier(m_vm, ident)) {
|
|
m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
|
|
return ERRORTOK;
|
|
}
|
|
if (*ident == m_vm.propertyNames->undefinedKeyword)
|
|
tokenData->ident = &m_vm.propertyNames->undefinedPrivateName;
|
|
}
|
|
}
|
|
tokenData->ident = ident;
|
|
} else
|
|
tokenData->ident = nullptr;
|
|
|
|
auto identType = isPrivateName ? PRIVATENAME : IDENT;
|
|
if (UNLIKELY((remaining < maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) && !isBuiltinName) {
|
|
ASSERT(shouldCreateIdentifier);
|
|
if (remaining < maxTokenLength) {
|
|
const HashTableValue* entry = JSC::mainTable.entry(*ident);
|
|
ASSERT((remaining < maxTokenLength) || !entry);
|
|
if (!entry)
|
|
return identType;
|
|
JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
|
|
return (token != RESERVED_IF_STRICT) || strictMode ? token : identType;
|
|
}
|
|
return identType;
|
|
}
|
|
|
|
return identType;
|
|
}
|
|
|
|
template <>
|
|
template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode)
|
|
{
|
|
ASSERT(!m_parsingBuiltinFunction);
|
|
tokenData->escaped = false;
|
|
const ptrdiff_t remaining = m_codeEnd - m_code;
|
|
if ((remaining >= maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) {
|
|
JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
|
|
if (keyword != IDENT) {
|
|
ASSERT((!shouldCreateIdentifier) || tokenData->ident);
|
|
return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
|
|
}
|
|
}
|
|
|
|
bool isPrivateName = m_current == '#';
|
|
const UChar* identifierStart = currentSourcePtr();
|
|
|
|
if (isPrivateName)
|
|
shift();
|
|
|
|
UChar orAllChars = 0;
|
|
ASSERT(isSingleCharacterIdentStart(m_current) || U16_IS_SURROGATE(m_current) || m_current == '\\');
|
|
while (isSingleCharacterIdentPart(m_current)) {
|
|
orAllChars |= m_current;
|
|
shift();
|
|
}
|
|
|
|
if (UNLIKELY(U16_IS_SURROGATE(m_current) || m_current == '\\'))
|
|
return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode, identifierStart);
|
|
|
|
bool isAll8Bit = !(orAllChars & ~0xff);
|
|
const Identifier* ident = nullptr;
|
|
|
|
if (shouldCreateIdentifier) {
|
|
int identifierLength = currentSourcePtr() - identifierStart;
|
|
if (isAll8Bit)
|
|
ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
|
|
else
|
|
ident = makeIdentifier(identifierStart, identifierLength);
|
|
tokenData->ident = ident;
|
|
} else
|
|
tokenData->ident = nullptr;
|
|
|
|
if (isPrivateName)
|
|
return PRIVATENAME;
|
|
|
|
if (UNLIKELY((remaining < maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords))) {
|
|
ASSERT(shouldCreateIdentifier);
|
|
if (remaining < maxTokenLength) {
|
|
const HashTableValue* entry = JSC::mainTable.entry(*ident);
|
|
ASSERT((remaining < maxTokenLength) || !entry);
|
|
if (!entry)
|
|
return IDENT;
|
|
JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
|
|
return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
|
|
}
|
|
return IDENT;
|
|
}
|
|
|
|
return IDENT;
|
|
}
|
|
|
|
template<typename CharacterType>
|
|
template<bool shouldCreateIdentifier>
|
|
JSTokenType Lexer<CharacterType>::parseIdentifierSlowCase(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode, const CharacterType* identifierStart)
|
|
{
|
|
ASSERT(U16_IS_SURROGATE(m_current) || m_current == '\\');
|
|
ASSERT(m_buffer16.isEmpty());
|
|
ASSERT(!tokenData->escaped);
|
|
|
|
auto identCharsStart = identifierStart;
|
|
bool isPrivateName = *identifierStart == '#';
|
|
if (isPrivateName)
|
|
++identCharsStart;
|
|
|
|
JSTokenType identType = isPrivateName ? PRIVATENAME : IDENT;
|
|
ASSERT(!isPrivateName || identifierStart != currentSourcePtr());
|
|
|
|
auto fillBuffer = [&] (bool isStart = false) {
|
|
// \uXXXX unicode characters or Surrogate pairs.
|
|
if (identifierStart != currentSourcePtr())
|
|
m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
|
|
|
|
if (m_current == '\\') {
|
|
tokenData->escaped = true;
|
|
shift();
|
|
if (UNLIKELY(m_current != 'u'))
|
|
return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
|
|
shift();
|
|
auto character = parseUnicodeEscape();
|
|
if (UNLIKELY(!character.isValid()))
|
|
return character.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
|
|
if (UNLIKELY(isStart ? !isIdentStart(character.value()) : !isIdentPart(character.value())))
|
|
return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
|
|
if (shouldCreateIdentifier)
|
|
recordUnicodeCodePoint(character.value());
|
|
identifierStart = currentSourcePtr();
|
|
return identType;
|
|
}
|
|
|
|
ASSERT(U16_IS_SURROGATE(m_current));
|
|
if (UNLIKELY(!U16_IS_SURROGATE_LEAD(m_current)))
|
|
return INVALID_UNICODE_ENCODING_ERRORTOK;
|
|
|
|
UChar32 codePoint = currentCodePoint();
|
|
if (UNLIKELY(codePoint == U_SENTINEL))
|
|
return INVALID_UNICODE_ENCODING_ERRORTOK;
|
|
if (UNLIKELY(isStart ? !isNonLatin1IdentStart(codePoint) : !isNonLatin1IdentPart(codePoint)))
|
|
return INVALID_IDENTIFIER_UNICODE_ERRORTOK;
|
|
append16(m_code, 2);
|
|
shift();
|
|
shift();
|
|
identifierStart = currentSourcePtr();
|
|
return identType;
|
|
};
|
|
|
|
JSTokenType type = fillBuffer(identCharsStart == currentSourcePtr());
|
|
if (UNLIKELY(type & CanBeErrorTokenFlag))
|
|
return type;
|
|
|
|
while (true) {
|
|
if (LIKELY(isSingleCharacterIdentPart(m_current))) {
|
|
shift();
|
|
continue;
|
|
}
|
|
if (!U16_IS_SURROGATE(m_current) && m_current != '\\')
|
|
break;
|
|
|
|
type = fillBuffer();
|
|
if (UNLIKELY(type & CanBeErrorTokenFlag))
|
|
return type;
|
|
}
|
|
|
|
const Identifier* ident = nullptr;
|
|
if (shouldCreateIdentifier) {
|
|
if (identifierStart != currentSourcePtr())
|
|
m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
|
|
ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
|
|
|
|
tokenData->ident = ident;
|
|
} else
|
|
tokenData->ident = nullptr;
|
|
|
|
m_buffer16.shrink(0);
|
|
|
|
if (LIKELY(!lexerFlags.contains(LexerFlags::IgnoreReservedWords))) {
|
|
ASSERT(shouldCreateIdentifier);
|
|
const HashTableValue* entry = JSC::mainTable.entry(*ident);
|
|
if (!entry)
|
|
return identType;
|
|
JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
|
|
if ((token != RESERVED_IF_STRICT) || strictMode)
|
|
return ESCAPED_KEYWORD;
|
|
}
|
|
|
|
return identType;
|
|
}
|
|
|
|
// Returns true when an 8-bit character cannot be handled by the fast string
// path in parseString(): anything below 0xE.
static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
{
    const bool isBelowFastRange = character < 0xE;
    return isBelowFastRange;
}
|
|
|
|
// Returns true when a 16-bit character cannot be handled by the fast string
// path in parseString(): anything that is not Latin-1, or anything below 0xE.
static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
{
    if (!isLatin1(character))
        return true;
    return character < 0xE;
}
|
|
|
|
template <typename T>
|
|
template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
|
|
{
|
|
int startingOffset = currentOffset();
|
|
int startingLineStartOffset = currentLineStartOffset();
|
|
int startingLineNumber = lineNumber();
|
|
T stringQuoteCharacter = m_current;
|
|
shift();
|
|
|
|
const T* stringStart = currentSourcePtr();
|
|
|
|
while (m_current != stringQuoteCharacter) {
|
|
if (UNLIKELY(m_current == '\\')) {
|
|
if (stringStart != currentSourcePtr() && shouldBuildStrings)
|
|
append8(stringStart, currentSourcePtr() - stringStart);
|
|
shift();
|
|
|
|
LChar escape = singleEscape(m_current);
|
|
|
|
// Most common escape sequences first.
|
|
if (escape) {
|
|
if (shouldBuildStrings)
|
|
record8(escape);
|
|
shift();
|
|
} else if (UNLIKELY(isLineTerminator(m_current)))
|
|
shiftLineTerminator();
|
|
else if (m_current == 'x') {
|
|
shift();
|
|
if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
|
|
m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
|
|
return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
|
|
}
|
|
T prev = m_current;
|
|
shift();
|
|
if (shouldBuildStrings)
|
|
record8(convertHex(prev, m_current));
|
|
shift();
|
|
} else {
|
|
setOffset(startingOffset, startingLineStartOffset);
|
|
setLineNumber(startingLineNumber);
|
|
m_buffer8.shrink(0);
|
|
return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
|
|
}
|
|
stringStart = currentSourcePtr();
|
|
continue;
|
|
}
|
|
|
|
if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
|
|
setOffset(startingOffset, startingLineStartOffset);
|
|
setLineNumber(startingLineNumber);
|
|
m_buffer8.shrink(0);
|
|
return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
|
|
}
|
|
|
|
shift();
|
|
}
|
|
|
|
if (currentSourcePtr() != stringStart && shouldBuildStrings)
|
|
append8(stringStart, currentSourcePtr() - stringStart);
|
|
if (shouldBuildStrings) {
|
|
tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
|
|
m_buffer8.shrink(0);
|
|
} else
|
|
tokenData->ident = nullptr;
|
|
|
|
return StringParsedSuccessfully;
|
|
}
|
|
|
|
template <typename T>
|
|
template <bool shouldBuildStrings> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(bool strictMode) -> StringParseResult
|
|
{
|
|
if (m_current == 'x') {
|
|
shift();
|
|
if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
|
|
// For raw template literal syntax, we consume `NotEscapeSequence`.
|
|
//
|
|
// NotEscapeSequence ::
|
|
// x [lookahread not one of HexDigit]
|
|
// x HexDigit [lookahread not one of HexDigit]
|
|
if (isASCIIHexDigit(m_current))
|
|
shift();
|
|
ASSERT(!isASCIIHexDigit(m_current));
|
|
|
|
m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
|
|
return atEnd() ? StringUnterminated : StringCannotBeParsed;
|
|
}
|
|
|
|
T prev = m_current;
|
|
shift();
|
|
if (shouldBuildStrings)
|
|
record16(convertHex(prev, m_current));
|
|
shift();
|
|
|
|
return StringParsedSuccessfully;
|
|
}
|
|
|
|
if (m_current == 'u') {
|
|
shift();
|
|
|
|
auto character = parseUnicodeEscape();
|
|
if (character.isValid()) {
|
|
if (shouldBuildStrings)
|
|
recordUnicodeCodePoint(character.value());
|
|
return StringParsedSuccessfully;
|
|
}
|
|
|
|
m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence"_s;
|
|
return atEnd() ? StringUnterminated : StringCannotBeParsed;
|
|
}
|
|
|
|
if (strictMode) {
|
|
if (isASCIIDigit(m_current)) {
|
|
// The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
|
|
int character1 = m_current;
|
|
shift();
|
|
if (character1 != '0' || isASCIIDigit(m_current)) {
|
|
// For raw template literal syntax, we consume `NotEscapeSequence`.
|
|
//
|
|
// NotEscapeSequence ::
|
|
// 0 DecimalDigit
|
|
// DecimalDigit but not 0
|
|
if (character1 == '0')
|
|
shift();
|
|
|
|
m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'"_s;
|
|
return atEnd() ? StringUnterminated : StringCannotBeParsed;
|
|
}
|
|
if (shouldBuildStrings)
|
|
record16(0);
|
|
return StringParsedSuccessfully;
|
|
}
|
|
} else {
|
|
if (isASCIIOctalDigit(m_current)) {
|
|
// Octal character sequences
|
|
T character1 = m_current;
|
|
shift();
|
|
if (isASCIIOctalDigit(m_current)) {
|
|
// Two octal characters
|
|
T character2 = m_current;
|
|
shift();
|
|
if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
|
|
if (shouldBuildStrings)
|
|
record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
|
|
shift();
|
|
} else {
|
|
if (shouldBuildStrings)
|
|
record16((character1 - '0') * 8 + character2 - '0');
|
|
}
|
|
} else {
|
|
if (shouldBuildStrings)
|
|
record16(character1 - '0');
|
|
}
|
|
return StringParsedSuccessfully;
|
|
}
|
|
}
|
|
|
|
if (!atEnd()) {
|
|
if (shouldBuildStrings)
|
|
record16(m_current);
|
|
shift();
|
|
return StringParsedSuccessfully;
|
|
}
|
|
|
|
m_lexErrorMessage = "Unterminated string constant"_s;
|
|
return StringUnterminated;
|
|
}
|
|
|
|
template <typename T>
|
|
template <bool shouldBuildStrings> auto Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) -> StringParseResult
|
|
{
|
|
T stringQuoteCharacter = m_current;
|
|
shift();
|
|
|
|
const T* stringStart = currentSourcePtr();
|
|
|
|
while (m_current != stringQuoteCharacter) {
|
|
if (UNLIKELY(m_current == '\\')) {
|
|
if (stringStart != currentSourcePtr() && shouldBuildStrings)
|
|
append16(stringStart, currentSourcePtr() - stringStart);
|
|
shift();
|
|
|
|
LChar escape = singleEscape(m_current);
|
|
|
|
// Most common escape sequences first
|
|
if (escape) {
|
|
if (shouldBuildStrings)
|
|
record16(escape);
|
|
shift();
|
|
} else if (UNLIKELY(isLineTerminator(m_current)))
|
|
shiftLineTerminator();
|
|
else {
|
|
StringParseResult result = parseComplexEscape<shouldBuildStrings>(strictMode);
|
|
if (result != StringParsedSuccessfully)
|
|
return result;
|
|
}
|
|
|
|
stringStart = currentSourcePtr();
|
|
continue;
|
|
}
|
|
// Fast check for characters that require special handling.
|
|
// Catches 0, \n, and \r as efficiently as possible, and lets through all common ASCII characters.
|
|
static_assert(std::is_unsigned<T>::value, "Lexer expects an unsigned character type");
|
|
if (UNLIKELY(m_current < 0xE)) {
|
|
// New-line or end of input is not allowed
|
|
if (atEnd() || m_current == '\r' || m_current == '\n') {
|
|
m_lexErrorMessage = "Unexpected EOF"_s;
|
|
return atEnd() ? StringUnterminated : StringCannotBeParsed;
|
|
}
|
|
// Anything else is just a normal character
|
|
}
|
|
shift();
|
|
}
|
|
|
|
if (currentSourcePtr() != stringStart && shouldBuildStrings)
|
|
append16(stringStart, currentSourcePtr() - stringStart);
|
|
if (shouldBuildStrings)
|
|
tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
|
|
else
|
|
tokenData->ident = nullptr;
|
|
|
|
m_buffer16.shrink(0);
|
|
return StringParsedSuccessfully;
|
|
}
|
|
|
|
template <typename T>
|
|
typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData, RawStringsBuildMode rawStringsBuildMode)
|
|
{
|
|
bool parseCookedFailed = false;
|
|
const T* stringStart = currentSourcePtr();
|
|
const T* rawStringStart = currentSourcePtr();
|
|
|
|
while (m_current != '`') {
|
|
if (UNLIKELY(m_current == '\\')) {
|
|
if (stringStart != currentSourcePtr())
|
|
append16(stringStart, currentSourcePtr() - stringStart);
|
|
shift();
|
|
|
|
LChar escape = singleEscape(m_current);
|
|
|
|
// Most common escape sequences first.
|
|
if (escape) {
|
|
record16(escape);
|
|
shift();
|
|
} else if (UNLIKELY(isLineTerminator(m_current))) {
|
|
// Normalize <CR>, <CR><LF> to <LF>.
|
|
if (m_current == '\r') {
|
|
ASSERT_WITH_MESSAGE(rawStringStart != currentSourcePtr(), "We should have at least shifted the escape.");
|
|
|
|
if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings) {
|
|
m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
|
|
m_bufferForRawTemplateString16.append('\n');
|
|
}
|
|
|
|
shiftLineTerminator();
|
|
rawStringStart = currentSourcePtr();
|
|
} else
|
|
shiftLineTerminator();
|
|
} else {
|
|
bool strictMode = true;
|
|
StringParseResult result = parseComplexEscape<true>(strictMode);
|
|
if (result != StringParsedSuccessfully) {
|
|
if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings && result == StringCannotBeParsed)
|
|
parseCookedFailed = true;
|
|
else
|
|
return result;
|
|
}
|
|
}
|
|
|
|
stringStart = currentSourcePtr();
|
|
continue;
|
|
}
|
|
|
|
if (m_current == '$' && peek(1) == '{')
|
|
break;
|
|
|
|
// Fast check for characters that require special handling.
|
|
// Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
|
|
// as possible, and lets through all common ASCII characters.
|
|
if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
|
|
// End of input is not allowed.
|
|
// Unlike String, line terminator is allowed.
|
|
if (atEnd()) {
|
|
m_lexErrorMessage = "Unexpected EOF"_s;
|
|
return StringUnterminated;
|
|
}
|
|
|
|
if (isLineTerminator(m_current)) {
|
|
if (m_current == '\r') {
|
|
// Normalize <CR>, <CR><LF> to <LF>.
|
|
if (stringStart != currentSourcePtr())
|
|
append16(stringStart, currentSourcePtr() - stringStart);
|
|
if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
|
|
m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
|
|
|
|
record16('\n');
|
|
if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
|
|
m_bufferForRawTemplateString16.append('\n');
|
|
shiftLineTerminator();
|
|
stringStart = currentSourcePtr();
|
|
rawStringStart = currentSourcePtr();
|
|
} else
|
|
shiftLineTerminator();
|
|
continue;
|
|
}
|
|
// Anything else is just a normal character
|
|
}
|
|
|
|
shift();
|
|
}
|
|
|
|
bool isTail = m_current == '`';
|
|
|
|
if (currentSourcePtr() != stringStart)
|
|
append16(stringStart, currentSourcePtr() - stringStart);
|
|
if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
|
|
m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
|
|
|
|
if (!parseCookedFailed)
|
|
tokenData->cooked = makeIdentifier(m_buffer16.data(), m_buffer16.size());
|
|
else
|
|
tokenData->cooked = nullptr;
|
|
|
|
// Line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations.
|
|
if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
|
|
tokenData->raw = makeIdentifier(m_bufferForRawTemplateString16.data(), m_bufferForRawTemplateString16.size());
|
|
else
|
|
tokenData->raw = nullptr;
|
|
|
|
tokenData->isTail = isTail;
|
|
|
|
m_buffer16.shrink(0);
|
|
m_bufferForRawTemplateString16.shrink(0);
|
|
|
|
if (isTail) {
|
|
// Skip `
|
|
shift();
|
|
} else {
|
|
// Skip $ and {
|
|
shift();
|
|
shift();
|
|
}
|
|
|
|
return StringParsedSuccessfully;
|
|
}
|
|
|
|
template <typename T>
|
|
ALWAYS_INLINE auto Lexer<T>::parseHex() -> Optional<NumberParseResult>
|
|
{
|
|
ASSERT(isASCIIHexDigit(m_current));
|
|
|
|
// Optimization: most hexadecimal values fit into 4 bytes.
|
|
uint32_t hexValue = 0;
|
|
int maximumDigits = 7;
|
|
|
|
do {
|
|
if (m_current == '_') {
|
|
if (UNLIKELY(!isASCIIHexDigit(peek(1))))
|
|
return WTF::nullopt;
|
|
|
|
shift();
|
|
}
|
|
|
|
hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
|
|
shift();
|
|
--maximumDigits;
|
|
} while (isASCIIHexDigitOrSeparator(m_current) && maximumDigits >= 0);
|
|
|
|
if (LIKELY(maximumDigits >= 0 && m_current != 'n'))
|
|
return NumberParseResult { hexValue };
|
|
|
|
// No more place in the hexValue buffer.
|
|
// The values are shifted out and placed into the m_buffer8 vector.
|
|
for (int i = 0; i < 8; ++i) {
|
|
int digit = hexValue >> 28;
|
|
if (digit < 10)
|
|
record8(digit + '0');
|
|
else
|
|
record8(digit - 10 + 'a');
|
|
hexValue <<= 4;
|
|
}
|
|
|
|
while (isASCIIHexDigitOrSeparator(m_current)) {
|
|
if (m_current == '_') {
|
|
if (UNLIKELY(!isASCIIHexDigit(peek(1))))
|
|
return WTF::nullopt;
|
|
|
|
shift();
|
|
}
|
|
|
|
record8(m_current);
|
|
shift();
|
|
}
|
|
|
|
if (UNLIKELY(m_current == 'n'))
|
|
return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
|
|
|
|
return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16) };
|
|
}
|
|
|
|
template <typename T>
|
|
ALWAYS_INLINE auto Lexer<T>::parseBinary() -> Optional<NumberParseResult>
|
|
{
|
|
ASSERT(isASCIIBinaryDigit(m_current));
|
|
|
|
// Optimization: most binary values fit into 4 bytes.
|
|
uint32_t binaryValue = 0;
|
|
const unsigned maximumDigits = 32;
|
|
int digit = maximumDigits - 1;
|
|
// Temporary buffer for the digits. Makes easier
|
|
// to reconstruct the input characters when needed.
|
|
LChar digits[maximumDigits];
|
|
|
|
do {
|
|
if (m_current == '_') {
|
|
if (UNLIKELY(!isASCIIBinaryDigit(peek(1))))
|
|
return WTF::nullopt;
|
|
|
|
shift();
|
|
}
|
|
|
|
binaryValue = (binaryValue << 1) + (m_current - '0');
|
|
digits[digit] = m_current;
|
|
shift();
|
|
--digit;
|
|
} while (isASCIIBinaryDigitOrSeparator(m_current) && digit >= 0);
|
|
|
|
if (LIKELY(!isASCIIDigitOrSeparator(m_current) && digit >= 0 && m_current != 'n'))
|
|
return NumberParseResult { binaryValue };
|
|
|
|
for (int i = maximumDigits - 1; i > digit; --i)
|
|
record8(digits[i]);
|
|
|
|
while (isASCIIBinaryDigitOrSeparator(m_current)) {
|
|
if (m_current == '_') {
|
|
if (UNLIKELY(!isASCIIBinaryDigit(peek(1))))
|
|
return WTF::nullopt;
|
|
|
|
shift();
|
|
}
|
|
|
|
record8(m_current);
|
|
shift();
|
|
}
|
|
|
|
if (UNLIKELY(m_current == 'n'))
|
|
return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
|
|
|
|
if (isASCIIDigit(m_current))
|
|
return WTF::nullopt;
|
|
|
|
return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 2) };
|
|
}
|
|
|
|
template <typename T>
|
|
ALWAYS_INLINE auto Lexer<T>::parseOctal() -> Optional<NumberParseResult>
|
|
{
|
|
ASSERT(isASCIIOctalDigit(m_current));
|
|
ASSERT(!m_buffer8.size() || (m_buffer8.size() == 1 && m_buffer8[0] == '0'));
|
|
bool isLegacyLiteral = m_buffer8.size();
|
|
|
|
// Optimization: most octal values fit into 4 bytes.
|
|
uint32_t octalValue = 0;
|
|
const unsigned maximumDigits = 10;
|
|
int digit = maximumDigits - 1;
|
|
// Temporary buffer for the digits. Makes easier
|
|
// to reconstruct the input characters when needed.
|
|
LChar digits[maximumDigits];
|
|
|
|
do {
|
|
if (m_current == '_') {
|
|
if (UNLIKELY(!isASCIIOctalDigit(peek(1)) || isLegacyLiteral))
|
|
return WTF::nullopt;
|
|
|
|
shift();
|
|
}
|
|
|
|
octalValue = octalValue * 8 + (m_current - '0');
|
|
digits[digit] = m_current;
|
|
shift();
|
|
--digit;
|
|
} while (isASCIIOctalDigitOrSeparator(m_current) && digit >= 0);
|
|
|
|
if (LIKELY(!isASCIIDigitOrSeparator(m_current) && digit >= 0 && m_current != 'n'))
|
|
return NumberParseResult { octalValue };
|
|
|
|
for (int i = maximumDigits - 1; i > digit; --i)
|
|
record8(digits[i]);
|
|
|
|
while (isASCIIOctalDigitOrSeparator(m_current)) {
|
|
if (m_current == '_') {
|
|
if (UNLIKELY(!isASCIIOctalDigit(peek(1)) || isLegacyLiteral))
|
|
return WTF::nullopt;
|
|
|
|
shift();
|
|
}
|
|
|
|
record8(m_current);
|
|
shift();
|
|
}
|
|
|
|
if (UNLIKELY(m_current == 'n') && !isLegacyLiteral)
|
|
return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
|
|
|
|
if (isASCIIDigit(m_current))
|
|
return WTF::nullopt;
|
|
|
|
return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8) };
|
|
}
|
|
|
|
template <typename T>
|
|
ALWAYS_INLINE auto Lexer<T>::parseDecimal() -> Optional<NumberParseResult>
|
|
{
|
|
ASSERT(isASCIIDigit(m_current) || m_buffer8.size());
|
|
bool isLegacyLiteral = m_buffer8.size() && isASCIIDigitOrSeparator(m_current);
|
|
|
|
// Optimization: most decimal values fit into 4 bytes.
|
|
uint32_t decimalValue = 0;
|
|
|
|
// Since parseOctal may be executed before parseDecimal,
|
|
// the m_buffer8 may hold ascii digits.
|
|
if (!m_buffer8.size()) {
|
|
const unsigned maximumDigits = 10;
|
|
int digit = maximumDigits - 1;
|
|
// Temporary buffer for the digits. Makes easier
|
|
// to reconstruct the input characters when needed.
|
|
LChar digits[maximumDigits];
|
|
|
|
do {
|
|
if (m_current == '_') {
|
|
if (UNLIKELY(!isASCIIDigit(peek(1)) || isLegacyLiteral))
|
|
return WTF::nullopt;
|
|
|
|
shift();
|
|
}
|
|
|
|
decimalValue = decimalValue * 10 + (m_current - '0');
|
|
digits[digit] = m_current;
|
|
shift();
|
|
--digit;
|
|
} while (isASCIIDigitOrSeparator(m_current) && digit >= 0);
|
|
|
|
if (digit >= 0 && m_current != '.' && !isASCIIAlphaCaselessEqual(m_current, 'e') && m_current != 'n')
|
|
return NumberParseResult { decimalValue };
|
|
|
|
for (int i = maximumDigits - 1; i > digit; --i)
|
|
record8(digits[i]);
|
|
}
|
|
|
|
while (isASCIIDigitOrSeparator(m_current)) {
|
|
if (m_current == '_') {
|
|
if (UNLIKELY(!isASCIIDigit(peek(1)) || isLegacyLiteral))
|
|
return WTF::nullopt;
|
|
|
|
shift();
|
|
}
|
|
|
|
record8(m_current);
|
|
shift();
|
|
}
|
|
|
|
if (UNLIKELY(m_current == 'n' && !isLegacyLiteral))
|
|
return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
|
|
|
|
return WTF::nullopt;
|
|
}
|
|
|
|
template <typename T>
|
|
ALWAYS_INLINE bool Lexer<T>::parseNumberAfterDecimalPoint()
|
|
{
|
|
ASSERT(isASCIIDigit(m_current));
|
|
record8('.');
|
|
|
|
do {
|
|
if (m_current == '_') {
|
|
if (UNLIKELY(!isASCIIDigit(peek(1))))
|
|
return false;
|
|
|
|
shift();
|
|
}
|
|
|
|
record8(m_current);
|
|
shift();
|
|
} while (isASCIIDigitOrSeparator(m_current));
|
|
|
|
return true;
|
|
}
|
|
|
|
template <typename T>
|
|
ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
|
|
{
|
|
record8('e');
|
|
shift();
|
|
if (m_current == '+' || m_current == '-') {
|
|
record8(m_current);
|
|
shift();
|
|
}
|
|
|
|
if (!isASCIIDigit(m_current))
|
|
return false;
|
|
|
|
do {
|
|
if (m_current == '_') {
|
|
if (UNLIKELY(!isASCIIDigit(peek(1))))
|
|
return false;
|
|
|
|
shift();
|
|
}
|
|
|
|
record8(m_current);
|
|
shift();
|
|
} while (isASCIIDigitOrSeparator(m_current));
|
|
|
|
return true;
|
|
}
|
|
|
|
template <typename T>
|
|
ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
|
|
{
|
|
while (true) {
|
|
while (UNLIKELY(m_current == '*')) {
|
|
shift();
|
|
if (m_current == '/') {
|
|
shift();
|
|
return true;
|
|
}
|
|
}
|
|
|
|
if (atEnd())
|
|
return false;
|
|
|
|
if (isLineTerminator(m_current)) {
|
|
shiftLineTerminator();
|
|
m_hasLineTerminatorBeforeToken = true;
|
|
} else
|
|
shift();
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
ALWAYS_INLINE void Lexer<T>::parseCommentDirective()
|
|
{
|
|
// sourceURL and sourceMappingURL directives.
|
|
if (!consume("source"))
|
|
return;
|
|
|
|
if (consume("URL=")) {
|
|
m_sourceURLDirective = parseCommentDirectiveValue();
|
|
return;
|
|
}
|
|
|
|
if (consume("MappingURL=")) {
|
|
m_sourceMappingURLDirective = parseCommentDirectiveValue();
|
|
return;
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
ALWAYS_INLINE String Lexer<T>::parseCommentDirectiveValue()
|
|
{
|
|
skipWhitespace();
|
|
const T* stringStart = currentSourcePtr();
|
|
while (!isWhiteSpace(m_current) && !isLineTerminator(m_current) && m_current != '"' && m_current != '\'' && !atEnd())
|
|
shift();
|
|
const T* stringEnd = currentSourcePtr();
|
|
skipWhitespace();
|
|
|
|
if (!isLineTerminator(m_current) && !atEnd())
|
|
return String();
|
|
|
|
append8(stringStart, stringEnd - stringStart);
|
|
String result = String(m_buffer8.data(), m_buffer8.size());
|
|
m_buffer8.shrink(0);
|
|
return result;
|
|
}
|
|
|
|
template <typename T>
|
|
template <unsigned length>
|
|
ALWAYS_INLINE bool Lexer<T>::consume(const char (&input)[length])
|
|
{
|
|
unsigned lengthToCheck = length - 1; // Ignore the ending NULL byte in the string literal.
|
|
|
|
unsigned i = 0;
|
|
for (; i < lengthToCheck && m_current == input[i]; i++)
|
|
shift();
|
|
|
|
return i == lengthToCheck;
|
|
}
|
|
|
|
template <typename T>
|
|
bool Lexer<T>::nextTokenIsColon()
|
|
{
|
|
const T* code = m_code;
|
|
while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
|
|
code++;
|
|
|
|
return code < m_codeEnd && *code == ':';
|
|
}
|
|
|
|
template <typename T>
|
|
void Lexer<T>::fillTokenInfo(JSToken* tokenRecord, JSTokenType token, int lineNumber, int endOffset, int lineStartOffset, JSTextPosition endPosition)
|
|
{
|
|
JSTokenLocation* tokenLocation = &tokenRecord->m_location;
|
|
tokenLocation->line = lineNumber;
|
|
tokenLocation->endOffset = endOffset;
|
|
tokenLocation->lineStartOffset = lineStartOffset;
|
|
ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
|
|
tokenRecord->m_endPosition = endPosition;
|
|
m_lastToken = token;
|
|
}
|
|
|
|
template <typename T>
|
|
JSTokenType Lexer<T>::lexWithoutClearingLineTerminator(JSToken* tokenRecord, OptionSet<LexerFlags> lexerFlags, bool strictMode)
|
|
{
|
|
JSTokenData* tokenData = &tokenRecord->m_data;
|
|
JSTokenLocation* tokenLocation = &tokenRecord->m_location;
|
|
m_lastTokenLocation = JSTokenLocation(tokenRecord->m_location);
|
|
|
|
ASSERT(!m_error);
|
|
ASSERT(m_buffer8.isEmpty());
|
|
ASSERT(m_buffer16.isEmpty());
|
|
|
|
JSTokenType token = ERRORTOK;
|
|
|
|
start:
|
|
skipWhitespace();
|
|
|
|
tokenLocation->startOffset = currentOffset();
|
|
ASSERT(currentOffset() >= currentLineStartOffset());
|
|
tokenRecord->m_startPosition = currentPosition();
|
|
|
|
if (atEnd()) {
|
|
token = EOFTOK;
|
|
goto returnToken;
|
|
}
|
|
|
|
CharacterType type;
|
|
if (LIKELY(isLatin1(m_current)))
|
|
type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
|
|
else {
|
|
UChar32 codePoint;
|
|
U16_GET(m_code, 0, 0, m_codeEnd - m_code, codePoint);
|
|
if (isNonLatin1IdentStart(codePoint))
|
|
type = CharacterIdentifierStart;
|
|
else if (isLineTerminator(m_current))
|
|
type = CharacterLineTerminator;
|
|
else
|
|
type = CharacterInvalid;
|
|
}
|
|
|
|
switch (type) {
|
|
case CharacterGreater:
|
|
shift();
|
|
if (m_current == '>') {
|
|
shift();
|
|
if (m_current == '>') {
|
|
shift();
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = URSHIFTEQUAL;
|
|
break;
|
|
}
|
|
token = URSHIFT;
|
|
break;
|
|
}
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = RSHIFTEQUAL;
|
|
break;
|
|
}
|
|
token = RSHIFT;
|
|
break;
|
|
}
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = GE;
|
|
break;
|
|
}
|
|
token = GT;
|
|
break;
|
|
case CharacterEqual: {
|
|
if (peek(1) == '>') {
|
|
token = ARROWFUNCTION;
|
|
tokenData->line = lineNumber();
|
|
tokenData->offset = currentOffset();
|
|
tokenData->lineStartOffset = currentLineStartOffset();
|
|
ASSERT(tokenData->offset >= tokenData->lineStartOffset);
|
|
shift();
|
|
shift();
|
|
break;
|
|
}
|
|
|
|
shift();
|
|
if (m_current == '=') {
|
|
shift();
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = STREQ;
|
|
break;
|
|
}
|
|
token = EQEQ;
|
|
break;
|
|
}
|
|
token = EQUAL;
|
|
break;
|
|
}
|
|
case CharacterLess:
|
|
shift();
|
|
if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
|
|
if (m_scriptMode == JSParserScriptMode::Classic) {
|
|
// <!-- marks the beginning of a line comment (for www usage)
|
|
goto inSingleLineComment;
|
|
}
|
|
}
|
|
if (m_current == '<') {
|
|
shift();
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = LSHIFTEQUAL;
|
|
break;
|
|
}
|
|
token = LSHIFT;
|
|
break;
|
|
}
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = LE;
|
|
break;
|
|
}
|
|
token = LT;
|
|
break;
|
|
case CharacterExclamationMark:
|
|
shift();
|
|
if (m_current == '=') {
|
|
shift();
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = STRNEQ;
|
|
break;
|
|
}
|
|
token = NE;
|
|
break;
|
|
}
|
|
token = EXCLAMATION;
|
|
break;
|
|
case CharacterAdd:
|
|
shift();
|
|
if (m_current == '+') {
|
|
shift();
|
|
token = (!m_hasLineTerminatorBeforeToken) ? PLUSPLUS : AUTOPLUSPLUS;
|
|
break;
|
|
}
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = PLUSEQUAL;
|
|
break;
|
|
}
|
|
token = PLUS;
|
|
break;
|
|
case CharacterSub:
|
|
shift();
|
|
if (m_current == '-') {
|
|
shift();
|
|
if ((m_atLineStart || m_hasLineTerminatorBeforeToken) && m_current == '>') {
|
|
if (m_scriptMode == JSParserScriptMode::Classic) {
|
|
shift();
|
|
goto inSingleLineComment;
|
|
}
|
|
}
|
|
token = (!m_hasLineTerminatorBeforeToken) ? MINUSMINUS : AUTOMINUSMINUS;
|
|
break;
|
|
}
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = MINUSEQUAL;
|
|
break;
|
|
}
|
|
token = MINUS;
|
|
break;
|
|
case CharacterMultiply:
|
|
shift();
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = MULTEQUAL;
|
|
break;
|
|
}
|
|
if (m_current == '*') {
|
|
shift();
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = POWEQUAL;
|
|
break;
|
|
}
|
|
token = POW;
|
|
break;
|
|
}
|
|
token = TIMES;
|
|
break;
|
|
case CharacterSlash:
|
|
shift();
|
|
if (m_current == '/') {
|
|
shift();
|
|
goto inSingleLineCommentCheckForDirectives;
|
|
}
|
|
if (m_current == '*') {
|
|
shift();
|
|
if (parseMultilineComment())
|
|
goto start;
|
|
m_lexErrorMessage = "Multiline comment was not closed properly"_s;
|
|
token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = DIVEQUAL;
|
|
break;
|
|
}
|
|
token = DIVIDE;
|
|
break;
|
|
case CharacterAnd:
|
|
shift();
|
|
if (m_current == '&') {
|
|
shift();
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = ANDEQUAL;
|
|
break;
|
|
}
|
|
token = AND;
|
|
break;
|
|
}
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = BITANDEQUAL;
|
|
break;
|
|
}
|
|
token = BITAND;
|
|
break;
|
|
case CharacterXor:
|
|
shift();
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = BITXOREQUAL;
|
|
break;
|
|
}
|
|
token = BITXOR;
|
|
break;
|
|
case CharacterModulo:
|
|
shift();
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = MODEQUAL;
|
|
break;
|
|
}
|
|
token = MOD;
|
|
break;
|
|
case CharacterOr:
|
|
shift();
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = BITOREQUAL;
|
|
break;
|
|
}
|
|
if (m_current == '|') {
|
|
shift();
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = OREQUAL;
|
|
break;
|
|
}
|
|
token = OR;
|
|
break;
|
|
}
|
|
token = BITOR;
|
|
break;
|
|
case CharacterOpenParen:
|
|
token = OPENPAREN;
|
|
tokenData->line = lineNumber();
|
|
tokenData->offset = currentOffset();
|
|
tokenData->lineStartOffset = currentLineStartOffset();
|
|
shift();
|
|
break;
|
|
case CharacterCloseParen:
|
|
token = CLOSEPAREN;
|
|
shift();
|
|
break;
|
|
case CharacterOpenBracket:
|
|
token = OPENBRACKET;
|
|
shift();
|
|
break;
|
|
case CharacterCloseBracket:
|
|
token = CLOSEBRACKET;
|
|
shift();
|
|
break;
|
|
case CharacterComma:
|
|
token = COMMA;
|
|
shift();
|
|
break;
|
|
case CharacterColon:
|
|
token = COLON;
|
|
shift();
|
|
break;
|
|
case CharacterQuestion:
|
|
shift();
|
|
if (m_current == '?') {
|
|
shift();
|
|
if (m_current == '=') {
|
|
shift();
|
|
token = COALESCEEQUAL;
|
|
break;
|
|
}
|
|
token = COALESCE;
|
|
break;
|
|
}
|
|
if (m_current == '.' && !isASCIIDigit(peek(1))) {
|
|
shift();
|
|
token = QUESTIONDOT;
|
|
break;
|
|
}
|
|
token = QUESTION;
|
|
break;
|
|
case CharacterTilde:
|
|
token = TILDE;
|
|
shift();
|
|
break;
|
|
case CharacterSemicolon:
|
|
shift();
|
|
token = SEMICOLON;
|
|
break;
|
|
case CharacterBackQuote:
|
|
shift();
|
|
token = BACKQUOTE;
|
|
break;
|
|
case CharacterOpenBrace:
|
|
tokenData->line = lineNumber();
|
|
tokenData->offset = currentOffset();
|
|
tokenData->lineStartOffset = currentLineStartOffset();
|
|
ASSERT(tokenData->offset >= tokenData->lineStartOffset);
|
|
shift();
|
|
token = OPENBRACE;
|
|
break;
|
|
case CharacterCloseBrace:
|
|
tokenData->line = lineNumber();
|
|
tokenData->offset = currentOffset();
|
|
tokenData->lineStartOffset = currentLineStartOffset();
|
|
ASSERT(tokenData->offset >= tokenData->lineStartOffset);
|
|
shift();
|
|
token = CLOSEBRACE;
|
|
break;
|
|
case CharacterDot:
|
|
shift();
|
|
if (!isASCIIDigit(m_current)) {
|
|
if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
|
|
shift();
|
|
shift();
|
|
token = DOTDOTDOT;
|
|
break;
|
|
}
|
|
token = DOT;
|
|
break;
|
|
}
|
|
if (UNLIKELY(!parseNumberAfterDecimalPoint())) {
|
|
m_lexErrorMessage = "Non-number found after decimal point"_s;
|
|
token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
token = DOUBLE;
|
|
if (UNLIKELY(isASCIIAlphaCaselessEqual(m_current, 'e') && !parseNumberAfterExponentIndicator())) {
|
|
m_lexErrorMessage = "Non-number found after exponent indicator"_s;
|
|
token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
size_t parsedLength;
|
|
tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
|
|
if (token == INTEGER)
|
|
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
|
|
|
if (LIKELY(cannotBeIdentStart(m_current))) {
|
|
m_buffer8.shrink(0);
|
|
break;
|
|
}
|
|
|
|
if (UNLIKELY(isIdentStart(currentCodePoint()))) {
|
|
m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
|
|
token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
m_buffer8.shrink(0);
|
|
break;
|
|
case CharacterZero:
|
|
shift();
|
|
if (isASCIIAlphaCaselessEqual(m_current, 'x')) {
|
|
if (UNLIKELY(!isASCIIHexDigit(peek(1)))) {
|
|
m_lexErrorMessage = "No hexadecimal digits after '0x'"_s;
|
|
token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
|
|
// Shift out the 'x' prefix.
|
|
shift();
|
|
|
|
auto parseNumberResult = parseHex();
|
|
if (!parseNumberResult)
|
|
tokenData->doubleValue = 0;
|
|
else if (WTF::holds_alternative<double>(*parseNumberResult))
|
|
tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
|
|
else {
|
|
token = BIGINT;
|
|
shift();
|
|
tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
|
|
tokenData->radix = 16;
|
|
}
|
|
|
|
if (LIKELY(cannotBeIdentStart(m_current))) {
|
|
if (LIKELY(token != BIGINT))
|
|
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
|
m_buffer8.shrink(0);
|
|
break;
|
|
}
|
|
|
|
if (UNLIKELY(isIdentStart(currentCodePoint()))) {
|
|
m_lexErrorMessage = "No space between hexadecimal literal and identifier"_s;
|
|
token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
if (LIKELY(token != BIGINT))
|
|
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
|
m_buffer8.shrink(0);
|
|
break;
|
|
}
|
|
if (isASCIIAlphaCaselessEqual(m_current, 'b')) {
|
|
if (UNLIKELY(!isASCIIBinaryDigit(peek(1)))) {
|
|
m_lexErrorMessage = "No binary digits after '0b'"_s;
|
|
token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
|
|
// Shift out the 'b' prefix.
|
|
shift();
|
|
|
|
auto parseNumberResult = parseBinary();
|
|
if (!parseNumberResult)
|
|
tokenData->doubleValue = 0;
|
|
else if (WTF::holds_alternative<double>(*parseNumberResult))
|
|
tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
|
|
else {
|
|
token = BIGINT;
|
|
shift();
|
|
tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
|
|
tokenData->radix = 2;
|
|
}
|
|
|
|
if (LIKELY(cannotBeIdentStart(m_current))) {
|
|
if (LIKELY(token != BIGINT))
|
|
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
|
m_buffer8.shrink(0);
|
|
break;
|
|
}
|
|
|
|
if (UNLIKELY(isIdentStart(currentCodePoint()))) {
|
|
m_lexErrorMessage = "No space between binary literal and identifier"_s;
|
|
token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
if (LIKELY(token != BIGINT))
|
|
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
|
m_buffer8.shrink(0);
|
|
break;
|
|
}
|
|
|
|
if (isASCIIAlphaCaselessEqual(m_current, 'o')) {
|
|
if (UNLIKELY(!isASCIIOctalDigit(peek(1)))) {
|
|
m_lexErrorMessage = "No octal digits after '0o'"_s;
|
|
token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
|
|
// Shift out the 'o' prefix.
|
|
shift();
|
|
|
|
auto parseNumberResult = parseOctal();
|
|
if (!parseNumberResult)
|
|
tokenData->doubleValue = 0;
|
|
else if (WTF::holds_alternative<double>(*parseNumberResult))
|
|
tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
|
|
else {
|
|
token = BIGINT;
|
|
shift();
|
|
tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
|
|
tokenData->radix = 8;
|
|
}
|
|
|
|
if (LIKELY(cannotBeIdentStart(m_current))) {
|
|
if (LIKELY(token != BIGINT))
|
|
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
|
m_buffer8.shrink(0);
|
|
break;
|
|
}
|
|
|
|
if (UNLIKELY(isIdentStart(currentCodePoint()))) {
|
|
m_lexErrorMessage = "No space between octal literal and identifier"_s;
|
|
token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
if (LIKELY(token != BIGINT))
|
|
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
|
m_buffer8.shrink(0);
|
|
break;
|
|
}
|
|
|
|
if (UNLIKELY(m_current == '_')) {
|
|
m_lexErrorMessage = "Numeric literals may not begin with 0_"_s;
|
|
token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
|
|
record8('0');
|
|
if (UNLIKELY(strictMode && isASCIIDigit(m_current))) {
|
|
m_lexErrorMessage = "Decimal integer literals with a leading zero are forbidden in strict mode"_s;
|
|
token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
if (isASCIIOctalDigit(m_current)) {
|
|
auto parseNumberResult = parseOctal();
|
|
if (parseNumberResult && WTF::holds_alternative<double>(*parseNumberResult)) {
|
|
tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
|
|
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
|
}
|
|
}
|
|
FALLTHROUGH;
|
|
case CharacterNumber:
|
|
if (LIKELY(token != INTEGER && token != DOUBLE)) {
|
|
auto parseNumberResult = parseDecimal();
|
|
if (parseNumberResult) {
|
|
if (WTF::holds_alternative<double>(*parseNumberResult)) {
|
|
tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
|
|
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
|
} else {
|
|
token = BIGINT;
|
|
shift();
|
|
tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
|
|
tokenData->radix = 10;
|
|
}
|
|
} else {
|
|
token = INTEGER;
|
|
if (m_current == '.') {
|
|
shift();
|
|
if (UNLIKELY(isASCIIDigit(m_current) && !parseNumberAfterDecimalPoint())) {
|
|
m_lexErrorMessage = "Non-number found after decimal point"_s;
|
|
token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
token = DOUBLE;
|
|
}
|
|
if (UNLIKELY(isASCIIAlphaCaselessEqual(m_current, 'e') && !parseNumberAfterExponentIndicator())) {
|
|
m_lexErrorMessage = "Non-number found after exponent indicator"_s;
|
|
token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
size_t parsedLength;
|
|
tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
|
|
if (token == INTEGER)
|
|
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
|
}
|
|
}
|
|
|
|
if (LIKELY(cannotBeIdentStart(m_current))) {
|
|
m_buffer8.shrink(0);
|
|
break;
|
|
}
|
|
|
|
if (UNLIKELY(isIdentStart(currentCodePoint()))) {
|
|
m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
|
|
token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
m_buffer8.shrink(0);
|
|
break;
|
|
case CharacterQuote: {
|
|
StringParseResult result = StringCannotBeParsed;
|
|
if (lexerFlags.contains(LexerFlags::DontBuildStrings))
|
|
result = parseString<false>(tokenData, strictMode);
|
|
else
|
|
result = parseString<true>(tokenData, strictMode);
|
|
|
|
if (UNLIKELY(result != StringParsedSuccessfully)) {
|
|
token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
shift();
|
|
token = STRING;
|
|
break;
|
|
}
|
|
case CharacterIdentifierStart: {
|
|
if constexpr (ASSERT_ENABLED) {
|
|
UChar32 codePoint;
|
|
U16_GET(m_code, 0, 0, m_codeEnd - m_code, codePoint);
|
|
ASSERT(isIdentStart(codePoint));
|
|
}
|
|
FALLTHROUGH;
|
|
}
|
|
case CharacterBackSlash:
|
|
parseIdent:
|
|
if (lexerFlags.contains(LexerFlags::DontBuildKeywords))
|
|
token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
|
|
else
|
|
token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
|
|
break;
|
|
case CharacterLineTerminator:
|
|
ASSERT(isLineTerminator(m_current));
|
|
shiftLineTerminator();
|
|
m_atLineStart = true;
|
|
m_hasLineTerminatorBeforeToken = true;
|
|
m_lineStart = m_code;
|
|
goto start;
|
|
case CharacterHash: {
|
|
// Hashbang is only permitted at the start of the source text.
|
|
auto next = peek(1);
|
|
if (next == '!' && !currentOffset()) {
|
|
shift();
|
|
shift();
|
|
goto inSingleLineComment;
|
|
}
|
|
// Otherwise, it could be a valid PrivateName.
|
|
if (Options::usePrivateClassFields() && (isSingleCharacterIdentStart(next) || next == '\\')) {
|
|
lexerFlags.remove(LexerFlags::DontBuildKeywords);
|
|
goto parseIdent;
|
|
}
|
|
goto invalidCharacter;
|
|
}
|
|
case CharacterPrivateIdentifierStart:
|
|
if (m_parsingBuiltinFunction)
|
|
goto parseIdent;
|
|
goto invalidCharacter;
|
|
case CharacterOtherIdentifierPart:
|
|
case CharacterInvalid:
|
|
goto invalidCharacter;
|
|
default:
|
|
RELEASE_ASSERT_NOT_REACHED();
|
|
m_lexErrorMessage = "Internal Error"_s;
|
|
token = ERRORTOK;
|
|
goto returnError;
|
|
}
|
|
|
|
m_atLineStart = false;
|
|
goto returnToken;
|
|
|
|
inSingleLineCommentCheckForDirectives:
|
|
// Script comment directives like "//# sourceURL=test.js".
|
|
if (UNLIKELY((m_current == '#' || m_current == '@') && isWhiteSpace(peek(1)))) {
|
|
shift();
|
|
shift();
|
|
parseCommentDirective();
|
|
}
|
|
// Fall through to complete single line comment parsing.
|
|
|
|
inSingleLineComment:
|
|
{
|
|
auto lineNumber = m_lineNumber;
|
|
auto endOffset = currentOffset();
|
|
auto lineStartOffset = currentLineStartOffset();
|
|
auto endPosition = currentPosition();
|
|
|
|
while (!isLineTerminator(m_current)) {
|
|
if (atEnd()) {
|
|
token = EOFTOK;
|
|
fillTokenInfo(tokenRecord, token, lineNumber, endOffset, lineStartOffset, endPosition);
|
|
return token;
|
|
}
|
|
shift();
|
|
}
|
|
shiftLineTerminator();
|
|
m_atLineStart = true;
|
|
m_hasLineTerminatorBeforeToken = true;
|
|
m_lineStart = m_code;
|
|
if (!lastTokenWasRestrKeyword())
|
|
goto start;
|
|
|
|
token = SEMICOLON;
|
|
fillTokenInfo(tokenRecord, token, lineNumber, endOffset, lineStartOffset, endPosition);
|
|
return token;
|
|
}
|
|
|
|
returnToken:
|
|
fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
|
|
return token;
|
|
|
|
invalidCharacter:
|
|
m_lexErrorMessage = invalidCharacterMessage();
|
|
token = ERRORTOK;
|
|
// Falls through to return error.
|
|
|
|
returnError:
|
|
m_error = true;
|
|
fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
|
|
RELEASE_ASSERT(token & CanBeErrorTokenFlag);
|
|
return token;
|
|
}
|
|
|
|
template <typename T>
|
|
static inline void orCharacter(UChar&, UChar);
|
|
|
|
template <>
|
|
inline void orCharacter<LChar>(UChar&, UChar) { }
|
|
|
|
template <>
|
|
inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
|
|
{
|
|
orAccumulator |= character;
|
|
}
|
|
|
|
template <typename T>
|
|
JSTokenType Lexer<T>::scanRegExp(JSToken* tokenRecord, UChar patternPrefix)
|
|
{
|
|
JSTokenData* tokenData = &tokenRecord->m_data;
|
|
ASSERT(m_buffer16.isEmpty());
|
|
|
|
bool lastWasEscape = false;
|
|
bool inBrackets = false;
|
|
UChar charactersOredTogether = 0;
|
|
|
|
if (patternPrefix) {
|
|
ASSERT(!isLineTerminator(patternPrefix));
|
|
ASSERT(patternPrefix != '/');
|
|
ASSERT(patternPrefix != '[');
|
|
record16(patternPrefix);
|
|
}
|
|
|
|
while (true) {
|
|
if (isLineTerminator(m_current) || atEnd()) {
|
|
m_buffer16.shrink(0);
|
|
JSTokenType token = UNTERMINATED_REGEXP_LITERAL_ERRORTOK;
|
|
fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
|
|
m_error = true;
|
|
m_lexErrorMessage = makeString("Unterminated regular expression literal '", getToken(*tokenRecord), "'");
|
|
return token;
|
|
}
|
|
|
|
T prev = m_current;
|
|
|
|
shift();
|
|
|
|
if (prev == '/' && !lastWasEscape && !inBrackets)
|
|
break;
|
|
|
|
record16(prev);
|
|
orCharacter<T>(charactersOredTogether, prev);
|
|
|
|
if (lastWasEscape) {
|
|
lastWasEscape = false;
|
|
continue;
|
|
}
|
|
|
|
switch (prev) {
|
|
case '[':
|
|
inBrackets = true;
|
|
break;
|
|
case ']':
|
|
inBrackets = false;
|
|
break;
|
|
case '\\':
|
|
lastWasEscape = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
tokenData->pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
|
|
m_buffer16.shrink(0);
|
|
|
|
ASSERT(m_buffer8.isEmpty());
|
|
while (LIKELY(isLatin1(m_current)) && isIdentPart(static_cast<LChar>(m_current))) {
|
|
record8(static_cast<LChar>(m_current));
|
|
shift();
|
|
}
|
|
|
|
// Normally this would not be a lex error but dealing with surrogate pairs here is annoying and it's going to be an error anyway...
|
|
if (UNLIKELY(!isLatin1(m_current))) {
|
|
m_buffer8.shrink(0);
|
|
JSTokenType token = INVALID_IDENTIFIER_UNICODE_ERRORTOK;
|
|
fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
|
|
m_error = true;
|
|
String codePoint = String::fromCodePoint(currentCodePoint());
|
|
if (!codePoint)
|
|
codePoint = "`invalid unicode character`";
|
|
m_lexErrorMessage = makeString("Invalid non-latin character in RexExp literal's flags '", getToken(*tokenRecord), codePoint, "'");
|
|
return token;
|
|
}
|
|
|
|
tokenData->flags = makeIdentifier(m_buffer8.data(), m_buffer8.size());
|
|
m_buffer8.shrink(0);
|
|
|
|
// Since RegExp always ends with /, m_atLineStart always becomes false.
|
|
m_atLineStart = false;
|
|
|
|
JSTokenType token = REGEXP;
|
|
fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
|
|
return token;
|
|
}
|
|
|
|
template <typename T>
|
|
JSTokenType Lexer<T>::scanTemplateString(JSToken* tokenRecord, RawStringsBuildMode rawStringsBuildMode)
|
|
{
|
|
JSTokenData* tokenData = &tokenRecord->m_data;
|
|
ASSERT(!m_error);
|
|
ASSERT(m_buffer16.isEmpty());
|
|
|
|
// Leading backquote ` (for template head) or closing brace } (for template trailing) are already shifted in the previous token scan.
|
|
// So in this re-scan phase, shift() is not needed here.
|
|
StringParseResult result = parseTemplateLiteral(tokenData, rawStringsBuildMode);
|
|
JSTokenType token = ERRORTOK;
|
|
if (UNLIKELY(result != StringParsedSuccessfully)) {
|
|
token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
|
|
m_error = true;
|
|
} else
|
|
token = TEMPLATE;
|
|
|
|
// Since TemplateString always ends with ` or }, m_atLineStart always becomes false.
|
|
m_atLineStart = false;
|
|
fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
|
|
return token;
|
|
}
|
|
|
|
template <typename T>
|
|
void Lexer<T>::clear()
|
|
{
|
|
m_arena = nullptr;
|
|
|
|
Vector<LChar> newBuffer8;
|
|
m_buffer8.swap(newBuffer8);
|
|
|
|
Vector<UChar> newBuffer16;
|
|
m_buffer16.swap(newBuffer16);
|
|
|
|
Vector<UChar> newBufferForRawTemplateString16;
|
|
m_bufferForRawTemplateString16.swap(newBufferForRawTemplateString16);
|
|
|
|
m_isReparsingFunction = false;
|
|
}
|
|
|
|
// Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h.
// These explicit instantiations make this translation unit emit the code for Lexer<LChar> and
// Lexer<UChar>, which is what allows the member function templates to be defined in this file.
// NOTE(review): LChar/UChar presumably correspond to 8-bit and 16-bit source text -- confirm in Lexer.h.
|
|
template class Lexer<LChar>;
|
|
template class Lexer<UChar>;
|
|
|
|
} // namespace JSC
|