mirror of
https://github.com/Samsung/escargot.git
synced 2026-06-22 10:01:50 +00:00
1941 lines
59 KiB
C++
1941 lines
59 KiB
C++
/*
|
|
* Copyright (c) 2016-present Samsung Electronics Co., Ltd
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
|
|
* USA
|
|
*/
|
|
|
|
#include "Escargot.h"
|
|
#include "parser/Lexer.h"
|
|
|
|
// These two must be the last because they overwrite the ASSERT macro.
|
|
#include "double-conversion.h"
|
|
#include "ieee.h"
|
|
|
|
using namespace Escargot::EscargotLexer;
|
|
|
|
namespace Escargot {
|
|
|
|
// Definitions of the static error-message strings declared in Messages.
const char* Messages::InvalidHexEscapeSequence = "Invalid hexadecimal escape sequence";
const char* Messages::UnexpectedTokenIllegal = "Unexpected token ILLEGAL";
const char* Messages::UnterminatedRegExp = "Invalid regular expression: missing /";
const char* Messages::TemplateOctalLiteral = "Octal literals are not allowed in template strings.";
|
|
|
|
/* Entries of identRangeLength that are >= IDENT_RANGE_LONG are not lengths;
 * they select an entry of identRangeLongLength (see isIdentifierPartSlow). */
#define IDENT_RANGE_LONG 200

/* The largest code point that a UTF-16 surrogate pair can represent is 0x10ffff,
 * so any value above it can serve as the "empty" marker. UINT32_MAX is chosen
 * because it is a valid immediate for machine instructions. */
#define EMPTY_CODE_POINT UINT32_MAX

/* The largest value an octal escape can produce is 255, so any higher
 * value can represent "not an octal value". */
#define NON_OCTAL_VALUE 256
|
|
|
|
/* Per-character classification flags for the 128 ASCII code units,
 * indexed by character code. */
#define LEXER_IDENT_START_AND_PART (LexerIsCharIdentStart | LexerIsCharIdent)
char EscargotLexer::g_asciiRangeCharMap[128] = {
    /* 0x00 - 0x08 */
    0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x09 TAB, 0x0A LF, 0x0B VT, 0x0C FF, 0x0D CR */
    LexerIsCharWhiteSpace,
    LexerIsCharLineTerminator,
    LexerIsCharWhiteSpace,
    LexerIsCharWhiteSpace,
    LexerIsCharLineTerminator,
    /* 0x0E - 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 space */
    LexerIsCharWhiteSpace,
    /* 0x21 '!' - 0x23 '#' */
    0, 0, 0,
    /* 0x24 '$' */
    LEXER_IDENT_START_AND_PART,
    /* 0x25 '%' - 0x2F '/' */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 '0' - 0x39 '9' */
    LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent,
    LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent,
    /* 0x3A ':' - 0x40 '@' */
    0, 0, 0, 0, 0, 0, 0,
    /* 0x41 'A' - 0x5A 'Z' */
    LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART,
    LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART,
    LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART,
    LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART,
    LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART,
    LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART,
    LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART,
    /* 0x5B '[' */
    0,
    /* 0x5C '\\' (starts a unicode escape inside an identifier) */
    LEXER_IDENT_START_AND_PART,
    /* 0x5D ']', 0x5E '^' */
    0, 0,
    /* 0x5F '_' */
    LEXER_IDENT_START_AND_PART,
    /* 0x60 '`' */
    0,
    /* 0x61 'a' - 0x7A 'z' */
    LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART,
    LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART,
    LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART,
    LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART,
    LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART,
    LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART,
    LEXER_IDENT_START_AND_PART, LEXER_IDENT_START_AND_PART,
    /* 0x7B '{' - 0x7F DEL */
    0, 0, 0, 0, 0
};
#undef LEXER_IDENT_START_AND_PART
|
|
|
|
// White space test for non-ASCII code units (callers must pass ch >= 0x80).
NEVER_INLINE bool EscargotLexer::isWhiteSpaceSlowCase(char16_t ch)
{
    ASSERT(ch >= 0x80);

    // Below U+1680 the only accepted character is U+00A0.
    if (LIKELY(ch < 0x1680)) {
        return ch == 0xA0;
    }

    // U+2000..U+200A is one contiguous accepted run.
    if (ch >= 0x2000 && ch <= 0x200A) {
        return true;
    }

    switch (ch) {
    case 0x1680:
    case 0x180E:
    case 0x202F:
    case 0x205F:
    case 0x3000:
    case 0xFEFF:
        return true;
    default:
        return false;
    }
}
|
|
|
|
/* First code point of every identifier range, in ascending order (searched by
 * isIdentifierPartSlow). The array has one more entry than identRangeLength:
 * the final value only serves as the upper bound of the last range. */
static const uint16_t identRangeStart[429] = {
    170, 181, 183, 186, 192, 216, 248, 710, 736, 748, 750, 768, 886, 890, 895, 902, 908, 910, 931, 1015, 1155, 1162,
    1329, 1369, 1377, 1425, 1471, 1473, 1476, 1479, 1488, 1520, 1552, 1568, 1646, 1749, 1759, 1770, 1791, 1808, 1869,
    1984, 2042, 2048, 2112, 2208, 2276, 2406, 2417, 2437, 2447, 2451, 2474, 2482, 2486, 2492, 2503, 2507, 2519, 2524,
    2527, 2534, 2561, 2565, 2575, 2579, 2602, 2610, 2613, 2616, 2620, 2622, 2631, 2635, 2641, 2649, 2654, 2662, 2689,
    2693, 2703, 2707, 2730, 2738, 2741, 2748, 2759, 2763, 2768, 2784, 2790, 2817, 2821, 2831, 2835, 2858, 2866, 2869,
    2876, 2887, 2891, 2902, 2908, 2911, 2918, 2929, 2946, 2949, 2958, 2962, 2969, 2972, 2974, 2979, 2984, 2990, 3006,
    3014, 3018, 3024, 3031, 3046, 3072, 3077, 3086, 3090, 3114, 3133, 3142, 3146, 3157, 3160, 3168, 3174, 3201, 3205,
    3214, 3218, 3242, 3253, 3260, 3270, 3274, 3285, 3294, 3296, 3302, 3313, 3329, 3333, 3342, 3346, 3389, 3398, 3402,
    3415, 3424, 3430, 3450, 3458, 3461, 3482, 3507, 3517, 3520, 3530, 3535, 3542, 3544, 3558, 3570, 3585, 3648, 3664,
    3713, 3716, 3719, 3722, 3725, 3732, 3737, 3745, 3749, 3751, 3754, 3757, 3771, 3776, 3782, 3784, 3792, 3804, 3840,
    3864, 3872, 3893, 3895, 3897, 3902, 3913, 3953, 3974, 3993, 4038, 4096, 4176, 4256, 4295, 4301, 4304, 4348, 4682,
    4688, 4696, 4698, 4704, 4746, 4752, 4786, 4792, 4800, 4802, 4808, 4824, 4882, 4888, 4957, 4969, 4992, 5024, 5121,
    5743, 5761, 5792, 5870, 5888, 5902, 5920, 5952, 5984, 5998, 6002, 6016, 6103, 6108, 6112, 6155, 6160, 6176, 6272,
    6320, 6400, 6432, 6448, 6470, 6512, 6528, 6576, 6608, 6656, 6688, 6752, 6783, 6800, 6823, 6832, 6912, 6992, 7019,
    7040, 7168, 7232, 7245, 7376, 7380, 7416, 7424, 7676, 7960, 7968, 8008, 8016, 8025, 8027, 8029, 8031, 8064, 8118,
    8126, 8130, 8134, 8144, 8150, 8160, 8178, 8182, 8204, 8255, 8276, 8305, 8319, 8336, 8400, 8417, 8421, 8450, 8455,
    8458, 8469, 8472, 8484, 8486, 8488, 8490, 8508, 8517, 8526, 8544, 11264, 11312, 11360, 11499, 11520, 11559, 11565,
    11568, 11631, 11647, 11680, 11688, 11696, 11704, 11712, 11720, 11728, 11736, 11744, 12293, 12321, 12337, 12344,
    12353, 12441, 12449, 12540, 12549, 12593, 12704, 12784, 13312, 19968, 40960, 42192, 42240, 42512, 42560, 42612,
    42623, 42655, 42775, 42786, 42891, 42896, 42928, 42999, 43072, 43136, 43216, 43232, 43259, 43264, 43312, 43360,
    43392, 43471, 43488, 43520, 43584, 43600, 43616, 43642, 43739, 43744, 43762, 43777, 43785, 43793, 43808, 43816,
    43824, 43868, 43876, 43968, 44012, 44016, 44032, 55216, 55243, 63744, 64112, 64256, 64275, 64285, 64298, 64312,
    64318, 64320, 64323, 64326, 64467, 64848, 64914, 65008, 65024, 65056, 65075, 65101, 65136, 65142, 65296, 65313,
    65343, 65345, 65382, 65474, 65482, 65490, 65498, 65535
};
|
|
|
|
/* Length of the identifier range starting at the identRangeStart entry with
 * the same index. Values >= IDENT_RANGE_LONG do not fit in a byte and are
 * instead (value - IDENT_RANGE_LONG) indices into identRangeLongLength. */
static const uint8_t identRangeLength[428] = {
    1, 1, 1, 1, 23, 31, 200, 12, 5, 1, 1, 117, 2, 4, 1, 5, 1, 20, 83, 139, 5, 166, 38, 1, 39, 45, 1, 2, 2, 1, 27, 3,
    11, 74, 102, 8, 10, 19, 1, 59, 101, 54, 1, 46, 28, 19, 128, 10, 19, 8, 2, 22, 7, 1, 4, 9, 2, 4, 1, 2, 5, 12, 3, 6,
    2, 22, 7, 2, 2, 2, 1, 5, 2, 3, 1, 4, 1, 16, 3, 9, 3, 22, 7, 2, 5, 10, 3, 3, 1, 4, 10, 3, 8, 2, 22, 7, 2, 5, 9, 2,
    3, 2, 2, 5, 10, 1, 2, 6, 3, 4, 2, 1, 2, 2, 3, 12, 5, 3, 4, 1, 1, 10, 4, 8, 3, 23, 16, 8, 3, 4, 2, 2, 4, 10, 3, 8,
    3, 23, 10, 5, 9, 3, 4, 2, 1, 4, 10, 2, 3, 8, 3, 41, 8, 3, 5, 1, 4, 10, 6, 2, 18, 24, 9, 1, 7, 1, 6, 1, 8, 10, 2,
    58, 15, 10, 2, 1, 2, 1, 1, 4, 7, 3, 1, 1, 2, 13, 3, 5, 1, 6, 10, 4, 1, 2, 10, 1, 1, 1, 10, 36, 20, 18, 36, 1, 74,
    78, 38, 1, 1, 43, 201, 4, 7, 1, 4, 41, 4, 33, 4, 7, 1, 4, 15, 57, 4, 67, 3, 9, 16, 85, 202, 17, 26, 75, 11, 13, 7,
    21, 20, 13, 3, 2, 84, 1, 2, 10, 3, 10, 88, 43, 70, 31, 12, 12, 40, 5, 44, 26, 11, 28, 63, 29, 11, 10, 1, 14, 76,
    10, 9, 116, 56, 10, 49, 3, 35, 2, 203, 204, 6, 38, 6, 8, 1, 1, 1, 31, 53, 7, 1, 3, 7, 4, 6, 13, 3, 7, 2, 2, 1, 1,
    1, 13, 13, 1, 12, 1, 1, 10, 1, 6, 1, 1, 1, 16, 4, 5, 1, 41, 47, 47, 133, 9, 38, 1, 1, 56, 1, 24, 7, 7, 7, 7, 7, 7,
    7, 7, 32, 3, 15, 5, 5, 86, 7, 90, 4, 41, 94, 27, 16, 205, 206, 207, 46, 208, 28, 48, 10, 31, 83, 9, 103, 4, 30, 2,
    49, 52, 69, 10, 24, 1, 46, 36, 29, 65, 11, 31, 55, 14, 10, 23, 73, 3, 16, 5, 6, 6, 6, 7, 7, 43, 4, 2, 43, 2, 10,
    209, 23, 49, 210, 106, 7, 5, 12, 13, 5, 1, 2, 2, 108, 211, 64, 54, 12, 16, 14, 2, 3, 5, 135, 10, 26, 1, 26, 89, 6,
    6, 6, 3
};
|
|
|
|
/* Range lengths that do not fit in identRangeLength. An identRangeLength
 * entry of (IDENT_RANGE_LONG + i) refers to element i of this table. */
static const uint16_t identRangeLongLength[12] = {
    458, 333, 620, 246, 282, 6582, 20941, 1165, 269, 11172, 366, 363
};
|
|
|
|
// Binary search over identRangeStart for the range that could contain ch,
// then checks ch against that range's length. Returns false when no range
// contains ch.
static NEVER_INLINE bool isIdentifierPartSlow(char32_t ch)
{
    int low = 0;
    int high = (sizeof(identRangeStart) / sizeof(uint16_t)) - 1;

    do {
        const int mid = (low + high) >> 1;
        const char32_t first = identRangeStart[mid];

        if (ch < first) {
            // ch lies before this range: continue in the lower half.
            high = mid;
            continue;
        }

        if (ch >= identRangeStart[mid + 1]) {
            // ch lies at or after the next range start: continue in the upper half.
            low = mid + 1;
            continue;
        }

        // first <= ch < next start, so only this range can contain ch.
        char32_t span = identRangeLength[mid];
        if (UNLIKELY(span >= IDENT_RANGE_LONG)) {
            // Large lengths are stored indirectly in the long-length table.
            span = identRangeLongLength[span - IDENT_RANGE_LONG];
        }
        return ch < first + span;
    } while (low != high);

    return false;
}
|
|
|
|
// True when ch may appear inside an identifier: table lookup for ASCII,
// range search for everything else.
static ALWAYS_INLINE bool isIdentifierPart(char32_t ch)
{
    if (UNLIKELY(ch >= 128)) {
        return isIdentifierPartSlow(ch);
    }

    return g_asciiRangeCharMap[ch] & LexerIsCharIdent;
}
|
|
|
|
// True when ch may begin an identifier. ASCII uses the lookup table; non-ASCII
// characters are checked with the same range search as isIdentifierPart.
static ALWAYS_INLINE bool isIdentifierStart(char32_t ch)
{
    if (UNLIKELY(ch >= 128)) {
        return isIdentifierPartSlow(ch);
    }

    return g_asciiRangeCharMap[ch] & LexerIsCharIdentStart;
}
|
|
|
|
// True for '0'..'9'.
static ALWAYS_INLINE bool isDecimalDigit(char16_t ch)
{
    // A negative difference becomes a huge unsigned value and fails the test.
    return static_cast<unsigned>(ch - '0') < 10u;
}
|
|
|
|
// True for '0'..'9', 'a'..'f' and 'A'..'F'.
static ALWAYS_INLINE bool isHexDigit(char16_t ch)
{
    if (isDecimalDigit(ch)) {
        return true;
    }

    // Setting bit 0x20 folds ASCII upper-case letters onto lower-case ones.
    const int lowered = ch | 0x20;
    return lowered >= 'a' && lowered <= 'f';
}
|
|
|
|
// True for '0'..'7'.
static ALWAYS_INLINE bool isOctalDigit(char16_t ch)
{
    return static_cast<unsigned>(ch - '0') < 8u;
}
|
|
|
|
// Numeric value (0..7) of an octal digit character; ch must be an octal digit.
static ALWAYS_INLINE char16_t octalValue(char16_t ch)
{
    ASSERT(isOctalDigit(ch));
    const char16_t value = ch - '0';
    return value;
}
|
|
|
|
// Numeric value of a hex digit character. Performs no validation, so callers
// are expected to have checked isHexDigit(ch) first.
static ALWAYS_INLINE uint8_t toHexNumericValue(char16_t ch)
{
    if (ch < 'A') {
        return ch - '0';
    }

    // The mask makes 'a'..'f' yield the same 10..15 as 'A'..'F'.
    return (ch - 'A' + 10) & 0xF;
}
|
|
|
|
static int hexValue(char16_t ch)
|
|
{
|
|
if (ch >= '0' && ch <= '9') {
|
|
return ch - '0';
|
|
}
|
|
|
|
ASSERT((ch | 0x20) >= 'a' && (ch | 0x20) <= 'f');
|
|
|
|
return (ch | 0x20) - ('a' - 10);
|
|
}
|
|
|
|
// UTF-16 encoding of a single code point: one code unit for values below
// 0x10000, otherwise a surrogate pair. `length` is the number of code units
// used; the remaining elements of `data` are zero.
struct ParserCharPiece {
    char16_t data[3];
    size_t length;

    ParserCharPiece(const char32_t a)
        : data{ 0, 0, 0 } // zero everything so no element is ever left indeterminate
        , length(1)
    {
        if (a < 0x10000) {
            data[0] = static_cast<char16_t>(a);
        } else {
            // Split the 20-bit offset into a high and a low surrogate.
            const char32_t offset = a - 0x10000;
            data[0] = static_cast<char16_t>(0xD800 + (offset >> 10));
            data[1] = static_cast<char16_t>(0xDC00 + (offset & 1023));
            length = 2;
        }
    }
};
|
|
|
|
// Maps a keyword kind to the matching pre-built AtomicString of the context.
// An unknown kind trips ASSERT_NOT_REACHED and yields stringError.
AtomicString keywordToString(::Escargot::Context* ctx, KeywordKind keyword)
{
    const auto& strings = ctx->staticStrings();

    switch (keyword) {
    case IfKeyword:
        return strings.stringIf;
    case InKeyword:
        return strings.stringIn;
    case DoKeyword:
        return strings.stringDo;
    case VarKeyword:
        return strings.stringVar;
    case ForKeyword:
        return strings.stringFor;
    case NewKeyword:
        return strings.stringNew;
    case TryKeyword:
        return strings.stringTry;
    case ThisKeyword:
        return strings.stringThis;
    case ElseKeyword:
        return strings.stringElse;
    case CaseKeyword:
        return strings.stringCase;
    case VoidKeyword:
        return strings.stringVoid;
    case WithKeyword:
        return strings.stringWith;
    case EnumKeyword:
        return strings.stringEnum;
    case WhileKeyword:
        return strings.stringWhile;
    case BreakKeyword:
        return strings.stringBreak;
    case CatchKeyword:
        return strings.stringCatch;
    case ThrowKeyword:
        return strings.stringThrow;
    case ConstKeyword:
        return strings.stringConst;
    case ClassKeyword:
        return strings.stringClass;
    case SuperKeyword:
        return strings.stringSuper;
    case ReturnKeyword:
        return strings.stringReturn;
    case TypeofKeyword:
        return strings.stringTypeof;
    case DeleteKeyword:
        return strings.stringDelete;
    case SwitchKeyword:
        return strings.stringSwitch;
    case ExportKeyword:
        return strings.stringExport;
    case ImportKeyword:
        return strings.stringImport;
    case DefaultKeyword:
        return strings.stringDefault;
    case FinallyKeyword:
        return strings.stringFinally;
    case ExtendsKeyword:
        return strings.stringExtends;
    case FunctionKeyword:
        return strings.function;
    case ContinueKeyword:
        return strings.stringContinue;
    case DebuggerKeyword:
        return strings.stringDebugger;
    case InstanceofKeyword:
        return strings.stringInstanceof;
    case ImplementsKeyword:
        return strings.implements;
    case InterfaceKeyword:
        return strings.interface;
    case PackageKeyword:
        return strings.package;
    case PrivateKeyword:
        return strings.stringPrivate;
    case ProtectedKeyword:
        return strings.stringProtected;
    case PublicKeyword:
        return strings.stringPublic;
    case StaticKeyword:
        return strings.stringStatic;
    case YieldKeyword:
        return strings.yield;
    case LetKeyword:
        return strings.let;
    default:
        ASSERT_NOT_REACHED();
        return strings.stringError;
    }
}
|
|
|
|
// Builds the message "Line <line>: <description>", wraps it together with the
// position information in an esprima::Error and throws that error by value.
void ErrorHandler::throwError(size_t index, size_t line, size_t col, String* description, ErrorObject::Code code)
{
    // Render "<line>: " backwards from the end of a scratch buffer.
    const size_t bufferLength = 64;
    char lineStringBuf[bufferLength];
    char* const bufEnd = lineStringBuf + bufferLength;
    char* cursor = bufEnd;

    *--cursor = ' ';
    *--cursor = ':';

    size_t remaining = line;
    do {
        ASSERT(cursor > lineStringBuf);
        *--cursor = remaining % 10 + '0';
        remaining /= 10;
    } while (remaining > 0);

    UTF16StringDataNonGCStd msg = u"Line ";
    msg += UTF16StringDataNonGCStd(cursor, bufEnd);

    if (description->length()) {
        msg += UTF16StringDataNonGCStd(description->toUTF16StringData().data());
    }

    esprima::Error* error = new (NoGC) esprima::Error(new UTF16String(msg.data(), msg.length()));
    error->index = index;
    error->lineNumber = line;
    error->column = col;
    error->description = description;
    error->errorCode = code;

    throw * error;
}
|
|
|
|
ParserStringView Scanner::SmallScannerResult::relatedSource(const ParserStringView& source) const
|
|
{
|
|
return ParserStringView(source, this->start, this->end);
|
|
}
|
|
|
|
StringView Scanner::SmallScannerResult::relatedSource(const StringView& source) const
|
|
{
|
|
return StringView(source, this->start, this->end);
|
|
}
|
|
|
|
ParserStringView Scanner::ScannerResult::relatedSource(const ParserStringView& source)
|
|
{
|
|
return ParserStringView(source, this->start, this->end);
|
|
}
|
|
|
|
StringView Scanner::ScannerResult::relatedSource(const StringView& source)
|
|
{
|
|
return StringView(source, this->start, this->end);
|
|
}
|
|
|
|
// Produces the string of this token as a Value:
//  - keyword tokens use the context's pre-built keyword string,
//  - tokens flagged hasAllocatedString use the (lazily constructed) decoded string,
//  - everything else becomes a new StringView over the original source.
Value Scanner::ScannerResult::valueStringLiteralToValue(Scanner* scannerInstance)
{
    if (this->type == Token::KeywordToken) {
        return keywordToString(scannerInstance->escargotContext, this->valueKeywordKind).string();
    }

    if (!this->hasAllocatedString) {
        return new StringView(scannerInstance->sourceAsNormalView, this->valueStringLiteralData.m_start, this->valueStringLiteralData.m_end);
    }

    // Decode the literal on first use only.
    if (!this->valueStringLiteralData.m_stringIfNewlyAllocated) {
        constructStringLiteral(scannerInstance);
    }
    return this->valueStringLiteralData.m_stringIfNewlyAllocated;
}
|
|
|
|
// Same selection logic as valueStringLiteralToValue, but the result is a
// ParserStringView instead of a Value.
ParserStringView Scanner::ScannerResult::valueStringLiteral(Scanner* scannerInstance)
{
    if (this->type == Token::KeywordToken) {
        AtomicString keywordName = keywordToString(scannerInstance->escargotContext, this->valueKeywordKind);
        return ParserStringView(keywordName.string(), 0, keywordName.string()->length());
    }

    if (!this->hasAllocatedString) {
        return ParserStringView(scannerInstance->source, this->valueStringLiteralData.m_start, this->valueStringLiteralData.m_end);
    }

    // Decode the literal on first use only.
    if (!this->valueStringLiteralData.m_stringIfNewlyAllocated) {
        constructStringLiteral(scannerInstance);
    }
    return ParserStringView(this->valueStringLiteralData.m_stringIfNewlyAllocated);
}
|
|
|
|
// Returns the numeric value of this token. When the value has not been
// computed yet (hasNonComputedNumberLiteral), the token text is converted with
// double-conversion and the result is cached in valueNumber.
double Scanner::ScannerResult::valueNumberLiteral(Scanner* scannerInstance)
{
    if (!this->hasNonComputedNumberLiteral) {
        return this->valueNumber;
    }

    const auto& bd = scannerInstance->source.bufferAccessData();
    const int length = this->end - this->start;
    char* buffer;

    if (bd.has8BitContent) {
        // 8-bit sources can be parsed in place.
        buffer = ((char*)bd.buffer) + this->start;
    } else {
        // 16-bit sources are narrowed into a temporary char buffer first.
        buffer = ALLOCA(this->end - this->start, char, ec);

        for (int i = 0; i < length; i++) {
            buffer[i] = bd.uncheckedCharAtFor16Bit(i + this->start);
        }
    }

    const int converterFlags = double_conversion::StringToDoubleConverter::ALLOW_HEX
        | double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES
        | double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES;
    double_conversion::StringToDoubleConverter converter(converterFlags,
                                                         0.0, double_conversion::Double::NaN(),
                                                         "Infinity", "NaN");

    int lengthDummy;
    this->valueNumber = converter.StringToDouble(buffer, length, &lengthDummy);
    this->hasNonComputedNumberLiteral = false;

    return this->valueNumber;
}
|
|
|
|
// Appends the character(s) denoted by the escape sequence whose first
// character after the backslash is `ch`. The scanner is positioned right after
// `ch` and is advanced past any further characters belonging to the escape.
// isEveryCharLatin1 is cleared when a code unit >= 256 is appended.
void Scanner::ScannerResult::constructStringLiteralHelperAppendUTF16(Scanner* scannerInstance, char16_t ch, UTF16StringDataNonGCStd& stringUTF16, bool& isEveryCharLatin1)
{
    // Single-character escapes map directly to one control character.
    char16_t simpleEscape = 0;
    switch (ch) {
    case 'n':
        simpleEscape = '\n';
        break;
    case 'r':
        simpleEscape = '\r';
        break;
    case 't':
        simpleEscape = '\t';
        break;
    case 'b':
        simpleEscape = '\b';
        break;
    case 'f':
        simpleEscape = '\f';
        break;
    case 'v':
        simpleEscape = '\x0B';
        break;
    default:
        break;
    }

    if (simpleEscape) {
        stringUTF16 += simpleEscape;
        return;
    }

    // \uXXXX, \u{...} and \xXX
    if (ch == 'u' || ch == 'x') {
        char32_t codePoint;
        if (scannerInstance->peekChar() == '{') {
            ++scannerInstance->index;
            codePoint = scannerInstance->scanUnicodeCodePointEscape();
        } else {
            codePoint = scannerInstance->scanHexEscape(ch);
        }

        ParserCharPiece piece(codePoint);
        stringUTF16.append(piece.data, piece.data + piece.length);
        if (piece.length != 1 || piece.data[0] >= 256) {
            isEveryCharLatin1 = false;
        }
        return;
    }

    // Octal escapes; anything else stands for itself.
    if (ch && isOctalDigit(ch)) {
        uint16_t octToDec = scannerInstance->octalToDecimal(ch, true);
        stringUTF16 += octToDec;
        ASSERT(octToDec < 256);
    } else {
        stringUTF16 += ch;
        if (ch >= 256) {
            isEveryCharLatin1 = false;
        }
    }
}
|
|
|
|
// Re-scans the string literal of this token, decoding escape sequences, and
// stores the resulting string in valueStringLiteralData.m_stringIfNewlyAllocated.
// The scanner position (index, lineNumber, lineStart) is saved and restored.
void Scanner::ScannerResult::constructStringLiteral(Scanner* scannerInstance)
{
    const size_t indexBackup = scannerInstance->index;
    const size_t lineNumberBackup = scannerInstance->lineNumber;
    const size_t lineStartBackup = scannerInstance->lineStart;

    scannerInstance->index = this->start;
    const char16_t quote = scannerInstance->peekChar();
    // A string literal must start with a quote.
    ASSERT((quote == '\'' || quote == '"'));
    ++scannerInstance->index;

    bool isEveryCharLatin1 = true;
    UTF16StringDataNonGCStd stringUTF16;

    for (;;) {
        char16_t ch = scannerInstance->peekChar();
        ++scannerInstance->index;

        if (ch == quote) {
            break;
        }

        if (UNLIKELY(ch == '\\')) {
            ch = scannerInstance->peekChar();
            ++scannerInstance->index;

            if (ch && isLineTerminator(ch)) {
                // Backslash followed by a line terminator contributes no characters.
                ++scannerInstance->lineNumber;
                const char16_t following = scannerInstance->peekChar();
                if ((ch == '\r' && following == '\n') || (ch == '\n' && following == '\r')) {
                    ++scannerInstance->index;
                }
                scannerInstance->lineStart = scannerInstance->index;
            } else {
                this->constructStringLiteralHelperAppendUTF16(scannerInstance, ch, stringUTF16, isEveryCharLatin1);
            }
            continue;
        }

        if (UNLIKELY(isLineTerminator(ch))) {
            break;
        }

        stringUTF16 += ch;
        if (ch >= 256) {
            isEveryCharLatin1 = false;
        }
    }

    scannerInstance->index = indexBackup;
    scannerInstance->lineNumber = lineNumberBackup;
    scannerInstance->lineStart = lineStartBackup;

    // Use the Latin-1 string class when every code unit is below 256.
    String* newStr;
    if (isEveryCharLatin1) {
        newStr = new Latin1String(stringUTF16.data(), stringUTF16.length());
    } else {
        newStr = new UTF16String(stringUTF16.data(), stringUTF16.length());
    }
    this->valueStringLiteralData.m_stringIfNewlyAllocated = newStr;
}
|
|
|
|
// Creates a scanner positioned at the beginning of `code`.
// startLine seeds lineNumber; startColumn seeds lineStart.
Scanner::Scanner(::Escargot::Context* escargotContext, StringView code, size_t startLine, size_t startColumn)
    : source(code, 0, code.length()),
      sourceAsNormalView(code),
      escargotContext(escargotContext),
      sourceCodeAccessData(code.bufferAccessData()),
      length(code.length()),
      index(0),
      lineNumber(startLine),
      lineStart(startColumn)
{
    ASSERT(escargotContext != nullptr);
}
|
|
|
|
void Scanner::skipSingleLineComment(void)
|
|
{
|
|
while (!this->eof()) {
|
|
char16_t ch = this->peekCharWithoutEOF();
|
|
++this->index;
|
|
|
|
if (isLineTerminator(ch)) {
|
|
if (ch == 13 && this->peekCharWithoutEOF() == 10) {
|
|
++this->index;
|
|
}
|
|
++this->lineNumber;
|
|
this->lineStart = this->index;
|
|
// return comments;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
void Scanner::skipMultiLineComment(void)
|
|
{
|
|
while (!this->eof()) {
|
|
char16_t ch = this->peekCharWithoutEOF();
|
|
++this->index;
|
|
|
|
if (isLineTerminator(ch)) {
|
|
if (ch == 0x0D && this->peekCharWithoutEOF() == 0x0A) {
|
|
++this->index;
|
|
}
|
|
++this->lineNumber;
|
|
this->lineStart = this->index;
|
|
} else if (ch == 0x2A && this->peekCharWithoutEOF() == 0x2F) {
|
|
// Block comment ends with '*/'.
|
|
++this->index;
|
|
return;
|
|
}
|
|
}
|
|
|
|
throwUnexpectedToken();
|
|
}
|
|
|
|
char32_t Scanner::scanHexEscape(char prefix)
|
|
{
|
|
size_t len = (prefix == 'u') ? 4 : 2;
|
|
char32_t code = 0;
|
|
|
|
for (size_t i = 0; i < len; ++i) {
|
|
if (!this->eof() && isHexDigit(this->peekCharWithoutEOF())) {
|
|
code = code * 16 + hexValue(this->peekCharWithoutEOF());
|
|
++this->index;
|
|
} else {
|
|
return EMPTY_CODE_POINT;
|
|
}
|
|
}
|
|
|
|
return code;
|
|
}
|
|
|
|
char32_t Scanner::scanUnicodeCodePointEscape()
|
|
{
|
|
// At least, one hex digit is required.
|
|
if (this->eof() || this->peekCharWithoutEOF() == '}') {
|
|
this->throwUnexpectedToken();
|
|
}
|
|
|
|
char32_t code = 0;
|
|
char16_t ch;
|
|
|
|
while (!this->eof()) {
|
|
ch = this->peekCharWithoutEOF();
|
|
++this->index;
|
|
if (!isHexDigit(ch)) {
|
|
break;
|
|
}
|
|
code = code * 16 + hexValue(ch);
|
|
}
|
|
|
|
if (code > 0x10FFFF || ch != '}') {
|
|
this->throwUnexpectedToken();
|
|
}
|
|
|
|
return code;
|
|
}
|
|
|
|
// Fast path for identifiers that consist only of plain identifier characters:
// the result refers directly into the source buffer and carries no allocated
// string. When a backslash or a surrogate code unit is encountered, scanning
// restarts from the beginning via getComplexIdentifier().
Scanner::ScanIDResult Scanner::getIdentifier()
{
    const size_t start = this->index;
    ++this->index; // the first character was already classified by the caller -- TODO confirm

    while (UNLIKELY(!this->eof())) {
        const char16_t ch = this->peekCharWithoutEOF();

        // Backslash (U+005C) marks a unicode escape sequence; surrogates need
        // pair handling. Both are left to the slow path.
        // NOTE(review): the upper bound is exclusive, so U+DFFF itself does not
        // take this path -- confirm whether that is intended.
        if (UNLIKELY(ch == 0x5C || (ch >= 0xD800 && ch < 0xDFFF))) {
            this->index = start;
            return this->getComplexIdentifier();
        }

        if (!isIdentifierPart(ch)) {
            break;
        }
        ++this->index;
    }

    // Describe the identifier as a window into the source buffer.
    const auto& srcData = this->source.bufferAccessData();
    void* identifierBuffer;
    if (srcData.has8BitContent) {
        identifierBuffer = reinterpret_cast<void*>(((LChar*)srcData.buffer) + start);
    } else {
        identifierBuffer = reinterpret_cast<void*>(((char16_t*)srcData.buffer) + start);
    }
    StringBufferAccessData ad(srcData.has8BitContent, this->index - start, identifierBuffer);

    return std::make_tuple(ad, nullptr);
}
|
|
|
|
// Slow path for identifiers containing unicode escapes or surrogate pairs.
// The identifier text is decoded into a newly allocated UTF16String; the
// result holds that string and its buffer access data.
// Calls throwUnexpectedToken() for a backslash not followed by 'u' and for a
// \uXXXX escape that is malformed or not a valid identifier character.
Scanner::ScanIDResult Scanner::getComplexIdentifier()
{
    char16_t cp = this->codePointAt(this->index);
    ParserCharPiece piece = ParserCharPiece(cp);
    UTF16StringDataNonGCStd id(piece.data, piece.length);
    this->index += id.length();

    // '\u' (U+005C, U+0075) denotes an escaped character.
    char32_t ch;
    if (cp == 0x5C) {
        if (this->peekChar() != 0x75) {
            this->throwUnexpectedToken();
        }
        ++this->index;
        if (this->peekChar() == '{') {
            ++this->index;
            ch = this->scanUnicodeCodePointEscape();
        } else {
            ch = this->scanHexEscape('u');
            cp = ch;
            if (ch == EMPTY_CODE_POINT || ch == '\\' || !isIdentifierStart(cp)) {
                this->throwUnexpectedToken();
            }
        }
        // Replace the backslash with the decoded character. Encode through
        // ParserCharPiece so that code points above U+FFFF become a surrogate
        // pair instead of being truncated to a single 16-bit code unit.
        piece = ParserCharPiece(ch);
        id.assign(piece.data, piece.length);
    }

    while (!this->eof()) {
        cp = this->codePointAt(this->index);
        if (!isIdentifierPart(cp)) {
            break;
        }

        ch = cp;

        if (this->peekChar() >= 0xD800 && this->peekChar() < 0xDFFF) {
            // Combine a surrogate with the following trail surrogate, if any.
            // The index is only moved temporarily to peek at the next unit.
            ch = peekChar();
            ++this->index;
            char32_t ch2 = this->peekChar();
            if (U16_IS_TRAIL(ch2)) {
                ch = U16_GET_SUPPLEMENTARY(ch, ch2);
            }
            --this->index;
        }
        piece = ParserCharPiece(ch);
        id += UTF16StringDataNonGCStd(piece.data, piece.length);
        this->index += piece.length;

        // '\u' (U+005C, U+0075) denotes an escaped character.
        if (cp == 0x5C) {
            // Drop the backslash that was just appended.
            id.erase(id.length() - 1);

            if (this->peekChar() != 0x75) {
                this->throwUnexpectedToken();
            }
            ++this->index;
            if (this->peekChar() == '{') {
                ++this->index;
                ch = this->scanUnicodeCodePointEscape();
            } else {
                ch = this->scanHexEscape('u');
                cp = ch;
                if (ch == EMPTY_CODE_POINT || ch == '\\' || !isIdentifierPart(cp)) {
                    this->throwUnexpectedToken();
                }
            }
            piece = ParserCharPiece(ch);
            id += UTF16StringDataNonGCStd(piece.data, piece.length);
        }
    }

    String* str = new UTF16String(id.data(), id.length());
    return std::make_tuple(str->bufferAccessData(), str);
}
|
|
|
|
uint16_t Scanner::octalToDecimal(char16_t ch, bool octal)
|
|
{
|
|
// \0 is not octal escape sequence
|
|
char16_t code = octalValue(ch);
|
|
|
|
octal |= (ch != '0');
|
|
|
|
if (!this->eof() && isOctalDigit(this->peekChar())) {
|
|
octal = true;
|
|
code = code * 8 + octalValue(this->peekChar());
|
|
++this->index;
|
|
|
|
// 3 digits are only allowed when string starts
|
|
// with 0, 1, 2, 3
|
|
// if ('0123'.indexOf(ch) >= 0 && !this->eof() && Character.isOctalDigit(this->source.charCodeAt(this->index))) {
|
|
if ((ch >= '0' && ch <= '3') && !this->eof() && isOctalDigit(this->peekChar())) {
|
|
code = code * 8 + octalValue(this->peekChar());
|
|
++this->index;
|
|
}
|
|
}
|
|
|
|
ASSERT(!octal || code < NON_OCTAL_VALUE);
|
|
return octal ? code : NON_OCTAL_VALUE;
|
|
};
|
|
|
|
// Scans the punctuator starting with `ch` (the character at the current
// index), consuming the longest matching operator, and stores the result in
// `token`. Calls throwUnexpectedToken() when `ch` does not start a punctuator.
void Scanner::scanPunctuator(Scanner::ScannerResult* token, char16_t ch)
{
    const size_t start = this->index;
    PunctuatorKind kind;
    // Consume the first character; the cases below only look at what follows.
    ++this->index;

    switch (ch) {
    case '(':
        kind = LeftParenthesis;
        break;

    case '{':
        kind = LeftBrace;
        break;

    case '.':
        kind = Period;
        // Spread operator "...". The third character is read with
        // sourceCharAt(), so make sure it lies inside the source first.
        if (this->peekChar() == '.' && this->index + 1 < this->length && this->sourceCharAt(this->index + 1) == '.') {
            this->index += 2;
            kind = PeriodPeriodPeriod;
        }
        break;

    case '}':
        kind = RightBrace;
        break;
    case ')':
        kind = RightParenthesis;
        break;
    case ';':
        kind = SemiColon;
        break;
    case ',':
        kind = Comma;
        break;
    case '[':
        kind = LeftSquareBracket;
        break;
    case ']':
        kind = RightSquareBracket;
        break;
    case ':':
        kind = Colon;
        break;
    case '?':
        kind = GuessMark;
        break;
    case '~':
        kind = Wave;
        break;

    case '>':
        // >  >=  >>  >>=  >>>  >>>=
        ch = this->peekChar();
        kind = RightInequality;

        if (ch == '>') {
            ++this->index;
            ch = this->peekChar();
            kind = RightShift;

            if (ch == '>') {
                ++this->index;
                kind = UnsignedRightShift;

                if (this->peekChar() == '=') {
                    ++this->index;
                    kind = UnsignedRightShiftEqual;
                }
            } else if (ch == '=') {
                kind = RightShiftEqual;
                ++this->index;
            }
        } else if (ch == '=') {
            kind = RightInequalityEqual;
            ++this->index;
        }
        break;

    case '<':
        // <  <=  <<  <<=
        ch = this->peekChar();
        kind = LeftInequality;

        if (ch == '<') {
            ++this->index;
            kind = LeftShift;

            if (this->peekChar() == '=') {
                kind = LeftShiftEqual;
                ++this->index;
            }
        } else if (ch == '=') {
            kind = LeftInequalityEqual;
            ++this->index;
        }
        break;

    case '=':
        // =  ==  ===  =>
        ch = this->peekChar();
        kind = Substitution;

        if (ch == '=') {
            ++this->index;
            kind = Equal;

            if (this->peekChar() == '=') {
                kind = StrictEqual;
                ++this->index;
            }
        } else if (ch == '>') {
            kind = Arrow;
            ++this->index;
        }
        break;

    case '!':
        // !  !=  !==
        kind = ExclamationMark;

        if (this->peekChar() == '=') {
            ++this->index;
            kind = NotEqual;

            if (this->peekChar() == '=') {
                kind = NotStrictEqual;
                ++this->index;
            }
        }
        break;

    case '&':
        // &  &&  &=
        ch = this->peekChar();
        kind = BitwiseAnd;

        if (ch == '&') {
            kind = LogicalAnd;
            ++this->index;
        } else if (ch == '=') {
            kind = BitwiseAndEqual;
            ++this->index;
        }
        break;

    case '|':
        // |  ||  |=
        ch = this->peekChar();
        kind = BitwiseOr;

        if (ch == '|') {
            kind = LogicalOr;
            ++this->index;
        } else if (ch == '=') {
            kind = BitwiseOrEqual;
            ++this->index;
        }
        break;

    case '^':
        // ^  ^=
        kind = BitwiseXor;

        if (this->peekChar() == '=') {
            kind = BitwiseXorEqual;
            ++this->index;
        }
        break;

    case '+':
        // +  ++  +=
        ch = this->peekChar();
        kind = Plus;

        if (ch == '+') {
            kind = PlusPlus;
            ++this->index;
        } else if (ch == '=') {
            kind = PlusEqual;
            ++this->index;
        }
        break;

    case '-':
        // -  --  -=
        ch = this->peekChar();
        kind = Minus;

        if (ch == '-') {
            kind = MinusMinus;
            ++this->index;
        } else if (ch == '=') {
            kind = MinusEqual;
            ++this->index;
        }
        break;

    case '*':
        // *  *=  **  **=
        ch = this->peekChar();
        kind = Multiply;

        if (ch == '=') {
            kind = MultiplyEqual;
            ++this->index;
        } else if (ch == '*') {
            kind = Exponentiation;
            ++this->index;

            if (this->peekChar() == '=') {
                kind = ExponentiationEqual;
                ++this->index;
            }
        }
        break;

    case '/':
        // /  /=
        kind = Divide;

        if (this->peekChar() == '=') {
            kind = DivideEqual;
            ++this->index;
        }
        break;

    case '%':
        // %  %=
        kind = Mod;

        if (this->peekChar() == '=') {
            kind = ModEqual;
            ++this->index;
        }
        break;

    default:
        this->throwUnexpectedToken();
        kind = PunctuatorKindEnd;
        break;
    }

    token->setPunctuatorResult(this->lineNumber, this->lineStart, start, this->index, kind);
}
|
|
|
|
// Scans the digits of a hexadecimal literal and stores the numeric result in `token`.
// The caller has already advanced past the "0x"/"0X" prefix; `start` is the source
// offset of the literal's first character and is forwarded to the token unchanged.
// The first 16 digits are accumulated exactly in a 64-bit integer; any further digits
// are folded into a double.
// Reports an unexpected token when no digit follows the prefix or when an identifier
// start character directly follows the digits.
void Scanner::scanHexLiteral(Scanner::ScannerResult* token, size_t start)
{
    ASSERT(token != nullptr);

    // 16 hex digits (4 bits each) fill the 64-bit integer accumulator.
    const size_t maxExactDigits = 16;

    uint64_t exactValue = 0;
    double wideValue = 0.0;
    bool useWideValue = false;
    bool sawDigit = false;
    size_t exactDigits = 0;

    while (!this->eof()) {
        const char16_t digit = this->peekCharWithoutEOF();
        if (!isHexDigit(digit)) {
            break;
        }
        sawDigit = true;
        this->index++;

        if (useWideValue) {
            wideValue = wideValue * 16 + toHexNumericValue(digit);
            continue;
        }

        exactValue = (exactValue << 4) + toHexNumericValue(digit);
        if (++exactDigits >= maxExactDigits) {
            // The integer is full: continue in floating point from here on.
            useWideValue = true;
            wideValue = exactValue;
            exactValue = 0;
        }
    }

    if (!sawDigit) {
        this->throwUnexpectedToken();
    }

    if (isIdentifierStart(this->peekChar())) {
        this->throwUnexpectedToken();
    }

    if (!useWideValue) {
        ASSERT(wideValue == 0.0);
        token->setNumericLiteralResult(exactValue, this->lineNumber, this->lineStart, start, this->index, false);
        return;
    }

    ASSERT(exactValue == 0);
    token->setNumericLiteralResult(wideValue, this->lineNumber, this->lineStart, start, this->index, false);
}
|
|
|
|
// Scans the digits of a binary literal and stores the numeric result in `token`.
// The caller has already advanced past the "0b"/"0B" prefix; `start` is the source
// offset of the literal's first character and is forwarded to the token unchanged.
//
// The first 64 digits are accumulated exactly in a 64-bit integer. Longer literals
// continue in a double (the same strategy scanHexLiteral uses) instead of silently
// wrapping around in the integer accumulator.
//
// Reports an unexpected token when no digit follows the prefix or when the digits are
// directly followed by an identifier start character or a decimal digit.
void Scanner::scanBinaryLiteral(Scanner::ScannerResult* token, size_t start)
{
    ASSERT(token != nullptr);

    // One bit per digit: after 64 digits the integer accumulator is full.
    const size_t maxExactDigits = 64;

    uint64_t number = 0;
    double numberDouble = 0.0;
    bool shouldUseDouble = false;
    bool scanned = false;
    size_t digitCount = 0;

    while (!this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        if (ch != '0' && ch != '1') {
            break;
        }
        if (shouldUseDouble) {
            numberDouble = numberDouble * 2 + (ch - '0');
        } else {
            number = (number << 1) + ch - '0';
            if (++digitCount >= maxExactDigits) {
                // Any further shift would drop the most significant bit.
                shouldUseDouble = true;
                numberDouble = static_cast<double>(number);
                number = 0;
            }
        }
        this->index++;
        scanned = true;
    }

    if (!scanned) {
        // only 0b or 0B
        this->throwUnexpectedToken();
    }

    if (!this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        if (isIdentifierStart(ch) || isDecimalDigit(ch)) {
            this->throwUnexpectedToken();
        }
    }

    if (shouldUseDouble) {
        ASSERT(number == 0);
        token->setNumericLiteralResult(numberDouble, this->lineNumber, this->lineStart, start, this->index, false);
    } else {
        ASSERT(numberDouble == 0.0);
        token->setNumericLiteralResult(number, this->lineNumber, this->lineStart, start, this->index, false);
    }
}
|
|
|
|
// Scans the digits of an octal literal and stores the numeric result in `token`.
//
// `prefix` is the character that followed the leading '0':
//   - 'o' / 'O' for an ES6 octal literal (the caller has already advanced past it);
//   - an octal digit for an implicit (legacy) octal literal, in which case the scanner
//     is still positioned on that digit and `token->octal` is set to true.
// `start` is the source offset of the literal's first character and is forwarded to the
// token unchanged.
//
// The first 21 digits (63 bits) are accumulated exactly in a 64-bit integer. Longer
// literals continue in a double (the same strategy scanHexLiteral uses) instead of
// silently wrapping around in the integer accumulator.
//
// Reports an unexpected token when "0o"/"0O" is not followed by a digit or when the
// digits are directly followed by an identifier start character or a decimal digit.
void Scanner::scanOctalLiteral(Scanner::ScannerResult* token, char16_t prefix, size_t start)
{
    ASSERT(token != nullptr);

    // Three bits per digit: a 22nd digit would no longer fit into 64 bits.
    const size_t maxExactDigits = 21;

    uint64_t number = 0;
    double numberDouble = 0.0;
    bool shouldUseDouble = false;
    bool scanned = false;
    size_t digitCount = 0;
    bool octal = isOctalDigit(prefix);

    while (!this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        if (!isOctalDigit(ch)) {
            break;
        }
        if (shouldUseDouble) {
            numberDouble = numberDouble * 8 + (ch - '0');
        } else {
            number = (number << 3) + ch - '0';
            if (++digitCount >= maxExactDigits) {
                // Any further shift would drop significant bits.
                shouldUseDouble = true;
                numberDouble = static_cast<double>(number);
                number = 0;
            }
        }
        this->index++;
        scanned = true;
    }

    if (!octal && !scanned) {
        // only 0o or 0O
        throwUnexpectedToken();
    }

    char16_t ch = this->peekChar();
    if (isIdentifierStart(ch) || isDecimalDigit(ch)) {
        throwUnexpectedToken();
    }

    if (shouldUseDouble) {
        ASSERT(number == 0);
        token->setNumericLiteralResult(numberDouble, this->lineNumber, this->lineStart, start, this->index, false);
    } else {
        ASSERT(numberDouble == 0.0);
        token->setNumericLiteralResult(number, this->lineNumber, this->lineStart, start, this->index, false);
    }
    token->octal = octal;
}
|
|
|
|
bool Scanner::isImplicitOctalLiteral()
|
|
{
|
|
// Implicit octal, unless there is a non-octal digit.
|
|
// (Annex B.1.1 on Numeric Literals)
|
|
for (size_t i = this->index + 1; i < this->length; ++i) {
|
|
const char16_t ch = this->sourceCharAt(i);
|
|
if (ch == '8' || ch == '9') {
|
|
return false;
|
|
}
|
|
if (!isOctalDigit(ch)) {
|
|
return true;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Scans a numeric literal that begins at the current index and stores the result in
// `token`. The literal must start with a decimal digit or a decimal point.
//
// A leading '0' followed by x/X, b/B or o/O dispatches to the hex, binary or octal
// scanner; a leading '0' followed by octal digits only dispatches to the octal scanner
// as an implicit octal literal. Everything else is scanned as a decimal literal with an
// optional fraction and exponent.
//
// Reports an unexpected token when an exponent has no digits or when an identifier
// start character directly follows the literal.
void Scanner::scanNumericLiteral(Scanner::ScannerResult* token)
{
    ASSERT(token != nullptr);
    const size_t start = this->index;
    const char16_t firstChar = this->peekChar();
    // Numeric literal must start with a decimal digit or a decimal point.
    ASSERT(isDecimalDigit(firstChar) || (firstChar == '.'));

    char16_t ch = firstChar;
    bool hasFractionOrExponent = false;

    if (firstChar != '.') {
        ++this->index;
        ch = this->peekChar();

        if (firstChar == '0') {
            // Hex number starts with '0x'.
            // Binary number in ES6 starts with '0b'.
            // Octal number in ES6 starts with '0o'.
            switch (ch) {
            case 'x':
            case 'X':
                ++this->index;
                this->scanHexLiteral(token, start);
                return;
            case 'b':
            case 'B':
                ++this->index;
                this->scanBinaryLiteral(token, start);
                return;
            case 'o':
            case 'O':
                ++this->index;
                this->scanOctalLiteral(token, ch, start);
                return;
            default:
                break;
            }

            // Legacy octal number starts with '0'; the digit itself is not consumed here.
            if (ch && isOctalDigit(ch) && this->isImplicitOctalLiteral()) {
                this->scanOctalLiteral(token, ch, start);
                return;
            }
        }

        // Integer part.
        while (isDecimalDigit(this->peekChar())) {
            ++this->index;
        }
        ch = this->peekChar();
    }

    // Fraction part.
    if (ch == '.') {
        hasFractionOrExponent = true;
        ++this->index;
        while (isDecimalDigit(this->peekChar())) {
            ++this->index;
        }
        ch = this->peekChar();
    }

    // Exponent part: 'e' or 'E', an optional sign, then at least one digit.
    if (ch == 'e' || ch == 'E') {
        hasFractionOrExponent = true;
        ++this->index;
        ch = this->peekChar();

        if (ch == '+' || ch == '-') {
            ++this->index;
            ch = this->peekChar();
        }

        if (!isDecimalDigit(ch)) {
            this->throwUnexpectedToken();
        }
        while (isDecimalDigit(ch)) {
            ++this->index;
            ch = this->peekChar();
        }
    }

    if (!this->eof() && isIdentifierStart(this->peekChar())) {
        this->throwUnexpectedToken();
    }

    // The value is passed as 0 with the last flag set to true (the radix-prefixed
    // scanners pass their computed value with false).
    // NOTE(review): presumably the flag means the value is computed later from the
    // source range [start, index) - confirm against ScannerResult.
    token->setNumericLiteralResult(0, this->lineNumber, this->lineStart, start, this->index, true);

    // Multi-character integer literal with a leading zero, e.g. "09".
    const bool longerThanOneChar = (this->index - start) > 1;
    if (firstChar == '0' && !hasFractionOrExponent && longerThanOneChar) {
        token->startWithZero = true;
    }
}
|
|
|
|
// Scans a string literal that begins with a single or double quote at the current
// index and stores a StringLiteralToken in `token`.
//
// Escape sequences are validated here but their decoded text is not kept:
//   - a literal without any backslash is recorded by its source range (quotes excluded);
//   - a literal containing a backslash is recorded with a null string pointer.
// `octal` is set when an octal escape, or a "\8" / "\9" escape, was seen.
//
// Reports an unexpected token (with the index rewound to the opening quote) when the
// literal is not closed before a line terminator or the end of the source, and an
// invalid-hex-escape error for malformed "\x" / "\u" escapes.
void Scanner::scanStringLiteral(Scanner::ScannerResult* token)
{
    ASSERT(token != nullptr);
    const size_t start = this->index;
    const char16_t quote = this->peekChar();
    // String literal must start with a quote.
    ASSERT((quote == '\'' || quote == '"'));
    ++this->index;

    bool closed = false;
    bool octal = false;
    bool hasEscape = false;

    while (LIKELY(!this->eof())) {
        char16_t ch = this->peekCharWithoutEOF();
        ++this->index;

        if (ch == quote) {
            closed = true;
            break;
        }

        if (LIKELY(ch != '\\')) {
            if (UNLIKELY(isLineTerminator(ch))) {
                // Unescaped line terminator: the literal is unterminated.
                break;
            }
            continue;
        }

        // Escape sequence: look at the character after the backslash.
        hasEscape = true;
        ch = this->peekChar();
        ++this->index;

        if (ch && isLineTerminator(ch)) {
            // Line continuation; "\r\n" and "\n\r" are consumed as a single terminator.
            ++this->lineNumber;
            const char16_t next = this->peekChar();
            if ((ch == '\r' && next == '\n') || (ch == '\n' && next == '\r')) {
                ++this->index;
            }
            this->lineStart = this->index;
            continue;
        }

        switch (ch) {
        case 'n':
        case 'r':
        case 't':
        case 'b':
        case 'f':
        case 'v':
            // Single-character escapes need no validation.
            break;

        case 'x':
            if (this->scanHexEscape(ch) == EMPTY_CODE_POINT) {
                this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence);
            }
            break;

        case 'u':
            if (this->peekChar() == '{') {
                ++this->index;
                this->scanUnicodeCodePointEscape();
            } else if (this->scanHexEscape(ch) == EMPTY_CODE_POINT) {
                this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence);
            }
            break;

        default:
            if (ch && isOctalDigit(ch)) {
                octal |= (this->octalToDecimal(ch, false) != NON_OCTAL_VALUE);
            } else if (isDecimalDigit(ch)) {
                octal = true;
            }
            break;
        }
    }

    if (!closed) {
        this->index = start;
        this->throwUnexpectedToken();
    }

    if (hasEscape) {
        // build string if needs
        token->setResult(Token::StringLiteralToken, static_cast<String*>(nullptr), this->lineNumber, this->lineStart, start, this->index, octal);
    } else {
        token->setResult(Token::StringLiteralToken, start + 1, this->index - 1, this->lineNumber, this->lineStart, start, this->index, octal);
    }
}
|
|
|
|
bool Scanner::isFutureReservedWord(const ParserStringView& id)
|
|
{
|
|
const StringBufferAccessData& data = id.bufferAccessData();
|
|
switch (data.length) {
|
|
case 4:
|
|
return data.equalsSameLength("enum");
|
|
case 5:
|
|
return data.equalsSameLength("super");
|
|
case 6:
|
|
return data.equalsSameLength("export") || data.equalsSameLength("import");
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Scans one chunk of a template literal and stores a template token in `token`.
//
// The scanner must be positioned just after the character(s) that opened the chunk.
// `head` is true for the first chunk of a literal (lex() passes true after consuming
// the opening backtick); in that case the reported token start is moved back by one.
//
// Scanning stops at a closing backtick (the chunk is marked as `tail`) or at "${".
// Two strings are built while scanning:
//   - `raw`    : the source text, with "\r" and "\r\n" replaced by "\n";
//   - `cooked` : the text with escape sequences decoded and line continuations removed.
//
// Reports an unexpected token when the chunk is not terminated, for malformed "\x" /
// "\u" escapes, and for octal escapes (which are not allowed in template strings).
void Scanner::scanTemplate(Scanner::ScannerResult* token, bool head)
{
    ASSERT(token != nullptr);
    // TODO apply rope-string
    UTF16StringDataNonGCStd cooked;
    UTF16StringDataNonGCStd raw;
    bool terminated = false;
    size_t start = this->index;

    bool tail = false;

    while (!this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        ++this->index;
        if (ch == '`') {
            tail = true;
            terminated = true;
            break;
        } else if (ch == '$') {
            if (this->peekChar() == '{') {
                // Start of a substitution: this chunk ends here.
                ++this->index;
                terminated = true;
                break;
            }
            cooked += ch;
            raw += ch;
        } else if (ch == '\\') {
            raw += ch;
            ch = this->peekChar();
            if (!isLineTerminator(ch)) {
                // Remember where the escape body starts so that its source text can be
                // copied into `raw` once it has been decoded.
                auto currentIndex = this->index;
                ++this->index;
                switch (ch) {
                case 'n':
                    cooked += '\n';
                    break;
                case 'r':
                    cooked += '\r';
                    break;
                case 't':
                    cooked += '\t';
                    break;
                case 'u':
                    if (this->peekChar() == '{') {
                        ++this->index;
                        // NOTE(review): the result is appended as a single code unit; if
                        // scanUnicodeCodePointEscape() can return a code point above
                        // 0xFFFF this would truncate it - confirm its return type.
                        cooked += this->scanUnicodeCodePointEscape();
                    } else {
                        const char32_t unescaped = this->scanHexEscape(ch);
                        if (unescaped != EMPTY_CODE_POINT) {
                            ParserCharPiece piece(unescaped);
                            cooked += UTF16StringDataNonGCStd(piece.data, piece.length);
                        } else {
                            this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence);
                        }
                    }
                    break;
                case 'x': {
                    const char32_t unescaped = this->scanHexEscape(ch);
                    if (unescaped == EMPTY_CODE_POINT) {
                        this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence);
                    }
                    ParserCharPiece piece(unescaped);
                    cooked += UTF16StringDataNonGCStd(piece.data, piece.length);
                    break;
                }
                case 'b':
                    cooked += '\b';
                    break;
                case 'f':
                    cooked += '\f';
                    break;
                case 'v':
                    cooked += '\v';
                    break;
                default:
                    if (ch == '0') {
                        if (isDecimalDigit(this->peekChar())) {
                            // Illegal: \01 \02 and so on
                            this->throwUnexpectedToken(Messages::TemplateOctalLiteral);
                        }
                        cooked += (char16_t)'\0';
                    } else if (isOctalDigit(ch)) {
                        // Illegal: \1 \2
                        this->throwUnexpectedToken(Messages::TemplateOctalLiteral);
                    } else {
                        cooked += ch;
                    }
                    break;
                }
                // Copy the escape's source text verbatim into `raw`. The upper bound is
                // clamped to the source length: when the backslash is the last character
                // of the source, `index` has been advanced past the end.
                auto endIndex = this->index;
                for (size_t i = currentIndex; i < endIndex && i < this->length; i++) {
                    raw += this->sourceCharAt(i);
                }
            } else {
                // Line continuation: contributes to `raw` only.
                ++this->index;
                ++this->lineNumber;
                if (ch == '\r' && this->peekChar() == '\n') {
                    ++this->index;
                }
                if (ch == 0x2028 || ch == 0x2029) {
                    raw += ch;
                } else {
                    raw += '\n';
                }
                this->lineStart = this->index;
            }
        } else if (isLineTerminator(ch)) {
            ++this->lineNumber;
            if (ch == '\r' && this->peekChar() == '\n') {
                ++this->index;
            }
            // U+2028 / U+2029 are kept as they are; every other terminator becomes "\n".
            if (ch == 0x2028 || ch == 0x2029) {
                raw += ch;
                cooked += ch;
            } else {
                raw += '\n';
                cooked += '\n';
            }
            this->lineStart = this->index;
        } else {
            cooked += ch;
            raw += ch;
        }
    }

    if (!terminated) {
        this->throwUnexpectedToken();
    }

    ScanTemplateResult* result = new ScanTemplateResult();
    result->head = head;
    result->tail = tail;
    result->valueRaw = UTF16StringData(raw.data(), raw.length());
    result->valueCooked = UTF16StringData(cooked.data(), cooked.length());

    if (head) {
        // Include the opening backtick, which was consumed before this call.
        start--;
    }

    token->setTemplateTokenResult(result, this->lineNumber, this->lineStart, start, this->index);
}
|
|
|
|
// Scans the body of a regular expression literal, from the opening '/' at the current
// index up to and including the closing '/', and returns the text between the slashes
// as a newly allocated string (ASCIIString when every character is ASCII, UTF16String
// otherwise).
//
// A '/' inside a character class ("[...]") or after a backslash does not terminate the
// body. Reports Messages::UnterminatedRegExp when a line terminator or the end of the
// source is reached before the closing slash.
String* Scanner::scanRegExpBody()
{
    char16_t opening = this->peekChar();
    // Regular expression literal must start with a slash.
    ASSERT(opening == '/');
    ++this->index;

    // TODO apply rope-string
    UTF16StringDataNonGCStd literal(&opening, 1);
    bool insideClass = false;
    bool closed = false;

    while (!this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        ++this->index;
        literal += ch;

        if (ch == '\\') {
            // Escaped character: taken verbatim, but it must not be a line terminator.
            ch = this->peekChar();
            ++this->index;
            // ECMA-262 7.8.5
            if (isLineTerminator(ch)) {
                this->throwUnexpectedToken(Messages::UnterminatedRegExp);
            }
            literal += ch;
            continue;
        }

        if (isLineTerminator(ch)) {
            this->throwUnexpectedToken(Messages::UnterminatedRegExp);
            continue;
        }

        if (insideClass) {
            if (ch == ']') {
                insideClass = false;
            }
            continue;
        }

        if (ch == '/') {
            closed = true;
            break;
        }
        if (ch == '[') {
            insideClass = true;
        }
    }

    if (!closed) {
        this->throwUnexpectedToken(Messages::UnterminatedRegExp);
    }

    // Exclude leading and trailing slash.
    const UTF16StringDataNonGCStd pattern = literal.substr(1, literal.length() - 2);
    if (!isAllASCII(pattern.data(), pattern.length())) {
        return new UTF16String(pattern.data(), pattern.length());
    }
    return new ASCIIString(pattern.data(), pattern.length());
}
|
|
|
|
// Scans the flags that follow a regular expression body: every consecutive
// identifier-part character starting at the current index.
// Returns the flags as a newly allocated string (ASCIIString when every character is
// ASCII, UTF16String otherwise).
//
// A backslash that is followed by at least one more character is reported as an
// unexpected token; for a "\u" escape the escape is scanned first, so the scanner
// index has moved past it (or just past the 'u' when the escape is malformed) by the
// time the error is reported.
String* Scanner::scanRegExpFlags()
{
    UTF16StringDataNonGCStd flags;

    while (!this->eof()) {
        const char16_t ch = this->peekCharWithoutEOF();
        if (!isIdentifierPart(ch)) {
            break;
        }
        ++this->index;

        if (ch != '\\' || this->eof()) {
            // Ordinary flag character (or a backslash that ends the source).
            flags += ch;
            continue;
        }

        if (this->peekChar() == 'u') {
            ++this->index;
            const size_t afterU = this->index;
            const char32_t codePoint = this->scanHexEscape('u');
            if (codePoint == EMPTY_CODE_POINT) {
                // Malformed escape: rewind to just after the 'u'.
                this->index = afterU;
                flags += 'u';
            } else {
                ParserCharPiece piece(codePoint);
                flags += UTF16StringDataNonGCStd(piece.data, piece.length);
            }
        }

        // Escapes are not accepted in flags, whatever follows the backslash.
        this->throwUnexpectedToken();
    }

    if (!isAllASCII(flags.data(), flags.length())) {
        return new UTF16String(flags.data(), flags.length());
    }
    return new ASCIIString(flags.data(), flags.length());
}
|
|
|
|
// Scans a complete regular expression literal (body, then flags) starting at the
// current index and stores it in `token` as a RegularExpressionToken whose
// `valueRegexp` holds both parts. The pattern itself is not validated here.
void Scanner::scanRegExp(Scanner::ScannerResult* token)
{
    ASSERT(token != nullptr);
    const size_t start = this->index;

    // The body must be scanned first: the flags begin where the body ends.
    ScanRegExpResult regexp;
    regexp.body = this->scanRegExpBody();
    regexp.flags = this->scanRegExpFlags();

    token->setResult(Token::RegularExpressionToken, this->lineNumber, this->lineStart, start, this->index);
    token->valueRegexp = regexp;
}
|
|
|
|
// ECMA-262 11.6.2.1 Keywords
|
|
static ALWAYS_INLINE KeywordKind isKeyword(const StringBufferAccessData& data)
|
|
{
|
|
// 'const' is specialized as Keyword in V8.
|
|
// 'yield' and 'let' are for compatibility with SpiderMonkey and ES.next.
|
|
// Some others are from future reserved words.
|
|
|
|
size_t length = data.length;
|
|
char16_t first = data.charAt(0);
|
|
char16_t second;
|
|
switch (first) {
|
|
case 'b':
|
|
if (length == 5 && data.equalsSameLength("break", 1)) {
|
|
return BreakKeyword;
|
|
}
|
|
break;
|
|
case 'c':
|
|
if (length == 4) {
|
|
if (data.equalsSameLength("case", 1)) {
|
|
return CaseKeyword;
|
|
}
|
|
} else if (length == 5) {
|
|
second = data.charAt(1);
|
|
if (second == 'a' && data.equalsSameLength("catch", 2)) {
|
|
return CatchKeyword;
|
|
} else if (second == 'o' && data.equalsSameLength("const", 2)) {
|
|
return ConstKeyword;
|
|
} else if (second == 'l' && data.equalsSameLength("class", 2)) {
|
|
return ClassKeyword;
|
|
}
|
|
} else if (length == 8 && data.equalsSameLength("continue", 1)) {
|
|
return ContinueKeyword;
|
|
}
|
|
break;
|
|
case 'd':
|
|
if (length == 8) {
|
|
if (data.equalsSameLength("debugger", 1)) {
|
|
return DebuggerKeyword;
|
|
}
|
|
} else if (length == 2) {
|
|
if (data.equalsSameLength("do", 1)) {
|
|
return DoKeyword;
|
|
}
|
|
} else if (length == 6) {
|
|
if (data.equalsSameLength("delete", 1)) {
|
|
return DeleteKeyword;
|
|
}
|
|
} else if (length == 7) {
|
|
if (data.equalsSameLength("default", 1)) {
|
|
return DefaultKeyword;
|
|
}
|
|
}
|
|
break;
|
|
case 'e':
|
|
if (length == 4) {
|
|
second = data.charAt(1);
|
|
if (second == 'l' && data.equalsSameLength("else", 2)) {
|
|
return ElseKeyword;
|
|
} else if (second == 'n' && data.equalsSameLength("enum", 2)) {
|
|
return EnumKeyword;
|
|
}
|
|
} else if (length == 6 && data.equalsSameLength("export", 1)) {
|
|
return ExportKeyword;
|
|
} else if (length == 7 && data.equalsSameLength("extends", 1)) {
|
|
return ExtendsKeyword;
|
|
}
|
|
break;
|
|
case 'f':
|
|
if (length == 3 && data.equalsSameLength("for", 1)) {
|
|
return ForKeyword;
|
|
} else if (length == 7 && data.equalsSameLength("finally", 1)) {
|
|
return FinallyKeyword;
|
|
} else if (length == 8 && data.equalsSameLength("function", 1)) {
|
|
return FunctionKeyword;
|
|
}
|
|
break;
|
|
case 'i':
|
|
if (length == 2) {
|
|
second = data.charAt(1);
|
|
if (second == 'f') {
|
|
return IfKeyword;
|
|
} else if (second == 'n') {
|
|
return InKeyword;
|
|
}
|
|
} else if (length == 6 && data.equalsSameLength("import", 1)) {
|
|
return ImportKeyword;
|
|
} else if (length == 10 && data.equalsSameLength("instanceof", 1)) {
|
|
return InstanceofKeyword;
|
|
}
|
|
break;
|
|
case 'l':
|
|
if (length == 3 && data.equalsSameLength("let", 1)) {
|
|
return LetKeyword;
|
|
}
|
|
break;
|
|
case 'n':
|
|
if (length == 3 && data.equalsSameLength("new", 1)) {
|
|
return NewKeyword;
|
|
}
|
|
break;
|
|
case 'r':
|
|
if (length == 6 && data.equalsSameLength("return", 1)) {
|
|
return ReturnKeyword;
|
|
}
|
|
break;
|
|
case 's':
|
|
if (length == 5 && data.equalsSameLength("super", 1)) {
|
|
return SuperKeyword;
|
|
} else if (length == 6 && data.equalsSameLength("switch", 1)) {
|
|
return SwitchKeyword;
|
|
}
|
|
break;
|
|
case 't':
|
|
switch (length) {
|
|
case 3:
|
|
if (data.equalsSameLength("try", 1)) {
|
|
return TryKeyword;
|
|
}
|
|
break;
|
|
case 4:
|
|
if (data.equalsSameLength("this", 1)) {
|
|
return ThisKeyword;
|
|
}
|
|
break;
|
|
case 5:
|
|
if (data.equalsSameLength("throw", 1)) {
|
|
return ThrowKeyword;
|
|
}
|
|
break;
|
|
case 6:
|
|
if (data.equalsSameLength("typeof", 1)) {
|
|
return TypeofKeyword;
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
case 'v':
|
|
if (length == 3 && data.equalsSameLength("var", 1)) {
|
|
return VarKeyword;
|
|
} else if (length == 4 && data.equalsSameLength("void", 1)) {
|
|
return VoidKeyword;
|
|
}
|
|
break;
|
|
case 'w':
|
|
if (length == 4 && data.equalsSameLength("with", 1)) {
|
|
return WithKeyword;
|
|
} else if (length == 5 && data.equalsSameLength("while", 1)) {
|
|
return WhileKeyword;
|
|
}
|
|
break;
|
|
case 'y':
|
|
if (length == 5 && data.equalsSameLength("yield", 1)) {
|
|
return YieldKeyword;
|
|
}
|
|
break;
|
|
}
|
|
return NotKeyword;
|
|
}
|
|
|
|
// Scans an identifier-like word starting at the current index and stores the result in
// `token`: a keyword token, a null or boolean literal token, or an identifier token.
// `ch0` is the first character of the word; a backslash selects the scanning path that
// handles escaped characters.
ALWAYS_INLINE void Scanner::scanIdentifier(Scanner::ScannerResult* token, char16_t ch0)
{
    ASSERT(token != nullptr);
    const size_t start = this->index;

    // Backslash (U+005C) starts an escaped character.
    ScanIDResult id = UNLIKELY(ch0 == 0x5C) ? this->getComplexIdentifier() : this->getIdentifier();
    const size_t end = this->index;

    const auto& data = std::get<0>(id);
    Token type = Token::IdentifierToken;

    // There is no keyword or literal with only one character.
    // Thus, it must be an identifier.
    if (data.length != 1) {
        const KeywordKind keywordKind = isKeyword(data);
        if (keywordKind) {
            token->setKeywordResult(this->lineNumber, this->lineStart, start, this->index, keywordKind);
            return;
        }

        if (data.length == 4) {
            if (data.equalsSameLength("null")) {
                type = Token::NullLiteralToken;
            } else if (data.equalsSameLength("true")) {
                type = Token::BooleanLiteralToken;
            }
        } else if (data.length == 5 && data.equalsSameLength("false")) {
            type = Token::BooleanLiteralToken;
        }
    }

    // When the scan produced a string object, the token carries that string;
    // otherwise the token refers to the source range [start, end).
    auto* builtString = std::get<1>(id);
    if (UNLIKELY(builtString != nullptr)) {
        token->setResult(type, builtString, this->lineNumber, this->lineStart, start, end);
    } else {
        token->setResult(type, start, end, this->lineNumber, this->lineStart, start, end);
    }
}
|
|
|
|
// Scans the next token at the current index into `token`.
// At the end of the source an EOFToken is produced. Otherwise the first character
// selects the scanner: identifier, string literal, numeric literal, template, or - as
// the fallback - punctuator.
void Scanner::lex(Scanner::ScannerResult* token)
{
    ASSERT(token != nullptr);

    if (UNLIKELY(this->eof())) {
        token->setResult(Token::EOFToken, this->lineNumber, this->lineStart, this->index, this->index);
        return;
    }

    const char16_t cp = this->peekCharWithoutEOF();

    if (isIdentifierStart(cp)) {
        goto ScanID;
    }

    // String literal starts with single quote (U+0027) or double quote (U+0022).
    if (cp == '\'' || cp == '"') {
        this->scanStringLiteral(token);
        return;
    }

    // Dot (.) U+002E can also start a floating-point number, hence the need
    // to check the next character.
    if ((UNLIKELY(cp == '.') && isDecimalDigit(this->sourceCharAt(this->index + 1))) || isDecimalDigit(cp)) {
        this->scanNumericLiteral(token);
        return;
    }

    if (UNLIKELY(cp == '`')) {
        // The template scanner expects the opening backtick to be consumed already.
        ++this->index;
        this->scanTemplate(token, true);
        return;
    }

    // Possible identifier start in a surrogate pair.
    if (UNLIKELY(cp >= 0xD800 && cp < 0xDFFF) && isIdentifierStart(this->codePointAt(this->index))) {
        goto ScanID;
    }

    this->scanPunctuator(token, cp);
    return;

ScanID:
    // Single call site for both identifier paths above.
    this->scanIdentifier(token, cp);
}
|
|
}
|