mirror of
https://github.com/Samsung/escargot.git
synced 2026-06-22 10:01:50 +00:00
2387 lines
71 KiB
C++
2387 lines
71 KiB
C++
/*
|
|
* Copyright (c) 2016-present Samsung Electronics Co., Ltd
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
|
|
* USA
|
|
*/
|
|
|
|
#include "Escargot.h"
|
|
#include "parser/Lexer.h"
|
|
#include "parser/UnicodeIdentifierTables.h"
|
|
#include "parser/esprima_cpp/ParserContext.h"
|
|
|
|
// These two must be the last because they overwrite the ASSERT macro.
|
|
#include "double-conversion.h"
|
|
#include "ieee.h"
|
|
|
|
using namespace Escargot::EscargotLexer;
|
|
|
|
namespace Escargot {
|
|
|
|
#define IDENT_RANGE_LONG 200
|
|
|
|
/* The largest code point that a UTF-16 surrogate pair can represent is 0x10FFFF,
 * so any value above it can serve as the "empty" marker. UINT32_MAX is
 * chosen because it is a valid immediate for machine instructions. */
|
|
#define EMPTY_CODE_POINT UINT32_MAX
|
|
|
|
/* The largest octal value is 255, so any higher
|
|
* value can represent an invalid octal value. */
|
|
#define NON_OCTAL_VALUE 256
|
|
|
|
/* Classification flags for the 128 ASCII code units, indexed by character value.
 * Each entry is a combination of the LexerIsChar* flags (or 0 for none). */
char EscargotLexer::g_asciiRangeCharMap[128] = {
    // 0x00 - 0x08
    0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x09 (TAB), 0x0A (LF), 0x0B (VT), 0x0C (FF), 0x0D (CR)
    LexerIsCharWhiteSpace,
    LexerIsCharLineTerminator,
    LexerIsCharWhiteSpace,
    LexerIsCharWhiteSpace,
    LexerIsCharLineTerminator,
    // 0x0E - 0x1F
    0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x20 (space)
    LexerIsCharWhiteSpace,
    // '!' '"' '#'
    0, 0, 0,
    // '$'
    LexerIsCharIdentStart | LexerIsCharIdent,
    // '%' .. '/'
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // '0' .. '9' (allowed inside an identifier, but not as its first character)
    LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent,
    LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent,
    // ':' .. '@'
    0, 0, 0, 0, 0, 0, 0,
    // 'A' .. 'Z'
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    // '['
    0,
    // '\\' (flagged so that escaped identifiers reach the identifier scanners)
    LexerIsCharIdentStart | LexerIsCharIdent,
    // ']' '^'
    0, 0,
    // '_'
    LexerIsCharIdentStart | LexerIsCharIdent,
    // '`'
    0,
    // 'a' .. 'z'
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent,
    // '{' '|' '}' '~' 0x7F (DEL)
    0, 0, 0, 0, 0
};
|
|
|
|
/* White-space test for code units outside the ASCII table.
 * The caller must pass ch >= 0x80 (asserted). */
NEVER_INLINE bool EscargotLexer::isWhiteSpaceSlowCase(char16_t ch)
{
    ASSERT(ch >= 0x80);

    // Below U+1680 the only accepted code unit is U+00A0.
    if (LIKELY(ch < 0x1680)) {
        return ch == 0xA0;
    }

    switch (ch) {
    case 0x1680:
    case 0x2000:
    case 0x2001:
    case 0x2002:
    case 0x2003:
    case 0x2004:
    case 0x2005:
    case 0x2006:
    case 0x2007:
    case 0x2008:
    case 0x2009:
    case 0x200A:
    case 0x202F:
    case 0x205F:
    case 0x3000:
    case 0xFEFF:
        return true;
    default:
        return false;
    }
}
|
|
|
|
/* Binary search over identRangeStart / identRangeLength.
 * Returns true when ch falls inside the range that starts at the located entry.
 * A stored length >= IDENT_RANGE_LONG is an index into identRangeLongLength. */
static NEVER_INLINE bool isIdentifierPartSlow(char32_t ch)
{
    int low = 0;
    int high = (EscargotLexer::basic_plane_length / sizeof(uint16_t)) - 1;

    do {
        const int mid = (low + high) >> 1;
        const char32_t first = identRangeStart[mid];

        if (ch < first) {
            // Target lies before this entry.
            high = mid;
            continue;
        }

        if (ch >= identRangeStart[mid + 1]) {
            // Target lies at or after the next entry.
            low = mid + 1;
            continue;
        }

        // ch is between the start of this entry and the start of the next one.
        char32_t span = identRangeLength[mid];
        if (UNLIKELY(span >= IDENT_RANGE_LONG)) {
            span = identRangeLongLength[span - IDENT_RANGE_LONG];
        }
        return ch <= first + span;
    } while (low != high);

    return false;
}
|
|
|
|
/* Binary search over identRangeStartSupplementaryPlane /
 * identRangeLengthSupplementaryPlane. Returns true when ch falls inside the
 * range that starts at the located entry. */
static NEVER_INLINE bool isIdentifierPartSlowSupplementary(char32_t ch)
{
    int low = 0;
    int high = (EscargotLexer::supplementary_plane_length / sizeof(uint32_t)) - 1;

    do {
        const int mid = (low + high) >> 1;
        const char32_t first = identRangeStartSupplementaryPlane[mid];

        if (ch < first) {
            // Target lies before this entry.
            high = mid;
            continue;
        }

        if (ch >= identRangeStartSupplementaryPlane[mid + 1]) {
            // Target lies at or after the next entry.
            low = mid + 1;
            continue;
        }

        const char32_t span = identRangeLengthSupplementaryPlane[mid];
        return ch <= first + span;
    } while (low != high);

    return false;
}
|
|
|
|
/* Tests whether ch may appear inside an identifier. ASCII values are answered
 * from g_asciiRangeCharMap; everything else goes through the range lookups. */
static ALWAYS_INLINE bool isIdentifierPart(char32_t ch)
{
    if (UNLIKELY(ch >= 128)) {
        return isIdentifierPartSlow(ch) || isIdentifierPartSlowSupplementary(ch);
    }

    return g_asciiRangeCharMap[ch] & LexerIsCharIdent;
}
|
|
|
|
/* Tests whether ch may start an identifier. ASCII values are answered from
 * g_asciiRangeCharMap.
 * NOTE(review): for non-ASCII input this calls the same lookups as
 * isIdentifierPart(), so both functions agree outside ASCII -- confirm that
 * this is intended. */
static ALWAYS_INLINE bool isIdentifierStart(char32_t ch)
{
    if (UNLIKELY(ch >= 128)) {
        return isIdentifierPartSlow(ch) || isIdentifierPartSlowSupplementary(ch);
    }

    return g_asciiRangeCharMap[ch] & LexerIsCharIdentStart;
}
|
|
|
|
/* True for '0'..'9'. */
static ALWAYS_INLINE bool isDecimalDigit(char16_t ch)
{
    // A value below '0' becomes negative and wraps to a large unsigned number.
    return static_cast<unsigned>(ch - '0') <= 9u;
}
|
|
|
|
/* True for '0'..'9' and for '_'. When ch is '_', seenUnderScore is set to true;
 * otherwise it is left untouched. */
static ALWAYS_INLINE bool isDecimalDigitOrUnderscore(char16_t ch, bool& seenUnderScore)
{
    if (LIKELY(ch != '_')) {
        return ch >= '0' && ch <= '9';
    }

    seenUnderScore = true;
    return true;
}
|
|
|
|
/* True for '0'..'9', 'a'..'f' and 'A'..'F'. */
static ALWAYS_INLINE bool isHexDigit(char16_t ch)
{
    if (isDecimalDigit(ch)) {
        return true;
    }

    // Setting bit 0x20 maps 'A'..'F' onto 'a'..'f'.
    const char16_t lowered = ch | 0x20;
    return lowered >= 'a' && lowered <= 'f';
}
|
|
|
|
/* True for hexadecimal digits and for '_'. An underscore is recorded in
 * seenUnderScore by isDecimalDigitOrUnderscore(). */
static ALWAYS_INLINE bool isHexDigitOrUnderscore(char16_t ch, bool& seenUnderScore)
{
    if (isDecimalDigitOrUnderscore(ch, seenUnderScore)) {
        return true;
    }

    // Setting bit 0x20 maps 'A'..'F' onto 'a'..'f'.
    const char16_t lowered = ch | 0x20;
    return lowered >= 'a' && lowered <= 'f';
}
|
|
|
|
/* True for '0'..'7'. */
static ALWAYS_INLINE bool isOctalDigit(char16_t ch)
{
    // A value below '0' becomes negative and wraps to a large unsigned number.
    return static_cast<unsigned>(ch - '0') < 8u;
}
|
|
|
|
/* Numeric value of an octal digit character. ch must satisfy isOctalDigit(). */
static ALWAYS_INLINE char16_t octalValue(char16_t ch)
{
    ASSERT(isOctalDigit(ch));
    const int digit = ch - '0';
    return digit;
}
|
|
|
|
/* Numeric value of a hexadecimal digit character. No validation is performed;
 * the result is only meaningful when ch is a hex digit. */
static ALWAYS_INLINE uint8_t toHexNumericValue(char16_t ch)
{
    if (ch < 'A') {
        return ch - '0';
    }

    // Masking with 0xF makes 'a'..'f' produce the same values as 'A'..'F'.
    return (ch - 'A' + 10) & 0xF;
}
|
|
|
|
static int hexValue(char16_t ch)
|
|
{
|
|
if (ch >= '0' && ch <= '9') {
|
|
return ch - '0';
|
|
}
|
|
|
|
ASSERT((ch | 0x20) >= 'a' && (ch | 0x20) <= 'f');
|
|
|
|
return (ch | 0x20) - ('a' - 10);
|
|
}
|
|
|
|
/* UTF-16 encoding of a single code point.
 * `data` holds `length` code units (1 or 2) followed by a terminating 0. */
struct ParserCharPiece {
    char16_t data[3];
    size_t length;

    ParserCharPiece(const char32_t a)
    {
        if (a >= 0x10000) {
            // Needs a surrogate pair: high unit carries the upper 10 bits,
            // low unit the lower 10 bits of (a - 0x10000).
            const char32_t offset = a - 0x10000;
            data[0] = static_cast<char16_t>(0xD800 + (offset >> 10));
            data[1] = static_cast<char16_t>(0xDC00 + (offset & 0x3FF));
            data[2] = 0;
            length = 2;
            return;
        }

        // Fits in a single code unit.
        data[0] = static_cast<char16_t>(a);
        data[1] = 0;
        length = 1;
    }
};
|
|
|
|
/* Returns the AtomicString held in ctx->staticStrings() for the given keyword.
 * An unknown keyword triggers ASSERT_NOT_REACHED() and yields the `error` string. */
AtomicString keywordToString(::Escargot::Context* ctx, KeywordKind keyword)
{
    const auto& strings = ctx->staticStrings();

    switch (keyword) {
    // two-letter keywords
    case IfKeyword:
        return strings.stringIf;
    case InKeyword:
        return strings.stringIn;
    case DoKeyword:
        return strings.stringDo;
    // three-letter keywords
    case VarKeyword:
        return strings.var;
    case ForKeyword:
        return strings.stringFor;
    case NewKeyword:
        return strings.stringNew;
    case TryKeyword:
        return strings.stringTry;
    // four-letter keywords
    case ThisKeyword:
        return strings.stringThis;
    case ElseKeyword:
        return strings.stringElse;
    case CaseKeyword:
        return strings.stringCase;
    case VoidKeyword:
        return strings.stringVoid;
    case WithKeyword:
        return strings.with;
    case EnumKeyword:
        return strings.stringEnum;
    // five-letter keywords
    case WhileKeyword:
        return strings.stringWhile;
    case BreakKeyword:
        return strings.stringBreak;
    case CatchKeyword:
        return strings.stringCatch;
    case ThrowKeyword:
        return strings.stringThrow;
    case ConstKeyword:
        return strings.stringConst;
    case ClassKeyword:
        return strings.stringClass;
    case SuperKeyword:
        return strings.super;
    // six-letter keywords
    case ReturnKeyword:
        return strings.stringReturn;
    case TypeofKeyword:
        return strings.stringTypeof;
    case DeleteKeyword:
        return strings.stringDelete;
    case SwitchKeyword:
        return strings.stringSwitch;
    case ExportKeyword:
        return strings.stringExport;
    case ImportKeyword:
        return strings.stringImport;
    // seven-letter keywords
    case DefaultKeyword:
        return strings.stringDefault;
    case FinallyKeyword:
        return strings.finally;
    case ExtendsKeyword:
        return strings.extends;
    // longer keywords
    case FunctionKeyword:
        return strings.function;
    case ContinueKeyword:
        return strings.stringContinue;
    case DebuggerKeyword:
        return strings.debugger;
    case InstanceofKeyword:
        return strings.instanceof;
    // remaining kinds, in the order the original switch lists them
    case ImplementsKeyword:
        return strings.implements;
    case InterfaceKeyword:
        return strings.interface;
    case PackageKeyword:
        return strings.package;
    case PrivateKeyword:
        return strings.stringPrivate;
    case ProtectedKeyword:
        return strings.stringProtected;
    case PublicKeyword:
        return strings.stringPublic;
    case StaticKeyword:
        return strings.stringStatic;
    case YieldKeyword:
        return strings.yield;
    case LetKeyword:
        return strings.let;
    case NullKeyword:
        return strings.null;
    case TrueKeyword:
        return strings.stringTrue;
    case FalseKeyword:
        return strings.stringFalse;
    case GetKeyword:
        return strings.get;
    case SetKeyword:
        return strings.set;
    case EvalKeyword:
        return strings.eval;
    case ArgumentsKeyword:
        return strings.arguments;
    case OfKeyword:
        return strings.of;
    case AsyncKeyword:
        return strings.async;
    case AwaitKeyword:
        return strings.await;
    case AsKeyword:
        return strings.as;
    case FromKeyword:
        return strings.from;
    default:
        ASSERT_NOT_REACHED();
        return strings.error;
    }
}
|
|
|
|
void ErrorHandler::throwError(size_t index, size_t line, size_t col, String* description, ErrorObject::Code code)
|
|
{
|
|
UTF16StringDataNonGCStd msg = u"Line ";
|
|
const size_t bufferLength = 64;
|
|
char lineStringBuf[bufferLength];
|
|
char* bufPtr = lineStringBuf + bufferLength - 2;
|
|
|
|
/* Adds ": " at the end. */
|
|
bufPtr[0] = ':';
|
|
bufPtr[1] = ' ';
|
|
|
|
size_t value = line;
|
|
do {
|
|
ASSERT(bufPtr > lineStringBuf);
|
|
--bufPtr;
|
|
*bufPtr = value % 10 + '0';
|
|
value /= 10;
|
|
} while (value > 0);
|
|
|
|
msg += UTF16StringDataNonGCStd(bufPtr, lineStringBuf + bufferLength);
|
|
|
|
if (description->length()) {
|
|
msg += UTF16StringDataNonGCStd(description->toUTF16StringData().data());
|
|
}
|
|
|
|
esprima::Error* error = new (NoGC) esprima::Error(new UTF16String(msg.data(), msg.length()));
|
|
error->index = index;
|
|
error->lineNumber = line;
|
|
error->column = col;
|
|
error->description = description;
|
|
error->errorCode = code;
|
|
|
|
throw error;
|
|
};
|
|
|
|
/* Returns the view of `source` delimited by this token's start and end offsets. */
ParserStringView Scanner::SmallScannerResult::relatedSource(const ParserStringView& source) const
{
    ParserStringView tokenText(source, this->start, this->end);
    return tokenText;
}
|
|
|
|
/* Returns the view of `source` delimited by this token's start and end offsets. */
StringView Scanner::SmallScannerResult::relatedSource(const StringView& source) const
{
    StringView tokenText(source, this->start, this->end);
    return tokenText;
}
|
|
|
|
/* Returns the view of `source` delimited by this token's start and end offsets. */
ParserStringView Scanner::ScannerResult::relatedSource(const ParserStringView& source)
{
    ParserStringView tokenText(source, this->start, this->end);
    return tokenText;
}
|
|
|
|
/* Returns the view of `source` delimited by this token's start and end offsets. */
StringView Scanner::ScannerResult::relatedSource(const StringView& source)
{
    StringView tokenText(source, this->start, this->end);
    return tokenText;
}
|
|
|
|
/* Produces the Value of a string-literal token.
 *  - If the token is flagged as having an allocated string, that string is
 *    built on first use (constructStringLiteral) and returned.
 *  - Otherwise the literals "object", "function" and "undefined" are answered
 *    with the context's static strings, and anything else with a new
 *    StringView over the scanner's source. */
Value Scanner::ScannerResult::valueStringLiteralToValue(Scanner* scannerInstance)
{
    ASSERT(this->type == Token::StringLiteralToken);

    if (UNLIKELY(this->hasAllocatedString)) {
        if (!this->valueStringLiteralData.m_stringIfNewlyAllocated) {
            constructStringLiteral(scannerInstance);
        }
        return this->valueStringLiteralData.m_stringIfNewlyAllocated;
    }

    const size_t start = this->valueStringLiteralData.m_start;
    const size_t end = this->valueStringLiteralData.m_end;
    const size_t length = end - start;

    // Only the most common typeof results are special-cased; their lengths are 6, 8 and 9.
    if (length > 5 && length < 10) {
        ParserStringView str(scannerInstance->source, start, end);
        const auto first = str.bufferedCharAt(0);

        if (first == 'o') {
            if (length == 6 && str.equalsSameLength("object", 1)) {
                return scannerInstance->escargotContext->staticStrings().object.string();
            }
        } else if (first == 'f') {
            if (length == 8 && str.equalsSameLength("function", 1)) {
                return scannerInstance->escargotContext->staticStrings().function.string();
            }
        } else if (first == 'u') {
            if (length == 9 && str.equalsSameLength("undefined", 1)) {
                return scannerInstance->escargotContext->staticStrings().undefined.string();
            }
        }
    }

    return new StringView(scannerInstance->sourceAsNormalView, start, end);
}
|
|
|
|
/* Returns the textual value of this token as a ParserStringView:
 *  - keyword tokens yield the keyword's static string,
 *  - tokens flagged as having an allocated string yield that string
 *    (built on first use),
 *  - everything else yields the [m_start, m_end) slice of the scanner source. */
ParserStringView Scanner::ScannerResult::valueStringLiteral(Scanner* scannerInstance)
{
    if (this->type == Token::KeywordToken) {
        AtomicString keyword = keywordToString(scannerInstance->escargotContext, this->valueKeywordKind);
        auto keywordText = keyword.string();
        return ParserStringView(keywordText, 0, keywordText->length());
    }

    if (!this->hasAllocatedString) {
        const auto& range = this->valueStringLiteralData;
        return ParserStringView(scannerInstance->source, range.m_start, range.m_end);
    }

    if (!this->valueStringLiteralData.m_stringIfNewlyAllocated) {
        constructStringLiteral(scannerInstance);
    }
    return ParserStringView(this->valueStringLiteralData.m_stringIfNewlyAllocated);
}
|
|
|
|
/* Returns the numeric value of this token and whether it is a BigInt.
 * While hasNonComputedNumberLiteral is set, the token text is copied (numeric
 * separators removed) into a char buffer and parsed; a trailing 'n' selects
 * BigInt parsing, otherwise the double result is cached in valueNumber and the
 * flag is cleared. */
std::pair<Value, bool> Scanner::ScannerResult::valueNumberLiteral(Scanner* scannerInstance)
{
    if (!this->hasNonComputedNumberLiteral) {
        return std::make_pair(Value(this->valueNumber), false);
    }

    const auto& bd = scannerInstance->source.bufferAccessData();
    char* buffer;
    int length = this->end - this->start;

    if (UNLIKELY(this->hasNumberSeparatorOnNumberLiteral)) {
        // Copy the literal while dropping every '_'.
        buffer = ALLOCA(this->end - this->start, char, ec);
        int written = 0;
        for (int i = 0; i < length; i++) {
            auto c = bd.charAt(i + this->start);
            if (c != '_') {
                buffer[written] = c;
                written++;
            }
        }
        ASSERT(written != length);
        length = written;
    } else if (bd.has8BitContent) {
        // 8-bit source: parse directly from the source buffer.
        buffer = ((char*)bd.buffer) + this->start;
    } else {
        // 16-bit source: narrow each code unit into a temporary char buffer.
        buffer = ALLOCA(this->end - this->start, char, ec);

        for (int i = 0; i < length; i++) {
            buffer[i] = bd.uncheckedCharAtFor16Bit(i + this->start);
        }
    }

    // bigint case
    if (UNLIKELY(buffer[length - 1] == 'n')) {
        return std::make_pair(Value(BigInt::parseString(buffer, length - 1).value()), true);
    }

    int lengthDummy;
    double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::ALLOW_HEX
                                                             | double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES
                                                             | double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES,
                                                         0.0, double_conversion::Double::NaN(),
                                                         "Infinity", "NaN");
    const double parsed = converter.StringToDouble(buffer, length, &lengthDummy);

    this->valueNumber = parsed;
    this->hasNonComputedNumberLiteral = false;

    return std::make_pair(Value(this->valueNumber), false);
}
|
|
|
|
/* Appends the character(s) denoted by the escape sequence whose first character
 * after the backslash is `ch` to stringUTF16. Further characters of the escape
 * are consumed from scannerInstance. isEveryCharLatin1 is cleared when a code
 * unit >= 256 (or a surrogate pair) is appended. */
void Scanner::ScannerResult::constructStringLiteralHelperAppendUTF16(Scanner* scannerInstance, char16_t ch, UTF16StringDataNonGCStd& stringUTF16, bool& isEveryCharLatin1)
{
    // \u.... , \u{...} and \x.. escapes
    if (ch == 'u' || ch == 'x') {
        char32_t param;
        if (scannerInstance->peekChar() == '{') {
            ++scannerInstance->index;
            param = scannerInstance->scanUnicodeCodePointEscape();
        } else {
            param = scannerInstance->scanHexEscape(ch);
        }

        ParserCharPiece piece(param);
        stringUTF16.append(piece.data, piece.data + piece.length);
        if (!(piece.length == 1 && piece.data[0] < 256)) {
            isEveryCharLatin1 = false;
        }
        return;
    }

    // Single-character escapes map to one fixed character.
    char16_t simple;
    switch (ch) {
    case 'n':
        simple = '\n';
        break;
    case 'r':
        simple = '\r';
        break;
    case 't':
        simple = '\t';
        break;
    case 'b':
        simple = '\b';
        break;
    case 'f':
        simple = '\f';
        break;
    case 'v':
        simple = '\x0B';
        break;
    default:
        if (ch && isOctalDigit(ch)) {
            // Octal escape: the remaining digits are consumed by octalToDecimal().
            uint16_t octToDec = scannerInstance->octalToDecimal(ch, true);
            stringUTF16 += octToDec;
            ASSERT(octToDec < 256);
        } else {
            // Any other character stands for itself.
            stringUTF16 += ch;
            if (ch >= 256) {
                isEveryCharLatin1 = false;
            }
        }
        return;
    }

    stringUTF16 += simple;
}
|
|
|
|
/* Re-scans this string-literal token from its opening quote, decoding escape
 * sequences, and stores the resulting string in
 * valueStringLiteralData.m_stringIfNewlyAllocated. The scanner's index,
 * lineNumber and lineStart are saved on entry and restored afterwards. */
void Scanner::ScannerResult::constructStringLiteral(Scanner* scannerInstance)
{
    const size_t savedIndex = scannerInstance->index;
    const size_t savedLineNumber = scannerInstance->lineNumber;
    const size_t savedLineStart = scannerInstance->lineStart;

    scannerInstance->index = this->start;
    const char16_t quote = scannerInstance->peekChar();
    // A string literal must start with a quote.
    ASSERT((quote == '\'' || quote == '"'));
    ++scannerInstance->index;

    bool isEveryCharLatin1 = true;
    UTF16StringDataNonGCStd stringUTF16;

    for (;;) {
        char16_t ch = scannerInstance->peekChar();
        ++scannerInstance->index;

        if (ch == quote) {
            // Closing quote.
            break;
        }

        if (LIKELY(ch != '\\')) {
            if (UNLIKELY(isLineTerminator(ch))) {
                break;
            }
            stringUTF16 += ch;
            if (ch >= 256) {
                isEveryCharLatin1 = false;
            }
            continue;
        }

        // Escape sequence: look at the character after the backslash.
        ch = scannerInstance->peekChar();
        ++scannerInstance->index;

        if (ch && isLineTerminator(ch)) {
            // Backslash followed by a line terminator: nothing is appended,
            // only the line bookkeeping advances.
            ++scannerInstance->lineNumber;
            const char16_t following = scannerInstance->peekChar();
            const bool isCrLf = (ch == '\r' && following == '\n');
            const bool isLfCr = (ch == '\n' && following == '\r');
            if (isCrLf || isLfCr) {
                ++scannerInstance->index;
            }
            scannerInstance->lineStart = scannerInstance->index;
        } else {
            this->constructStringLiteralHelperAppendUTF16(scannerInstance, ch, stringUTF16, isEveryCharLatin1);
        }
    }

    scannerInstance->index = savedIndex;
    scannerInstance->lineNumber = savedLineNumber;
    scannerInstance->lineStart = savedLineStart;

    String* newStr = isEveryCharLatin1
        ? String::fromLatin1(stringUTF16.data(), stringUTF16.length())
        : new UTF16String(stringUTF16.data(), stringUTF16.length());
    this->valueStringLiteralData.m_stringIfNewlyAllocated = newStr;
}
|
|
|
|
/* Creates a scanner over `code`, positioned at offset 0.
 * lineNumber starts at startLine and lineStart at startColumn.
 * escargotContext must not be null (asserted). */
Scanner::Scanner(::Escargot::Context* escargotContext,
                 ::Escargot::esprima::ParserContext* parserContext,
                 StringView code,
                 bool isModule,
                 size_t startLine,
                 size_t startColumn)
    : source(code, 0, code.length())
    , sourceAsNormalView(code)
    , escargotContext(escargotContext)
    , parserContext(parserContext)
    , sourceCodeAccessData(code.bufferAccessData())
    , isModule(isModule)
    , length(code.length())
    , index(0)
    , lineNumber(startLine)
    , lineStart(startColumn)
{
    ASSERT(escargotContext != nullptr);
}
|
|
|
|
/* Points the scanner at a new source text and rewinds it:
 * index 0, line number 1, line start 0. */
void Scanner::resetSource(StringView code)
{
    // Source views and cached buffer access data.
    this->sourceAsNormalView = code;
    this->source = ParserStringView(code, 0, code.length());
    this->sourceCodeAccessData = code.bufferAccessData();
    this->length = code.length();

    // Position bookkeeping.
    this->index = 0;
    this->lineStart = 0;
    this->lineNumber = 1;
}
|
|
|
|
void Scanner::skipSingleLine()
|
|
{
|
|
while (!this->eof()) {
|
|
char16_t ch = this->peekCharWithoutEOF();
|
|
++this->index;
|
|
|
|
if (isLineTerminator(ch)) {
|
|
if (ch == 13 && this->peekCharWithoutEOF() == 10) {
|
|
++this->index;
|
|
}
|
|
++this->lineNumber;
|
|
this->lineStart = this->index;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
void Scanner::skipSingleLineComment(void)
|
|
{
|
|
while (!this->eof()) {
|
|
char16_t ch = this->peekCharWithoutEOF();
|
|
++this->index;
|
|
|
|
if (isLineTerminator(ch)) {
|
|
if (ch == 13 && this->peekCharWithoutEOF() == 10) {
|
|
++this->index;
|
|
}
|
|
++this->lineNumber;
|
|
this->lineStart = this->index;
|
|
// return comments;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
void Scanner::skipMultiLineComment(void)
|
|
{
|
|
while (!this->eof()) {
|
|
char16_t ch = this->peekCharWithoutEOF();
|
|
++this->index;
|
|
|
|
if (isLineTerminator(ch)) {
|
|
if (ch == 0x0D && this->peekCharWithoutEOF() == 0x0A) {
|
|
++this->index;
|
|
}
|
|
++this->lineNumber;
|
|
this->lineStart = this->index;
|
|
} else if (ch == 0x2A && this->peekCharWithoutEOF() == 0x2F) {
|
|
// Block comment ends with '*/'.
|
|
++this->index;
|
|
return;
|
|
}
|
|
}
|
|
|
|
throwUnexpectedToken();
|
|
}
|
|
|
|
char32_t Scanner::scanHexEscape(char prefix)
|
|
{
|
|
size_t len = (prefix == 'u') ? 4 : 2;
|
|
char32_t code = 0;
|
|
|
|
for (size_t i = 0; i < len; ++i) {
|
|
if (!this->eof() && isHexDigit(this->peekCharWithoutEOF())) {
|
|
code = code * 16 + hexValue(this->peekCharWithoutEOF());
|
|
++this->index;
|
|
} else {
|
|
return EMPTY_CODE_POINT;
|
|
}
|
|
}
|
|
|
|
return code;
|
|
}
|
|
|
|
char32_t Scanner::scanUnicodeCodePointEscape()
|
|
{
|
|
// At least, one hex digit is required.
|
|
if (this->eof() || this->peekCharWithoutEOF() == '}') {
|
|
this->throwUnexpectedToken();
|
|
}
|
|
|
|
char32_t code = 0;
|
|
char16_t ch;
|
|
|
|
while (!this->eof()) {
|
|
ch = this->peekCharWithoutEOF();
|
|
++this->index;
|
|
if (!isHexDigit(ch)) {
|
|
break;
|
|
}
|
|
code = code * 16 + hexValue(ch);
|
|
}
|
|
|
|
if (code > 0x10FFFF || ch != '}') {
|
|
this->throwUnexpectedToken();
|
|
}
|
|
|
|
return code;
|
|
}
|
|
|
|
/* Fast path for scanning an identifier that starts at the current index.
 * The first character is consumed unconditionally. If a backslash or a code
 * unit in [0xD800, 0xDFFF) is met, the scan restarts from the beginning in
 * getComplexIdentifier(). Otherwise the result refers directly to the source
 * buffer and carries no newly allocated string. */
Scanner::ScanIDResult Scanner::getIdentifier()
{
    const size_t start = this->index;
    ++this->index;

    while (UNLIKELY(!this->eof())) {
        const char16_t ch = this->peekCharWithoutEOF();

        // Backslash (U+005C) marks a Unicode escape sequence; surrogates need
        // pair handling. Both are delegated to the slow path.
        // NOTE(review): the upper bound is exclusive, so 0xDFFF itself is not
        // routed to the slow path -- confirm this is intended.
        const bool needsSlowPath = (ch == 0x5C) || (ch >= 0xD800 && ch < 0xDFFF);
        if (UNLIKELY(needsSlowPath)) {
            this->index = start;
            return this->getComplexIdentifier();
        }

        if (!isIdentifierPart(ch)) {
            break;
        }
        ++this->index;
    }

    // Describe the [start, index) slice of the source buffer without copying it.
    const auto& srcData = this->source.bufferAccessData();
    void* sliceBegin;
    if (srcData.has8BitContent) {
        sliceBegin = reinterpret_cast<void*>(((LChar*)srcData.buffer) + start);
    } else {
        sliceBegin = reinterpret_cast<void*>(((char16_t*)srcData.buffer) + start);
    }
    StringBufferAccessData ad(srcData.has8BitContent, this->index - start, sliceBegin);

    return std::make_tuple(ad, nullptr);
}
|
|
|
|
/* Slow path for scanning an identifier that contains "\u" escapes or
 * surrogate pairs. The identifier text is accumulated into a UTF-16 buffer and
 * returned as a newly allocated UTF16String together with its buffer access
 * data. Calls throwUnexpectedToken() for a backslash not followed by 'u', for
 * an invalid 4-digit escape, and for an identifier that spells "await" while
 * parserContext->await is set. */
Scanner::ScanIDResult Scanner::getComplexIdentifier()
{
    char16_t cp = this->codePointAt(this->index);
    ParserCharPiece piece = ParserCharPiece(cp);
    UTF16StringDataNonGCStd id(piece.data, piece.length);
    this->index += id.length();

    // '\u' (U+005C, U+0075) denotes an escaped character.
    char32_t ch;
    if (cp == 0x5C) {
        if (this->peekChar() != 0x75) {
            this->throwUnexpectedToken();
        }
        ++this->index;
        if (this->peekChar() == '{') {
            ++this->index;
            ch = this->scanUnicodeCodePointEscape();
            // NOTE(review): unlike the 4-digit form below, this branch does not
            // check isIdentifierStart() -- confirm whether that is intended.
        } else {
            ch = this->scanHexEscape('u');
            cp = ch;
            if (ch == EMPTY_CODE_POINT || ch == '\\' || !isIdentifierStart(cp)) {
                this->throwUnexpectedToken();
            }
        }
        // Replace the backslash with the decoded character. Encode through
        // ParserCharPiece (as the loop below does) so that a code point above
        // 0xFFFF becomes a surrogate pair instead of being truncated to one
        // UTF-16 code unit.
        piece = ParserCharPiece(ch);
        id = UTF16StringDataNonGCStd(piece.data, piece.length);
    }

    while (!this->eof()) {
        cp = this->codePointAt(this->index);
        if (!isIdentifierPart(cp)) {
            break;
        }

        ch = cp;

        // cp is a single UTF-16 code unit; when the current unit is a
        // surrogate, combine it with the following trail unit if there is one.
        if (ch >= 128 && this->peekChar() >= 0xD800 && this->peekChar() < 0xDFFF) {
            ch = peekChar();
            ++this->index;
            char32_t ch2 = this->peekChar();
            if (U16_IS_TRAIL(ch2)) {
                ch = U16_GET_SUPPLEMENTARY(ch, ch2);
            }
            --this->index;
        }
        piece = ParserCharPiece(ch);
        id += UTF16StringDataNonGCStd(piece.data, piece.length);
        this->index += piece.length;

        // '\u' (U+005C, U+0075) denotes an escaped character.
        if (cp == 0x5C) {
            // Drop the backslash that was just appended.
            id.erase(id.length() - 1);

            if (this->peekChar() != 0x75) {
                this->throwUnexpectedToken();
            }
            ++this->index;
            if (this->peekChar() == '{') {
                ++this->index;
                ch = this->scanUnicodeCodePointEscape();
            } else {
                ch = this->scanHexEscape('u');
                cp = ch;
                if (ch == EMPTY_CODE_POINT || ch == '\\' || !isIdentifierPart(cp)) {
                    this->throwUnexpectedToken();
                }
            }
            piece = ParserCharPiece(ch);
            id += UTF16StringDataNonGCStd(piece.data, piece.length);
        }
    }

    String* str = new UTF16String(id.data(), id.length());

    if (UNLIKELY(this->parserContext->await && id == u"await")) {
        this->throwUnexpectedToken(Messages::KeywordMustNotContainEscapedCharacters);
    }

    return std::make_tuple(str->bufferAccessData(), str);
}
|
|
|
|
uint16_t Scanner::octalToDecimal(char16_t ch, bool octal)
|
|
{
|
|
// \0 is not octal escape sequence
|
|
char16_t code = octalValue(ch);
|
|
|
|
octal |= (ch != '0');
|
|
|
|
if (!this->eof() && isOctalDigit(this->peekChar())) {
|
|
octal = true;
|
|
code = code * 8 + octalValue(this->peekChar());
|
|
++this->index;
|
|
|
|
// 3 digits are only allowed when string starts
|
|
// with 0, 1, 2, 3
|
|
// if ('0123'.indexOf(ch) >= 0 && !this->eof() && Character.isOctalDigit(this->source.charCodeAt(this->index))) {
|
|
if ((ch >= '0' && ch <= '3') && !this->eof() && isOctalDigit(this->peekChar())) {
|
|
code = code * 8 + octalValue(this->peekChar());
|
|
++this->index;
|
|
}
|
|
}
|
|
|
|
ASSERT(!octal || code < NON_OCTAL_VALUE);
|
|
return octal ? code : NON_OCTAL_VALUE;
|
|
};
|
|
|
|
/* Scans the punctuator that starts with `ch` at the current index and stores
 * it in `token` via setPunctuatorResult(). The first character is consumed
 * unconditionally; longer punctuators are recognised by peeking at the
 * following characters. An unknown character calls throwUnexpectedToken(). */
void Scanner::scanPunctuator(Scanner::ScannerResult* token, char16_t ch)
{
    const size_t start = this->index;
    PunctuatorKind kind;
    // Check for most common single-character punctuators.
    ++this->index;

    switch (ch) {
    case '(':
        kind = LeftParenthesis;
        break;

    case '{':
        kind = LeftBrace;
        break;

    case '.':
        kind = Period;
        // Spread operator "...". The third '.' is read with sourceCharAt(), so
        // make sure that offset is still inside the source (e.g. input ending in "..").
        if (this->peekChar() == '.' && this->index + 1 < this->length && this->sourceCharAt(this->index + 1) == '.') {
            this->index += 2;
            kind = PeriodPeriodPeriod;
        }
        break;

    case '}':
        kind = RightBrace;
        break;
    case ')':
        kind = RightParenthesis;
        break;
    case ';':
        kind = SemiColon;
        break;
    case ',':
        kind = Comma;
        break;
    case '[':
        kind = LeftSquareBracket;
        break;
    case ']':
        kind = RightSquareBracket;
        break;
    case ':':
        kind = Colon;
        break;
    case '?':
        // "?", "??", "??=", "?."
        kind = GuessMark;
        ch = this->peekChar();
        if (ch == '?') {
            ++this->index;
            kind = NullishCoalescing;
            if (this->peekChar() == '=') {
                kind = LogicalNullishEqual;
                ++this->index;
            }
        } else if (ch == '.') {
            ++this->index;
            kind = GuessDot;
        }
        break;
    case '~':
        kind = Wave;
        break;

    case '>':
        // ">", ">=", ">>", ">>=", ">>>", ">>>="
        ch = this->peekChar();
        kind = RightInequality;

        if (ch == '>') {
            ++this->index;
            ch = this->peekChar();
            kind = RightShift;

            if (ch == '>') {
                ++this->index;
                kind = UnsignedRightShift;

                if (this->peekChar() == '=') {
                    ++this->index;
                    kind = UnsignedRightShiftEqual;
                }
            } else if (ch == '=') {
                kind = RightShiftEqual;
                ++this->index;
            }
        } else if (ch == '=') {
            kind = RightInequalityEqual;
            ++this->index;
        }
        break;

    case '<':
        // "<", "<=", "<<", "<<="
        ch = this->peekChar();
        kind = LeftInequality;

        if (ch == '<') {
            ++this->index;
            kind = LeftShift;

            if (this->peekChar() == '=') {
                kind = LeftShiftEqual;
                ++this->index;
            }
        } else if (ch == '=') {
            kind = LeftInequalityEqual;
            ++this->index;
        }
        break;

    case '=':
        // "=", "==", "===", "=>"
        ch = this->peekChar();
        kind = Substitution;

        if (ch == '=') {
            ++this->index;
            kind = Equal;

            if (this->peekChar() == '=') {
                kind = StrictEqual;
                ++this->index;
            }
        } else if (ch == '>') {
            kind = Arrow;
            ++this->index;
        }
        break;

    case '!':
        // "!", "!=", "!=="
        kind = ExclamationMark;

        if (this->peekChar() == '=') {
            ++this->index;
            kind = NotEqual;

            if (this->peekChar() == '=') {
                kind = NotStrictEqual;
                ++this->index;
            }
        }
        break;

    case '&':
        // "&", "&=", "&&", "&&="
        ch = this->peekChar();
        kind = BitwiseAnd;

        if (ch == '&') {
            kind = LogicalAnd;
            ++this->index;

            if (this->peekChar() == '=') {
                ++this->index;
                kind = LogicalAndEqual;
            }
        } else if (ch == '=') {
            kind = BitwiseAndEqual;
            ++this->index;
        }
        break;
    case '|':
        // "|", "|=", "||", "||="
        ch = this->peekChar();
        kind = BitwiseOr;

        if (ch == '|') {
            kind = LogicalOr;
            ++this->index;

            if (this->peekChar() == '=') {
                ++this->index;
                kind = LogicalOrEqual;
            }
        } else if (ch == '=') {
            kind = BitwiseOrEqual;
            ++this->index;
        }
        break;

    case '^':
        // "^", "^="
        kind = BitwiseXor;

        if (this->peekChar() == '=') {
            kind = BitwiseXorEqual;
            ++this->index;
        }
        break;

    case '+':
        // "+", "++", "+="
        ch = this->peekChar();
        kind = Plus;

        if (ch == '+') {
            kind = PlusPlus;
            ++this->index;
        } else if (ch == '=') {
            kind = PlusEqual;
            ++this->index;
        }
        break;

    case '-':
        // "-", "--", "-="
        ch = this->peekChar();
        kind = Minus;

        if (ch == '-') {
            kind = MinusMinus;
            ++this->index;
        } else if (ch == '=') {
            kind = MinusEqual;
            ++this->index;
        }
        break;

    case '*':
        // "*", "*=", "**", "**="
        ch = this->peekChar();
        kind = Multiply;

        if (ch == '=') {
            kind = MultiplyEqual;
            ++this->index;
        } else if (ch == '*') {
            kind = Exponentiation;
            ++this->index;

            if (this->peekChar() == '=') {
                kind = ExponentiationEqual;
                ++this->index;
            }
        }
        break;

    case '/':
        // "/", "/="
        kind = Divide;

        if (this->peekChar() == '=') {
            kind = DivideEqual;
            ++this->index;
        }
        break;

    case '%':
        // "%", "%="
        kind = Mod;

        if (this->peekChar() == '=') {
            kind = ModEqual;
            ++this->index;
        }
        break;

    case '#':
        kind = Hash;
        // "#!" is only recognised when the '#' is the very first character of the source.
        if (this->index == 1 && this->peekChar() == '!') {
            kind = HashBang;
            ++this->index;
        }
        break;

    default:
        this->throwUnexpectedToken();
        kind = PunctuatorKindEnd;
        break;
    }

    token->setPunctuatorResult(this->lineNumber, this->lineStart, start, this->index, kind);
}
|
|
|
|
void Scanner::testNumericSeparator(size_t start, bool isBigInt, bool isHex, bool isBinary, bool isOctal)
|
|
{
|
|
for (size_t i = start; i < this->index - 1; i++) {
|
|
char16_t ch = this->sourceCharAt(i);
|
|
if (UNLIKELY(ch == '_' && this->sourceCharAt(i + 1) == '_')) {
|
|
ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Only one underscore is allowed as numeric separator"), ErrorObject::SyntaxError);
|
|
}
|
|
if (UNLIKELY(isHex && (ch == 'x' || ch == 'X') && this->sourceCharAt(i + 1) == '_')) {
|
|
this->throwUnexpectedToken();
|
|
}
|
|
if (UNLIKELY(isBinary && (ch == 'b' || ch == 'B') && this->sourceCharAt(i + 1) == '_')) {
|
|
this->throwUnexpectedToken();
|
|
}
|
|
if (UNLIKELY(isOctal && (ch == 'o' || ch == 'O') && this->sourceCharAt(i + 1) == '_')) {
|
|
this->throwUnexpectedToken();
|
|
}
|
|
}
|
|
if (this->sourceCharAt(this->index - 1) == '_' || (isBigInt && this->sourceCharAt(this->index - 2) == '_')) {
|
|
ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Numeric separators are not allowed at the end of numeric literals"), ErrorObject::SyntaxError);
|
|
}
|
|
}
|
|
|
|
// Scans the digits of a hexadecimal literal; the caller has already consumed
// the "0x" / "0X" prefix and `start` is the index where the literal begins.
//
// The first 16 digits are accumulated in a 64-bit integer; once 16 digits have
// been read the value is carried on in a double. An optional trailing 'n'
// marks a BigInt literal. The result is stored into `token`.
void Scanner::scanHexLiteral(Scanner::ScannerResult* token, size_t start)
{
    ASSERT(token != nullptr);

    uint64_t integerValue = 0;
    double doubleValue = 0.0;
    bool useDouble = false;
    bool sawDigit = false;
    // Passed to isHexDigitOrUnderscore(), which presumably sets it when an
    // underscore is seen (it is not assigned anywhere else in this function).
    bool seenUnderscore = false;
    size_t digitCount = 0;

    for (; !this->eof(); this->index++) {
        const char16_t ch = this->peekCharWithoutEOF();
        if (!isHexDigitOrUnderscore(ch, seenUnderscore)) {
            break;
        }
        if (UNLIKELY(ch == '_')) {
            // Separators are skipped here and validated after the scan.
            continue;
        }

        if (useDouble) {
            doubleValue = doubleValue * 16 + toHexNumericValue(ch);
        } else {
            integerValue = (integerValue << 4) + toHexNumericValue(ch);
            if (++digitCount >= 16) {
                // 16 hex digits fill all 64 bits; continue in floating point.
                useDouble = true;
                doubleValue = integerValue;
                integerValue = 0;
            }
        }
        sawDigit = true;
    }

    if (!sawDigit) {
        // Only the prefix (and possibly separators) was present.
        this->throwUnexpectedToken();
    }

    const bool atEnd = this->eof();
    const bool isBigInt = !atEnd && this->peekChar() == 'n';
    if (UNLIKELY(isBigInt)) {
        ++this->index;
    }

    // The literal must not run directly into an identifier.
    if (UNLIKELY(!atEnd && isIdentifierStart(this->peekChar()))) {
        this->throwUnexpectedToken();
    }

    if (UNLIKELY(seenUnderscore)) {
        testNumericSeparator(start, isBigInt, true, false, false);
    }

    if (useDouble) {
        ASSERT(integerValue == 0);
        token->setNumericLiteralResult(doubleValue, this->lineNumber, this->lineStart, start, this->index, isBigInt, seenUnderscore);
    } else {
        ASSERT(doubleValue == 0.0);
        token->setNumericLiteralResult(integerValue, this->lineNumber, this->lineStart, start, this->index, isBigInt, seenUnderscore);
    }
}
|
|
|
|
// Scans the digits of a binary literal; the caller has already consumed the
// "0b" / "0B" prefix and `start` is the index where the literal begins.
//
// Up to 64 digits are accumulated exactly in a 64-bit integer. If more digits
// follow, accumulation continues in a double (the same strategy
// scanHexLiteral uses) instead of silently wrapping the integer. Literals
// that fit in 64 bits take exactly the same path as before.
// An optional trailing 'n' marks a BigInt literal. The result is stored into
// `token`.
void Scanner::scanBinaryLiteral(Scanner::ScannerResult* token, size_t start)
{
    ASSERT(token != nullptr);
    // Number of binary digits a uint64_t can absorb without overflow.
    const size_t maxIntegerDigits = 64;

    uint64_t number = 0;
    double numberDouble = 0.0;
    bool shouldUseDouble = false;
    size_t digitCount = 0;
    bool scanned = false;
    bool seenUnderscore = false;

    while (!this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        if (ch == '0' || ch == '1') {
            if (!shouldUseDouble && digitCount == maxIntegerDigits) {
                // The next shift would drop the top bit: switch to double.
                shouldUseDouble = true;
                numberDouble = static_cast<double>(number);
                number = 0;
            }
            if (shouldUseDouble) {
                numberDouble = numberDouble * 2 + (ch - '0');
            } else {
                number = (number << 1) + ch - '0';
                ++digitCount;
            }
            this->index++;
            scanned = true;
        } else if (ch == '_') {
            // Separators are skipped here and validated after the scan.
            this->index++;
            seenUnderscore = true;
        } else {
            break;
        }
    }

    if (!scanned) {
        // only 0b or 0B
        this->throwUnexpectedToken();
    }

    bool isEof = this->eof();
    bool isBigInt = !isEof && this->peekChar() == 'n';

    if (UNLIKELY(isBigInt)) {
        ++this->index;
    }

    // The literal must not run directly into an identifier or another digit
    // (e.g. "0b12").
    if (UNLIKELY(!isEof && (isIdentifierStart(this->peekChar()) || isDecimalDigit(this->peekChar())))) {
        this->throwUnexpectedToken();
    }

    if (UNLIKELY(seenUnderscore)) {
        testNumericSeparator(start, isBigInt, false, true, false);
    }

    if (shouldUseDouble) {
        ASSERT(number == 0);
        token->setNumericLiteralResult(numberDouble, this->lineNumber, this->lineStart, start, this->index, isBigInt, seenUnderscore);
    } else {
        token->setNumericLiteralResult(number, this->lineNumber, this->lineStart, start, this->index, isBigInt, seenUnderscore);
    }
}
|
|
|
|
// Scans the digits of an octal literal.
//
// `prefix` is the character that followed the leading '0': 'o' / 'O' for the
// "0o" form (already consumed by the caller) or an octal digit for the legacy
// implicit form, in which case that digit is still unconsumed and is scanned
// here. `isLegacyOctal` disables numeric separators and the BigInt suffix.
//
// Up to 21 digits (63 bits) are accumulated exactly in a 64-bit integer. If
// more digits follow, accumulation continues in a double (the same strategy
// scanHexLiteral uses) instead of silently wrapping the integer. Literals
// that fit take exactly the same path as before.
// The result is stored into `token`; token->octal records whether the literal
// used the legacy form.
void Scanner::scanOctalLiteral(Scanner::ScannerResult* token, char16_t prefix, size_t start, bool isLegacyOctal)
{
    ASSERT(token != nullptr);
    // Number of octal digits a uint64_t can absorb without overflow (3 bits each).
    const size_t maxIntegerDigits = 21;

    uint64_t number = 0;
    double numberDouble = 0.0;
    bool shouldUseDouble = false;
    size_t digitCount = 0;
    bool scanned = false;
    bool octal = isOctalDigit(prefix);
    bool seenUnderscore = false;

    while (!this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        if (!isLegacyOctal) {
            if (UNLIKELY(ch == '_')) {
                // Separators are skipped here and validated after the scan.
                this->index++;
                seenUnderscore = true;
                continue;
            }
        }

        if (!isOctalDigit(ch)) {
            break;
        }

        if (!shouldUseDouble && digitCount == maxIntegerDigits) {
            // The next shift could drop high bits: switch to double.
            shouldUseDouble = true;
            numberDouble = static_cast<double>(number);
            number = 0;
        }
        if (shouldUseDouble) {
            numberDouble = numberDouble * 8 + (ch - '0');
        } else {
            number = (number << 3) + ch - '0';
            ++digitCount;
        }
        this->index++;
        scanned = true;
    }

    if (!octal && !scanned) {
        // only 0o or 0O
        throwUnexpectedToken();
    }

    bool isEof = this->eof();
    bool isBigInt = !isEof && !isLegacyOctal && this->peekChar() == 'n';

    if (UNLIKELY(isBigInt)) {
        ++this->index;
    }

    // The literal must not run directly into an identifier or another digit.
    char16_t ch = this->peekChar();
    if (isIdentifierStart(ch) || isDecimalDigit(ch)) {
        throwUnexpectedToken();
    }

    if (UNLIKELY(seenUnderscore)) {
        testNumericSeparator(start, isBigInt, false, false, true);
    }

    if (shouldUseDouble) {
        ASSERT(number == 0);
        token->setNumericLiteralResult(numberDouble, this->lineNumber, this->lineStart, start, this->index, isBigInt, seenUnderscore);
    } else {
        token->setNumericLiteralResult(number, this->lineNumber, this->lineStart, start, this->index, isBigInt, seenUnderscore);
    }
    token->octal = octal;
}
|
|
|
|
bool Scanner::isImplicitOctalLiteral()
|
|
{
|
|
// Implicit octal, unless there is a non-octal digit.
|
|
// (Annex B.1.1 on Numeric Literals)
|
|
for (size_t i = this->index + 1; i < this->length; ++i) {
|
|
const char16_t ch = this->sourceCharAt(i);
|
|
if (ch == '8' || ch == '9') {
|
|
return false;
|
|
}
|
|
if (!isOctalDigit(ch)) {
|
|
return true;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Scans a numeric literal starting at the current index, which must hold a
// decimal digit or '.'.
//
// Literals starting with "0x", "0b", "0o" or an implicit-octal "0<digit>" are
// delegated to the dedicated scanners. Otherwise the decimal form is consumed
// here: integer part, optional fraction, optional exponent and optional
// BigInt suffix 'n'. Numeric-separator placement is validated afterwards.
//
// For the decimal form the token value is stored as 0; only the source range
// and flags are recorded.
void Scanner::scanNumericLiteral(Scanner::ScannerResult* token)
{
    ASSERT(token != nullptr);
    const size_t start = this->index;
    char16_t ch = this->peekChar();
    const char16_t startChar = ch;
    // Numeric literal must start with a decimal digit or a decimal point.
    ASSERT(isDecimalDigit(ch) || (ch == '.'));

    bool seenDotOrE = false;
    bool seenUnderscore = false;

    if (ch != '.') {
        ++this->index;
        ch = this->peekChar();

        // Hex number starts with '0x'.
        // Octal number starts with '0'.
        // Octal number in ES6 starts with '0o'.
        // Binary number in ES6 starts with '0b'.
        if (startChar == '0') {
            if (ch == 'x' || ch == 'X') {
                ++this->index;
                return this->scanHexLiteral(token, start);
            }
            if (ch == 'b' || ch == 'B') {
                ++this->index;
                return this->scanBinaryLiteral(token, start);
            }
            if (ch == 'o' || ch == 'O') {
                ++this->index;
                return this->scanOctalLiteral(token, ch, start, false);
            }

            if (ch && isOctalDigit(ch) && this->isImplicitOctalLiteral()) {
                return this->scanOctalLiteral(token, ch, start, true);
            }
        }

        // Integer part.
        while (isDecimalDigitOrUnderscore(this->peekChar(), seenUnderscore)) {
            ++this->index;
        }
        ch = this->peekChar();
    }

    // Fraction part.
    if (ch == '.') {
        seenDotOrE = true;
        ++this->index;
        while (isDecimalDigitOrUnderscore(this->peekChar(), seenUnderscore)) {
            ++this->index;
        }
        ch = this->peekChar();
    }

    // Exponent part: 'e' / 'E', optional sign, then at least one digit.
    if (ch == 'e' || ch == 'E') {
        seenDotOrE = true;
        ++this->index;

        ch = this->peekChar();
        if (ch == '+' || ch == '-') {
            ++this->index;
            ch = this->peekChar();
        }

        if (isDecimalDigit(ch)) {
            do {
                ++this->index;
                ch = this->peekChar();
            } while (isDecimalDigitOrUnderscore(ch, seenUnderscore));
        } else {
            this->throwUnexpectedToken();
        }
    }

    bool isEof = this->eof();
    bool isBigInt = !isEof && this->peekChar() == 'n';

    if (UNLIKELY(isBigInt)) {
        // A BigInt literal may not have a fraction or exponent, nor a leading
        // zero followed by more characters.
        if (seenDotOrE || (startChar == '0' && (this->index - start) > 1)) {
            this->throwUnexpectedToken();
        }
        ++this->index;
    }

    // The literal must not run directly into an identifier.
    if (UNLIKELY(!isEof && isIdentifierStart(this->peekChar()))) {
        this->throwUnexpectedToken();
    }

    if (UNLIKELY(seenUnderscore)) {
        // NOTE(review): this rejects every separator in a literal whose first
        // character is '0', which also covers forms such as "0.0_1" -- confirm
        // against the NumericLiteral grammar whether that is intended.
        if (this->sourceCharAt(start) == '0') {
            ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Numeric separator can not be used after leading 0"), ErrorObject::SyntaxError);
        }

        for (size_t i = start; i < this->index - 1; i++) {
            char16_t ch = this->sourceCharAt(i);
            if (UNLIKELY(ch == '_' && this->sourceCharAt(i + 1) == '_')) {
                ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Only one underscore is allowed as numeric separator"), ErrorObject::SyntaxError);
            }
            if (UNLIKELY(ch == '_' && (this->sourceCharAt(i + 1) == 'e' || this->sourceCharAt(i + 1) == 'E'))) {
                ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Numeric separator may not appear adjacent to ExponentPart"), ErrorObject::SyntaxError);
            }
            // A separator must sit between two digits, so it may appear
            // neither directly after nor directly before the decimal point
            // ("1._5" and "1_.5" are both invalid).
            if (UNLIKELY(ch == '.' && this->sourceCharAt(i + 1) == '_')) {
                this->throwUnexpectedToken();
            }
            if (UNLIKELY(ch == '_' && this->sourceCharAt(i + 1) == '.')) {
                this->throwUnexpectedToken();
            }
        }
        if (this->sourceCharAt(this->index - 1) == '_' || (isBigInt && this->sourceCharAt(this->index - 2) == '_')) {
            ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Numeric separators are not allowed at the end of numeric literals"), ErrorObject::SyntaxError);
        }
    }

    token->setNumericLiteralResult(0, this->lineNumber, this->lineStart, start, this->index, true, seenUnderscore);
    // Flag integer-looking literals that begin with '0' and have further
    // characters (not counting a BigInt suffix).
    if (UNLIKELY(startChar == '0' && !seenDotOrE && (this->index - start) > (isBigInt ? 2 : 1))) {
        token->startWithZero = true;
    }
}
|
|
|
|
// Scans a single- or double-quoted string literal starting at the current
// index (which must hold the opening quote).
//
// Escape sequences are validated but not decoded here. When the literal
// contains no backslash the token refers to the source range between the
// quotes; otherwise the token is stored with a null string so the value can
// be built later if needed. `octal` is reported when an octal-style or
// decimal-digit escape was seen.
void Scanner::scanStringLiteral(Scanner::ScannerResult* token)
{
    ASSERT(token != nullptr);
    const size_t start = this->index;
    char16_t quote = this->peekChar();
    // String literal must start with a quote.
    ASSERT((quote == '\'' || quote == '"'));

    ++this->index;
    bool octal = false;
    bool isPlainCase = true;

    while (LIKELY(!this->eof())) {
        char16_t ch = this->peekCharWithoutEOF();
        ++this->index;

        if (ch == quote) {
            // Closing quote found; '\0' marks the literal as terminated.
            quote = '\0';
            break;
        }

        if (UNLIKELY(ch == '\\')) {
            ch = this->peekChar();
            ++this->index;
            isPlainCase = false;

            if (ch && isLineTerminator(ch)) {
                // Line continuation: backslash followed by a line terminator.
                ++this->lineNumber;
                const char16_t following = this->peekChar();
                if ((ch == '\r' && following == '\n') || (ch == '\n' && following == '\r')) {
                    ++this->index;
                }
                this->lineStart = this->index;
                continue;
            }

            switch (ch) {
            case 'u':
                if (this->peekChar() == '{') {
                    ++this->index;
                    this->scanUnicodeCodePointEscape();
                } else if (this->scanHexEscape(ch) == EMPTY_CODE_POINT) {
                    this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence);
                }
                break;
            case 'x':
                if (this->scanHexEscape(ch) == EMPTY_CODE_POINT) {
                    this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence);
                }
                break;
            case 'n':
            case 'r':
            case 't':
            case 'b':
            case 'f':
            case 'v':
                // Single-character escapes need no validation.
                break;

            default:
                if (ch && isOctalDigit(ch)) {
                    octal |= (this->octalToDecimal(ch, false) != NON_OCTAL_VALUE);
                } else if (isDecimalDigit(ch)) {
                    octal = true;
                }
                break;
            }
            continue;
        }

        if (UNLIKELY(ch < 128 && (g_asciiRangeCharMap[ch] & LexerIsCharLineTerminator))) {
            // while parsing string literal, we should not end parsing string token with 0x2028 or 0x2029
            break;
        }
    }

    if (quote != '\0') {
        // Unterminated literal: report it at the opening quote.
        this->index = start;
        this->throwUnexpectedToken();
    }

    if (isPlainCase) {
        token->setResult(Token::StringLiteralToken, start + 1, this->index - 1, this->lineNumber, this->lineStart, start, this->index, octal);
    } else {
        // build string if needs
        token->setResult(Token::StringLiteralToken, (String*)nullptr, this->lineNumber, this->lineStart, start, this->index, octal);
    }
}
|
|
|
|
bool Scanner::isFutureReservedWord(const ParserStringView& id)
|
|
{
|
|
const StringBufferAccessData& data = id.bufferAccessData();
|
|
switch (data.length) {
|
|
case 4:
|
|
return data.equalsSameLength("enum");
|
|
case 5:
|
|
return data.equalsSameLength("super");
|
|
case 6:
|
|
return data.equalsSameLength("export") || data.equalsSameLength("import");
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Returns true when `identifier` equals one of the context's static strings
// let, yield, static, public, private, package, protected, interface or
// implements. Candidates are narrowed by string length before comparing.
bool Scanner::isStrictModeReservedWord(::Escargot::Context* ctx, const AtomicString& identifier)
{
    const auto len = identifier.string()->length();

    if (len == 3) { // let
        return identifier == ctx->staticStrings().let;
    }
    if (len == 5) { // yield
        return identifier == ctx->staticStrings().yield;
    }
    if (len == 6) { // static public
        return identifier == ctx->staticStrings().stringStatic || identifier == ctx->staticStrings().stringPublic;
    }
    if (len == 7) { // private package
        return identifier == ctx->staticStrings().stringPrivate || identifier == ctx->staticStrings().package;
    }
    if (len == 9) { // protected interface
        return identifier == ctx->staticStrings().stringProtected || identifier == ctx->staticStrings().interface;
    }
    if (len == 10) { // implements
        return identifier == ctx->staticStrings().implements;
    }
    return false;
}
|
|
|
|
// Scans one template-literal chunk, starting just after the opening '`'
// (when `head` is true) or just after a substitution's closing '}'.
//
// Scanning stops at a closing '`' (tail = true) or at "${". Two strings are
// built: `raw` (source text, with CR / LF / CRLF recorded as '\n') and
// `cooked` (escape sequences decoded).
//
// An error raised while scanning is not propagated. It is captured, the rest
// of the chunk is re-scanned for the raw text only, and the error is stored
// in the result in place of the cooked value.
void Scanner::scanTemplate(Scanner::ScannerResult* token, bool head)
{
    ASSERT(token != nullptr);
    // TODO apply rope-string
    UTF16StringDataNonGCStd cooked;
    UTF16StringDataNonGCStd raw;
    bool terminated = false;
    Optional<esprima::Error*> error;
    size_t start = this->index;
    // Position from which the raw-only recovery scan resumes after an error.
    size_t indexForError = this->index;
    bool tail = false;

    try {
        while (!this->eof()) {
            char16_t ch = this->peekCharWithoutEOF();
            ++this->index;
            indexForError = this->index;
            if (ch == '`') {
                tail = true;
                terminated = true;
                break;
            } else if (ch == '$') {
                if (this->peekChar() == '{') {
                    ++this->index;
                    indexForError = this->index;
                    terminated = true;
                    break;
                }
                cooked += ch;
                raw += ch;
            } else if (ch == '\\') {
                raw += ch;
                ch = this->peekChar();
                if (!isLineTerminator(ch)) {
                    // Remember where the escape body starts so its source
                    // text can be appended to `raw` once it has been decoded.
                    auto currentIndex = this->index;
                    ++this->index;
                    switch (ch) {
                    case 'n':
                        cooked += '\n';
                        break;
                    case 'r':
                        cooked += '\r';
                        break;
                    case 't':
                        cooked += '\t';
                        break;
                    case 'u':
                        if (this->peekChar() == '{') {
                            ++this->index;
                            // NOTE(review): the result is appended as a single
                            // UTF-16 unit; if scanUnicodeCodePointEscape() can
                            // return a code point above 0xFFFF it would be
                            // truncated here -- confirm its return type.
                            cooked += this->scanUnicodeCodePointEscape();
                        } else {
                            const char32_t unescaped = this->scanHexEscape(ch);
                            if (unescaped != EMPTY_CODE_POINT) {
                                ParserCharPiece piece(unescaped);
                                cooked += UTF16StringDataNonGCStd(piece.data, piece.length);
                            } else {
                                this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence);
                            }
                        }
                        break;
                    case 'x': {
                        const char32_t unescaped = this->scanHexEscape(ch);
                        if (unescaped == EMPTY_CODE_POINT) {
                            this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence);
                        }
                        ParserCharPiece piece(unescaped);
                        cooked += UTF16StringDataNonGCStd(piece.data, piece.length);
                        break;
                    }
                    case 'b':
                        cooked += '\b';
                        break;
                    case 'f':
                        cooked += '\f';
                        break;
                    case 'v':
                        cooked += '\v';
                        break;
                    default:
                        if (ch == '0') {
                            if (isDecimalDigit(this->peekChar())) {
                                // Illegal: \01 \02 and so on
                                this->throwUnexpectedToken(Messages::TemplateOctalLiteral);
                            }
                            cooked += (char16_t)'\0';
                        } else if (isOctalDigit(ch)) {
                            // Illegal: \1 \2
                            this->throwUnexpectedToken(Messages::TemplateOctalLiteral);
                        } else {
                            cooked += ch;
                        }
                        break;
                    }
                    auto endIndex = this->index;
                    for (size_t i = currentIndex; i < endIndex; i++) {
                        raw += this->sourceCharAt(i);
                    }
                } else {
                    // Line continuation: contributes to `raw` only.
                    ++this->index;
                    indexForError = this->index;
                    ++this->lineNumber;
                    if (ch == '\r' && this->peekChar() == '\n') {
                        ++this->index;
                        indexForError = this->index;
                    }
                    if (ch == 0x2028 || ch == 0x2029) {
                        raw += ch;
                    } else {
                        raw += '\n';
                    }
                    this->lineStart = this->index;
                }
            } else if (isLineTerminator(ch)) {
                ++this->lineNumber;
                if (ch == '\r' && this->peekChar() == '\n') {
                    ++this->index;
                    indexForError = this->index;
                }
                if (ch == 0x2028 || ch == 0x2029) {
                    raw += ch;
                    cooked += ch;
                } else {
                    raw += '\n';
                    cooked += '\n';
                }
                this->lineStart = this->index;
            } else {
                cooked += ch;
                raw += ch;
            }
        }

        if (!terminated) {
            this->throwUnexpectedToken();
        }
    } catch (esprima::Error* err) {
        // Keep a GC-allocated copy of the error and release the thrown one.
        error = new (GC) esprima::Error(*err);
        delete err;
        this->index = indexForError;

        // Recovery scan: find the end of the chunk and keep collecting raw
        // text; escape sequences are no longer decoded.
        while (!this->eof()) {
            char16_t ch = this->peekCharWithoutEOF();
            ++this->index;
            if (ch == '`') {
                tail = true;
                terminated = true;
                break;
            } else if (ch == '$') {
                if (this->peekChar() == '{') {
                    ++this->index;
                    terminated = true;
                    break;
                }
                cooked += ch;
                raw += ch;
            } else if (isLineTerminator(ch)) {
                ++this->lineNumber;
                if (ch == '\r' && this->peekChar() == '\n') {
                    ++this->index;
                }
                if (ch == 0x2028 || ch == 0x2029) {
                    raw += ch;
                } else {
                    raw += '\n';
                }
                this->lineStart = this->index;
            } else {
                raw += ch;
            }
        }
    }

    ScanTemplateResult* result = new ScanTemplateResult();
    result->head = head;
    result->tail = tail;
    result->valueRaw = UTF16StringData(raw.data(), raw.length());
    if (error) {
        result->error = error;
    } else {
        result->valueCooked = UTF16StringData(cooked.data(), cooked.length());
    }

    if (head) {
        // Include the opening '`' in the token's source range.
        start--;
    }

    token->setTemplateTokenResult(result, this->lineNumber, this->lineStart, start, this->index);
}
|
|
|
|
// Scans the body of a regular-expression literal, from the opening '/' at the
// current index through the closing '/'.
//
// A '/' inside a character class ([...]) does not end the body, and a
// backslash takes the following character with it. A line terminator or
// running out of input before the closing '/' is reported as an unterminated
// regular expression. Returns the text between the two slashes.
String* Scanner::scanRegExpBody()
{
    char16_t ch = this->peekChar();
    // Regular expression literal must start with a slash.
    ASSERT(ch == '/');

    // TODO apply rope-string
    ++this->index;
    UTF16StringDataNonGCStd pattern(&ch, 1);
    bool insideClass = false;
    bool closed = false;

    while (!this->eof()) {
        ch = this->peekCharWithoutEOF();
        ++this->index;
        pattern += ch;

        if (ch == '\\') {
            ch = this->peekChar();
            ++this->index;
            // ECMA-262 7.8.5
            if (isLineTerminator(ch)) {
                this->throwUnexpectedToken(Messages::UnterminatedRegExp);
            }
            pattern += ch;
        } else if (isLineTerminator(ch)) {
            this->throwUnexpectedToken(Messages::UnterminatedRegExp);
        } else if (insideClass) {
            // Stay inside the class until its closing bracket.
            insideClass = (ch != ']');
        } else if (ch == '/') {
            closed = true;
            break;
        } else if (ch == '[') {
            insideClass = true;
        }
    }

    if (!closed) {
        this->throwUnexpectedToken(Messages::UnterminatedRegExp);
    }

    // Exclude leading and trailing slash.
    pattern = pattern.substr(1, pattern.length() - 2);
    if (!isAllASCII(pattern.data(), pattern.length())) {
        return new UTF16String(pattern.data(), pattern.length());
    }
    return new ASCIIString(pattern.data(), pattern.length());
}
|
|
|
|
// Scans the flags that follow a regular-expression body: the run of
// identifier-part characters starting at the current index.
//
// A backslash inside the flags always ends in an unexpected-token error; a
// "\u" escape is scanned first so the scanner position is past it (or reset
// to just after the 'u' when the escape is malformed) when the error is
// raised. Returns the collected flags, or the empty string when there are none.
String* Scanner::scanRegExpFlags()
{
    UTF16StringDataNonGCStd flags;

    while (!this->eof()) {
        const char16_t ch = this->peekCharWithoutEOF();
        if (!isIdentifierPart(ch)) {
            break;
        }
        ++this->index;

        if (ch != '\\' || this->eof()) {
            flags += ch;
            continue;
        }

        if (this->peekChar() == 'u') {
            ++this->index;
            const size_t restore = this->index;
            const char32_t codePoint = this->scanHexEscape('u');
            if (codePoint != EMPTY_CODE_POINT) {
                ParserCharPiece piece(codePoint);
                flags += UTF16StringDataNonGCStd(piece.data, piece.length);
            } else {
                this->index = restore;
                flags += 'u';
            }
        }
        // Escapes are not accepted in flags, whichever form they take.
        this->throwUnexpectedToken();
    }

    if (!flags.length()) {
        return String::emptyString;
    }

    if (!isAllASCII(flags.data(), flags.length())) {
        return new UTF16String(flags.data(), flags.length());
    }
    return String::fromLatin1(flags.data(), flags.length());
}
|
|
|
|
// Scans a complete regular-expression literal (body, then flags) starting at
// the current index and stores it into `token` as a RegularExpressionToken.
// The pattern itself is not validated here.
void Scanner::scanRegExp(Scanner::ScannerResult* token)
{
    ASSERT(token != nullptr);
    const size_t start = this->index;

    // The body must be scanned before the flags: both advance this->index.
    String* const pattern = this->scanRegExpBody();
    String* const modifiers = this->scanRegExpFlags();

    ScanRegExpResult regexp;
    regexp.body = pattern;
    regexp.flags = modifiers;

    token->setResult(Token::RegularExpressionToken, this->lineNumber, this->lineStart, start, this->index);
    token->valueRegExp = regexp;
}
|
|
|
|
// ECMA-262 11.6.2.1 Keywords
|
|
// Maps identifier text to its KeywordKind, or NotKeyword when it matches none.
//
// Dispatch is by first character, then by length (and, where several words
// share both, by second character), so at most one string comparison is made.
// The last argument of equalsSameLength() is the offset of the first
// character that still has to be compared.
// Callers must pass at least one character: charAt(0) is read unconditionally.
static ALWAYS_INLINE KeywordKind getKeyword(const StringBufferAccessData& data)
{
    // 'const' is specialized as Keyword in V8.
    // 'yield' and 'let' are for compatibility with SpiderMonkey and ES.next.
    // Some others are from future reserved words.

    const size_t length = data.length;
    const char16_t lead = data.charAt(0);

    switch (lead) {
    case 'a':
        if (length == 2) {
            if (data.charAt(1) == 's') {
                return AsKeyword;
            }
        } else if (length == 5) {
            const char16_t next = data.charAt(1);
            if (next == 's' && data.equalsSameLength("async", 2)) {
                return AsyncKeyword;
            }
            if (next == 'w' && data.equalsSameLength("await", 2)) {
                return AwaitKeyword;
            }
        } else if (length == 9) {
            if (data.equalsSameLength("arguments", 1)) {
                return ArgumentsKeyword;
            }
        }
        break;
    case 'b':
        if (length == 5 && data.equalsSameLength("break", 1)) {
            return BreakKeyword;
        }
        break;
    case 'c':
        switch (length) {
        case 4:
            if (data.equalsSameLength("case", 1)) {
                return CaseKeyword;
            }
            break;
        case 5: {
            const char16_t next = data.charAt(1);
            if (next == 'a' && data.equalsSameLength("catch", 2)) {
                return CatchKeyword;
            }
            if (next == 'o' && data.equalsSameLength("const", 2)) {
                return ConstKeyword;
            }
            if (next == 'l' && data.equalsSameLength("class", 2)) {
                return ClassKeyword;
            }
            break;
        }
        case 8:
            if (data.equalsSameLength("continue", 1)) {
                return ContinueKeyword;
            }
            break;
        }
        break;
    case 'd':
        if (length == 2) {
            if (data.charAt(1) == 'o') {
                return DoKeyword;
            }
        } else if (length == 6) {
            if (data.equalsSameLength("delete", 1)) {
                return DeleteKeyword;
            }
        } else if (length == 7) {
            if (data.equalsSameLength("default", 1)) {
                return DefaultKeyword;
            }
        } else if (length == 8) {
            if (data.equalsSameLength("debugger", 1)) {
                return DebuggerKeyword;
            }
        }
        break;
    case 'e':
        if (length == 4) {
            const char16_t next = data.charAt(1);
            if (next == 'l' && data.equalsSameLength("else", 2)) {
                return ElseKeyword;
            }
            if (next == 'n' && data.equalsSameLength("enum", 2)) {
                return EnumKeyword;
            }
            if (next == 'v' && data.equalsSameLength("eval", 2)) {
                return EvalKeyword;
            }
        } else if (length == 6) {
            if (data.equalsSameLength("export", 1)) {
                return ExportKeyword;
            }
        } else if (length == 7) {
            if (data.equalsSameLength("extends", 1)) {
                return ExtendsKeyword;
            }
        }
        break;
    case 'f':
        if (length == 3) {
            if (data.equalsSameLength("for", 1)) {
                return ForKeyword;
            }
        } else if (length == 4) {
            if (data.equalsSameLength("from", 1)) {
                return FromKeyword;
            }
        } else if (length == 5) {
            if (data.equalsSameLength("false", 1)) {
                return FalseKeyword;
            }
        } else if (length == 7) {
            if (data.equalsSameLength("finally", 1)) {
                return FinallyKeyword;
            }
        } else if (length == 8) {
            if (data.equalsSameLength("function", 1)) {
                return FunctionKeyword;
            }
        }
        break;
    case 'g':
        if (length == 3 && data.equalsSameLength("get", 1)) {
            return GetKeyword;
        }
        break;
    case 'i':
        if (length == 2) {
            const char16_t next = data.charAt(1);
            if (next == 'f') {
                return IfKeyword;
            }
            if (next == 'n') {
                return InKeyword;
            }
        } else if (length == 6) {
            if (data.equalsSameLength("import", 1)) {
                return ImportKeyword;
            }
        } else if (length == 9) {
            if (data.equalsSameLength("interface", 1)) {
                return InterfaceKeyword;
            }
        } else if (length == 10) {
            const char16_t next = data.charAt(1);
            if (next == 'n' && data.equalsSameLength("instanceof", 2)) {
                return InstanceofKeyword;
            }
            if (next == 'm' && data.equalsSameLength("implements", 2)) {
                return ImplementsKeyword;
            }
        }
        break;
    case 'l':
        if (length == 3 && data.equalsSameLength("let", 1)) {
            return LetKeyword;
        }
        break;
    case 'n':
        switch (length) {
        case 3:
            if (data.equalsSameLength("new", 1)) {
                return NewKeyword;
            }
            break;
        case 4:
            if (data.equalsSameLength("null", 1)) {
                return NullKeyword;
            }
            break;
        }
        break;
    case 'o':
        if (length == 2 && data.charAt(1) == 'f') {
            return OfKeyword;
        }
        break;
    case 'p':
        if (length == 6) {
            if (data.equalsSameLength("public", 1)) {
                return PublicKeyword;
            }
        } else if (length == 7) {
            const char16_t next = data.charAt(1);
            if (next == 'a' && data.equalsSameLength("package", 2)) {
                return PackageKeyword;
            }
            if (next == 'r' && data.equalsSameLength("private", 2)) {
                return PrivateKeyword;
            }
        } else if (length == 9) {
            if (data.equalsSameLength("protected", 1)) {
                return ProtectedKeyword;
            }
        }
        break;
    case 'r':
        if (length == 6 && data.equalsSameLength("return", 1)) {
            return ReturnKeyword;
        }
        break;
    case 's':
        if (length == 3) {
            if (data.equalsSameLength("set", 1)) {
                return SetKeyword;
            }
        } else if (length == 5) {
            if (data.equalsSameLength("super", 1)) {
                return SuperKeyword;
            }
        } else if (length == 6) {
            const char16_t next = data.charAt(1);
            if (next == 'w' && data.equalsSameLength("switch", 2)) {
                return SwitchKeyword;
            }
            if (next == 't' && data.equalsSameLength("static", 2)) {
                return StaticKeyword;
            }
        }
        break;
    case 't':
        if (length == 3) {
            if (data.equalsSameLength("try", 1)) {
                return TryKeyword;
            }
        } else if (length == 4) {
            const char16_t next = data.charAt(1);
            if (next == 'h' && data.equalsSameLength("this", 2)) {
                return ThisKeyword;
            }
            if (next == 'r' && data.equalsSameLength("true", 2)) {
                return TrueKeyword;
            }
        } else if (length == 5) {
            if (data.equalsSameLength("throw", 1)) {
                return ThrowKeyword;
            }
        } else if (length == 6) {
            if (data.equalsSameLength("typeof", 1)) {
                return TypeofKeyword;
            }
        }
        break;
    case 'v':
        switch (length) {
        case 3:
            if (data.equalsSameLength("var", 1)) {
                return VarKeyword;
            }
            break;
        case 4:
            if (data.equalsSameLength("void", 1)) {
                return VoidKeyword;
            }
            break;
        }
        break;
    case 'w':
        switch (length) {
        case 4:
            if (data.equalsSameLength("with", 1)) {
                return WithKeyword;
            }
            break;
        case 5:
            if (data.equalsSameLength("while", 1)) {
                return WhileKeyword;
            }
            break;
        }
        break;
    case 'y':
        if (length == 5 && data.equalsSameLength("yield", 1)) {
            return YieldKeyword;
        }
        break;
    }

    return NotKeyword;
}
|
|
|
|
// Scans an identifier, keyword, or null / boolean literal starting at the
// current index. `ch0` is the first character as seen by the caller; a
// backslash selects the escaped-identifier path.
//
// The keyword kind is always recorded in token->secondaryKeywordKind for
// names longer than one character. A keyword token is produced for 'yield',
// 'let' and for kinds ordered before StrictModeReservedWord; every other name
// is stored as an identifier (or null / boolean literal) token.
ALWAYS_INLINE void Scanner::scanIdentifier(Scanner::ScannerResult* token, char16_t ch0)
{
    ASSERT(token != nullptr);
    Token type = Token::IdentifierToken;
    const size_t start = this->index;

    // Backslash (U+005C) starts an escaped character.
    ScanIDResult id = UNLIKELY(ch0 == 0x5C) ? this->getComplexIdentifier() : this->getIdentifier();
    const auto& data = std::get<0>(id);
    const size_t end = this->index;

    // There is no keyword or literal with only one character.
    // Thus, it must be an identifier.
    if (data.length > 1) {
        const KeywordKind keywordKind = getKeyword(data);
        token->secondaryKeywordKind = keywordKind;

        if (keywordKind == NullKeyword) {
            type = Token::NullLiteralToken;
        } else if (keywordKind == TrueKeyword || keywordKind == FalseKeyword) {
            type = Token::BooleanLiteralToken;
        } else if (keywordKind != NotKeyword) {
            const bool alwaysKeyword = (keywordKind == YieldKeyword || keywordKind == LetKeyword);
            if (alwaysKeyword || keywordKind < StrictModeReservedWord) {
                token->setKeywordResult(this->lineNumber, this->lineStart, start, end, keywordKind);
                return;
            }
            // Remaining kinds fall through and are stored as identifiers.
        }
    }

    String* const builtName = std::get<1>(id);
    if (UNLIKELY(builtName != nullptr)) {
        // The scan produced an explicit string; use it as the token value.
        token->setResult(type, builtName, this->lineNumber, this->lineStart, start, end);
    } else {
        // The token value is the source range itself.
        token->setResult(type, start, end, this->lineNumber, this->lineStart, start, end);
    }
}
|
|
|
|
// Scans the next token at the current index into `token`.
//
// Emits an EOF token at end of input. Otherwise the first character selects
// the scanner: identifier, string literal, numeric literal, template, or
// (as the fallback) punctuator.
void Scanner::lex(Scanner::ScannerResult* token)
{
    ASSERT(token != nullptr);

    token->resetResult();

    if (UNLIKELY(this->eof())) {
        token->setResult(Token::EOFToken, this->lineNumber, this->lineStart, this->index, this->index);
        return;
    }

    char16_t cp = this->peekCharWithoutEOF();

    // NOTE(review): this branch advances this->index past the first unit and
    // stores the combined code point into a 16-bit variable, so the value is
    // truncated and the index is not restored before the dispatch below.
    // Left unchanged because the intended behaviour cannot be confirmed from
    // this function alone -- verify against getIdentifier() and
    // scanPunctuator().
    if (UNLIKELY(cp >= 128 && cp >= 0xD800 && cp < 0xDFFF)) {
        ++this->index;
        char32_t ch2 = this->peekChar();
        if (U16_IS_TRAIL(ch2)) {
            cp = U16_GET_SUPPLEMENTARY(cp, ch2);
        } else {
            this->throwUnexpectedToken();
        }
    }

    if (isIdentifierStart(cp)) {
        this->scanIdentifier(token, cp);
        return;
    }

    // String literal starts with single quote (U+0027) or double quote (U+0022).
    if (cp == 0x27 || cp == 0x22) {
        this->scanStringLiteral(token);
        return;
    }

    // Dot (.) U+002E can also start a floating-point number, hence the need
    // to check the next character. The bounds check keeps a '.' that is the
    // very last source character from being followed by a read past the end.
    if (UNLIKELY(cp == 0x2E) && (this->index + 1 < this->length) && isDecimalDigit(this->sourceCharAt(this->index + 1))) {
        this->scanNumericLiteral(token);
        return;
    }

    if (isDecimalDigit(cp)) {
        this->scanNumericLiteral(token);
        return;
    }

    if (UNLIKELY(cp == '`')) {
        ++this->index;
        this->scanTemplate(token, true);
        return;
    }

    // Possible identifier start in a surrogate pair.
    if (UNLIKELY(cp >= 0xD800 && cp < 0xDFFF) && isIdentifierStart(this->codePointAt(this->index))) {
        this->scanIdentifier(token, cp);
        return;
    }

    this->scanPunctuator(token, cp);
}
|
|
} // namespace Escargot
|