escargot/src/parser/Lexer.cpp
Seonghyun Kim 87c8a581bd Remove useless peekChar calling in Lexer::lex
Signed-off-by: Seonghyun Kim <sh8281.kim@samsung.com>
2022-09-29 10:43:21 +09:00

2387 lines
71 KiB
C++

/*
* Copyright (c) 2016-present Samsung Electronics Co., Ltd
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
* USA
*/
#include "Escargot.h"
#include "parser/Lexer.h"
#include "parser/UnicodeIdentifierTables.h"
#include "parser/esprima_cpp/ParserContext.h"
// These two must be the last because they overwrite the ASSERT macro.
#include "double-conversion.h"
#include "ieee.h"
using namespace Escargot::EscargotLexer;
namespace Escargot {
#define IDENT_RANGE_LONG 200
/* The largest code point that a UTF-16 surrogate pair can represent is 0x10ffff,
* so any value above this can serve as the "empty" marker. UINT32_MAX is
* chosen because it is a valid immediate for machine instructions. */
#define EMPTY_CODE_POINT UINT32_MAX
/* The largest octal value is 255, so any higher
* value can represent an invalid octal value. */
#define NON_OCTAL_VALUE 256
// Classification table for the 128 ASCII code units, indexed by character code.
// Each entry is a bitwise OR of LexerIsChar* flags (0 means "no class"):
//   - LexerIsCharWhiteSpace     : TAB, VT, FF, SPACE
//   - LexerIsCharLineTerminator : LF, CR
//   - LexerIsCharIdentStart     : '$', '_', '\\', 'A'-'Z', 'a'-'z'
//   - LexerIsCharIdent          : everything in IdentStart plus '0'-'9'
// NOTE(review): '\\' (index 92) carries both identifier flags, presumably so that a
// "\uXXXX" escape is routed into identifier scanning — confirm against the callers.
char EscargotLexer::g_asciiRangeCharMap[128] = {
// 0x00 - 0x08: control characters
0,
0,
0,
0,
0,
0,
0,
0,
0,
// 0x09 TAB, 0x0A LF, 0x0B VT, 0x0C FF, 0x0D CR
LexerIsCharWhiteSpace,
LexerIsCharLineTerminator,
LexerIsCharWhiteSpace,
LexerIsCharWhiteSpace,
LexerIsCharLineTerminator,
// 0x0E - 0x1F: control characters
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
// 0x20 SPACE
LexerIsCharWhiteSpace,
// 0x21 - 0x23: '!' '"' '#'
0,
0,
0,
// 0x24 '$'
LexerIsCharIdentStart | LexerIsCharIdent,
// 0x25 - 0x2F: punctuation
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
// 0x30 - 0x39: '0'-'9'
LexerIsCharIdent,
LexerIsCharIdent,
LexerIsCharIdent,
LexerIsCharIdent,
LexerIsCharIdent,
LexerIsCharIdent,
LexerIsCharIdent,
LexerIsCharIdent,
LexerIsCharIdent,
LexerIsCharIdent,
// 0x3A - 0x40: ':' ';' '<' '=' '>' '?' '@'
0,
0,
0,
0,
0,
0,
0,
// 0x41 - 0x5A: 'A'-'Z'
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
// 0x5B '['
0,
// 0x5C '\\'
LexerIsCharIdentStart | LexerIsCharIdent,
// 0x5D ']', 0x5E '^'
0,
0,
// 0x5F '_'
LexerIsCharIdentStart | LexerIsCharIdent,
// 0x60 '`'
0,
// 0x61 - 0x7A: 'a'-'z'
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
LexerIsCharIdentStart | LexerIsCharIdent,
// 0x7B - 0x7F: '{' '|' '}' '~' DEL
0,
0,
0,
0,
0
};
/* Non-ASCII half of the white-space test. The caller must already have
 * established that ch is outside the ASCII range (asserted below). */
NEVER_INLINE bool EscargotLexer::isWhiteSpaceSlowCase(char16_t ch)
{
    ASSERT(ch >= 0x80);
    // Below U+1680 the only white-space code unit is NO-BREAK SPACE.
    if (LIKELY(ch < 0x1680)) {
        return ch == 0xA0;
    }
    switch (ch) {
    case 0x1680:
    case 0x202F:
    case 0x205F:
    case 0x3000:
    case 0xFEFF:
        return true;
    default:
        // Contiguous run U+2000 .. U+200A.
        return ch >= 0x2000 && ch <= 0x200A;
    }
}
/* Binary search over the identRangeStart / identRangeLength tables.
 * Returns true when ch lies inside one of the listed ranges. A length entry
 * of IDENT_RANGE_LONG or more is an index (after subtracting IDENT_RANGE_LONG)
 * into identRangeLongLength, which holds the real length. */
static NEVER_INLINE bool isIdentifierPartSlow(char32_t ch)
{
    int lo = 0;
    int hi = (EscargotLexer::basic_plane_length / sizeof(uint16_t)) - 1;
    do {
        const int mid = (lo + hi) >> 1;
        const char32_t first = identRangeStart[mid];
        if (ch < first) {
            hi = mid;
            continue;
        }
        if (ch < identRangeStart[mid + 1]) {
            // ch falls between this range's start and the next range's start.
            char32_t span = identRangeLength[mid];
            if (UNLIKELY(span >= IDENT_RANGE_LONG)) {
                span = identRangeLongLength[span - IDENT_RANGE_LONG];
            }
            return ch <= first + span;
        }
        lo = mid + 1;
    } while (lo != hi);
    return false;
}
/* Binary search over the identRangeStartSupplementaryPlane /
 * identRangeLengthSupplementaryPlane tables. Returns true when ch lies inside
 * one of the listed ranges. */
static NEVER_INLINE bool isIdentifierPartSlowSupplementary(char32_t ch)
{
    int lo = 0;
    int hi = (EscargotLexer::supplementary_plane_length / sizeof(uint32_t)) - 1;
    do {
        const int mid = (lo + hi) >> 1;
        const char32_t first = identRangeStartSupplementaryPlane[mid];
        if (ch < first) {
            hi = mid;
            continue;
        }
        if (ch < identRangeStartSupplementaryPlane[mid + 1]) {
            const char32_t span = identRangeLengthSupplementaryPlane[mid];
            return ch <= first + span;
        }
        lo = mid + 1;
    } while (lo != hi);
    return false;
}
/* True when ch may appear inside an identifier: ASCII is answered from
 * g_asciiRangeCharMap, everything else from the range tables. */
static ALWAYS_INLINE bool isIdentifierPart(char32_t ch)
{
    if (UNLIKELY(ch >= 128)) {
        return isIdentifierPartSlow(ch) || isIdentifierPartSlowSupplementary(ch);
    }
    return g_asciiRangeCharMap[ch] & LexerIsCharIdent;
}
/* True when ch may begin an identifier. ASCII is answered from
 * g_asciiRangeCharMap; non-ASCII input uses the same range tables as
 * isIdentifierPart (no separate "start" table is consulted here). */
static ALWAYS_INLINE bool isIdentifierStart(char32_t ch)
{
    if (UNLIKELY(ch >= 128)) {
        return isIdentifierPartSlow(ch) || isIdentifierPartSlowSupplementary(ch);
    }
    return g_asciiRangeCharMap[ch] & LexerIsCharIdentStart;
}
/* True for the ASCII digits '0'..'9'. */
static ALWAYS_INLINE bool isDecimalDigit(char16_t ch)
{
    return '0' <= ch && ch <= '9';
}
/* True for '0'..'9' and for '_'. When ch is '_', seenUnderScore is set to
 * true; it is never reset here. */
static ALWAYS_INLINE bool isDecimalDigitOrUnderscore(char16_t ch, bool& seenUnderScore)
{
    if (LIKELY(ch != '_')) {
        return '0' <= ch && ch <= '9';
    }
    seenUnderScore = true;
    return true;
}
/* True for '0'..'9', 'a'..'f' and 'A'..'F'. */
static ALWAYS_INLINE bool isHexDigit(char16_t ch)
{
    if (isDecimalDigit(ch)) {
        return true;
    }
    // OR-ing 0x20 folds the ASCII upper-case letters onto lower case.
    const int folded = ch | 0x20;
    return folded >= 'a' && folded <= 'f';
}
/* True for hexadecimal digits and for '_'. When ch is '_', seenUnderScore is
 * set to true (via isDecimalDigitOrUnderscore). */
static ALWAYS_INLINE bool isHexDigitOrUnderscore(char16_t ch, bool& seenUnderScore)
{
    if (isDecimalDigitOrUnderscore(ch, seenUnderScore)) {
        return true;
    }
    const int folded = ch | 0x20;
    return folded >= 'a' && folded <= 'f';
}
/* True for the ASCII digits '0'..'7'. */
static ALWAYS_INLINE bool isOctalDigit(char16_t ch)
{
    return '0' <= ch && ch <= '7';
}
/* Numeric value (0..7) of an octal digit. The input must satisfy
 * isOctalDigit (asserted). */
static ALWAYS_INLINE char16_t octalValue(char16_t ch)
{
    ASSERT(isOctalDigit(ch));
    const char16_t value = ch - '0';
    return value;
}
/* Numeric value of a hexadecimal digit. No validation is performed; the
 * result is only meaningful when ch is a hex digit. */
static ALWAYS_INLINE uint8_t toHexNumericValue(char16_t ch)
{
    if (ch < 'A') {
        return ch - '0';
    }
    // The mask makes 'a'..'f' yield the same 10..15 as 'A'..'F'.
    return (ch - 'A' + 10) & 0xF;
}
/* Numeric value (0..15) of a hexadecimal digit. Anything that is not a
 * decimal digit must be a letter a-f / A-F (asserted). */
static int hexValue(char16_t ch)
{
    const bool isDigit = (ch >= '0' && ch <= '9');
    if (!isDigit) {
        const int folded = ch | 0x20;
        ASSERT(folded >= 'a' && folded <= 'f');
        return folded - ('a' - 10);
    }
    return ch - '0';
}
/* UTF-16 encoding of a single code point.
 *
 * data   - the encoded code units followed by a 0 terminator; unused slots are 0.
 * length - number of code units used: 1 for values below 0x10000, otherwise 2
 *          (a high/low surrogate pair).
 *
 * All three slots are zero-initialised so that copying the struct never reads
 * an indeterminate value (previously data[2] was left unset for BMP input). */
struct ParserCharPiece {
    char16_t data[3];
    size_t length;
    ParserCharPiece(const char32_t a)
        : data{ 0, 0, 0 }
        , length(1)
    {
        if (a < 0x10000) {
            data[0] = static_cast<char16_t>(a);
        } else {
            // Split the 20-bit offset into the high and low surrogate halves.
            const char32_t offset = a - 0x10000;
            data[0] = (char16_t)(0xD800 + (offset >> 10));
            data[1] = (char16_t)(0xDC00 + (offset & 1023));
            length = 2;
        }
    }
};
// Maps a KeywordKind value to the corresponding AtomicString held in the
// context's static string table (ctx->staticStrings()).
// A kind with no case below is a programming error: it trips
// ASSERT_NOT_REACHED() and, past the assertion, the `error` static string is returned.
AtomicString keywordToString(::Escargot::Context* ctx, KeywordKind keyword)
{
switch (keyword) {
case IfKeyword:
return ctx->staticStrings().stringIf;
case InKeyword:
return ctx->staticStrings().stringIn;
case DoKeyword:
return ctx->staticStrings().stringDo;
case VarKeyword:
return ctx->staticStrings().var;
case ForKeyword:
return ctx->staticStrings().stringFor;
case NewKeyword:
return ctx->staticStrings().stringNew;
case TryKeyword:
return ctx->staticStrings().stringTry;
case ThisKeyword:
return ctx->staticStrings().stringThis;
case ElseKeyword:
return ctx->staticStrings().stringElse;
case CaseKeyword:
return ctx->staticStrings().stringCase;
case VoidKeyword:
return ctx->staticStrings().stringVoid;
case WithKeyword:
return ctx->staticStrings().with;
case EnumKeyword:
return ctx->staticStrings().stringEnum;
case WhileKeyword:
return ctx->staticStrings().stringWhile;
case BreakKeyword:
return ctx->staticStrings().stringBreak;
case CatchKeyword:
return ctx->staticStrings().stringCatch;
case ThrowKeyword:
return ctx->staticStrings().stringThrow;
case ConstKeyword:
return ctx->staticStrings().stringConst;
case ClassKeyword:
return ctx->staticStrings().stringClass;
case SuperKeyword:
return ctx->staticStrings().super;
case ReturnKeyword:
return ctx->staticStrings().stringReturn;
case TypeofKeyword:
return ctx->staticStrings().stringTypeof;
case DeleteKeyword:
return ctx->staticStrings().stringDelete;
case SwitchKeyword:
return ctx->staticStrings().stringSwitch;
case ExportKeyword:
return ctx->staticStrings().stringExport;
case ImportKeyword:
return ctx->staticStrings().stringImport;
case DefaultKeyword:
return ctx->staticStrings().stringDefault;
case FinallyKeyword:
return ctx->staticStrings().finally;
case ExtendsKeyword:
return ctx->staticStrings().extends;
case FunctionKeyword:
return ctx->staticStrings().function;
case ContinueKeyword:
return ctx->staticStrings().stringContinue;
case DebuggerKeyword:
return ctx->staticStrings().debugger;
case InstanceofKeyword:
return ctx->staticStrings().instanceof ;
case ImplementsKeyword:
return ctx->staticStrings().implements;
case InterfaceKeyword:
return ctx->staticStrings().interface;
case PackageKeyword:
return ctx->staticStrings().package;
case PrivateKeyword:
return ctx->staticStrings().stringPrivate;
case ProtectedKeyword:
return ctx->staticStrings().stringProtected;
case PublicKeyword:
return ctx->staticStrings().stringPublic;
case StaticKeyword:
return ctx->staticStrings().stringStatic;
case YieldKeyword:
return ctx->staticStrings().yield;
case LetKeyword:
return ctx->staticStrings().let;
case NullKeyword:
return ctx->staticStrings().null;
case TrueKeyword:
return ctx->staticStrings().stringTrue;
case FalseKeyword:
return ctx->staticStrings().stringFalse;
case GetKeyword:
return ctx->staticStrings().get;
case SetKeyword:
return ctx->staticStrings().set;
case EvalKeyword:
return ctx->staticStrings().eval;
case ArgumentsKeyword:
return ctx->staticStrings().arguments;
case OfKeyword:
return ctx->staticStrings().of;
case AsyncKeyword:
return ctx->staticStrings().async;
case AwaitKeyword:
return ctx->staticStrings().await;
case AsKeyword:
return ctx->staticStrings().as;
case FromKeyword:
return ctx->staticStrings().from;
default:
// Unknown keyword kind: should never happen.
ASSERT_NOT_REACHED();
return ctx->staticStrings().error;
}
}
/* Builds an esprima::Error whose message is "Line <line>: <description>",
 * fills in index / lineNumber / column / description / errorCode, and throws
 * it as a pointer. This function never returns normally. */
void ErrorHandler::throwError(size_t index, size_t line, size_t col, String* description, ErrorObject::Code code)
{
    // Render "<line>: " from right to left into the tail of a fixed buffer.
    const size_t capacity = 64;
    char scratch[capacity];
    char* cursor = scratch + capacity - 2;
    cursor[0] = ':';
    cursor[1] = ' ';
    size_t remaining = line;
    do {
        ASSERT(cursor > scratch);
        *--cursor = remaining % 10 + '0';
        remaining /= 10;
    } while (remaining > 0);

    UTF16StringDataNonGCStd msg = u"Line ";
    msg += UTF16StringDataNonGCStd(cursor, scratch + capacity);
    if (description->length()) {
        msg += UTF16StringDataNonGCStd(description->toUTF16StringData().data());
    }

    esprima::Error* error = new (NoGC) esprima::Error(new UTF16String(msg.data(), msg.length()));
    error->index = index;
    error->lineNumber = line;
    error->column = col;
    error->description = description;
    error->errorCode = code;
    throw error;
}
/* View of `source` restricted to this token's [start, end) offsets. */
ParserStringView Scanner::SmallScannerResult::relatedSource(const ParserStringView& source) const
{
    return ParserStringView(source, start, end);
}
/* StringView overload: view of `source` restricted to this token's
 * [start, end) offsets. */
StringView Scanner::SmallScannerResult::relatedSource(const StringView& source) const
{
    return StringView(source, start, end);
}
/* View of `source` restricted to this token's [start, end) offsets. */
ParserStringView Scanner::ScannerResult::relatedSource(const ParserStringView& source)
{
    return ParserStringView(source, start, end);
}
/* StringView overload: view of `source` restricted to this token's
 * [start, end) offsets. */
StringView Scanner::ScannerResult::relatedSource(const StringView& source)
{
    return StringView(source, start, end);
}
/* Converts a string-literal token into a Value.
 *
 * - If the token needs a freshly built string (hasAllocatedString), that
 *   string is constructed on first use and returned.
 * - Otherwise the literals "object", "function" and "undefined" (the common
 *   typeof results) are answered with the context's static strings.
 * - Anything else becomes a new StringView over the original source. */
Value Scanner::ScannerResult::valueStringLiteralToValue(Scanner* scannerInstance)
{
    ASSERT(this->type == Token::StringLiteralToken);
    if (UNLIKELY(this->hasAllocatedString)) {
        if (!this->valueStringLiteralData.m_stringIfNewlyAllocated) {
            constructStringLiteral(scannerInstance);
        }
        return this->valueStringLiteralData.m_stringIfNewlyAllocated;
    }

    const size_t from = this->valueStringLiteralData.m_start;
    const size_t to = this->valueStringLiteralData.m_end;
    const size_t len = to - from;

    // Only lengths 6..9 can match one of the three typeof strings.
    if (len > 5 && len < 10) {
        ParserStringView view(scannerInstance->source, from, to);
        const auto first = view.bufferedCharAt(0);
        if (first == 'o') {
            if (len == 6 && view.equalsSameLength("object", 1)) {
                return scannerInstance->escargotContext->staticStrings().object.string();
            }
        } else if (first == 'f') {
            if (len == 8 && view.equalsSameLength("function", 1)) {
                return scannerInstance->escargotContext->staticStrings().function.string();
            }
        } else if (first == 'u') {
            if (len == 9 && view.equalsSameLength("undefined", 1)) {
                return scannerInstance->escargotContext->staticStrings().undefined.string();
            }
        }
    }
    return new StringView(scannerInstance->sourceAsNormalView, from, to);
}
/* Returns the token's text as a ParserStringView.
 * Keyword tokens yield the keyword's static string; tokens flagged with
 * hasAllocatedString yield the (lazily constructed) decoded string; all
 * others yield a slice of the scanner's source. */
ParserStringView Scanner::ScannerResult::valueStringLiteral(Scanner* scannerInstance)
{
    if (this->type == Token::KeywordToken) {
        AtomicString keywordName = keywordToString(scannerInstance->escargotContext, this->valueKeywordKind);
        return ParserStringView(keywordName.string(), 0, keywordName.string()->length());
    }
    if (!this->hasAllocatedString) {
        return ParserStringView(scannerInstance->source, this->valueStringLiteralData.m_start, this->valueStringLiteralData.m_end);
    }
    // Decode the literal the first time it is requested.
    if (!this->valueStringLiteralData.m_stringIfNewlyAllocated) {
        constructStringLiteral(scannerInstance);
    }
    return ParserStringView(this->valueStringLiteralData.m_stringIfNewlyAllocated);
}
// Returns the numeric value of this token together with a flag that is true
// only when the literal is a BigInt (its text ends with 'n').
//
// When hasNonComputedNumberLiteral is set, the literal's source text
// [start, end) is converted on demand: '_' separators are stripped if
// present, BigInt text is handed to BigInt::parseString, and everything else
// goes through double_conversion. A computed double is cached in valueNumber
// and the flag is cleared; the BigInt branch returns without caching.
std::pair<Value, bool> Scanner::ScannerResult::valueNumberLiteral(Scanner* scannerInstance)
{
if (this->hasNonComputedNumberLiteral) {
const auto& bd = scannerInstance->source.bufferAccessData();
char* buffer;
int length = this->end - this->start;
if (UNLIKELY(this->hasNumberSeparatorOnNumberLiteral)) {
// Copy the literal into a temporary buffer, dropping every '_'.
buffer = ALLOCA(this->end - this->start, char, ec);
int underScoreCount = 0;
for (int i = 0; i < length; i++) {
auto c = bd.charAt(i + this->start);
if (c == '_') {
underScoreCount++;
} else {
buffer[i - underScoreCount] = c;
}
}
length -= underScoreCount;
ASSERT(underScoreCount != 0);
} else {
if (bd.has8BitContent) {
// 8-bit source: point straight into the source buffer, no copy.
buffer = ((char*)bd.buffer) + this->start;
} else {
// 16-bit source: narrow each code unit into a temporary char buffer.
buffer = ALLOCA(this->end - this->start, char, ec);
for (int i = 0; i < length; i++) {
buffer[i] = bd.uncheckedCharAtFor16Bit(i + this->start);
}
}
}
// bigint case
if (UNLIKELY(buffer[length - 1] == 'n')) {
// The trailing 'n' is excluded from the text passed to the parser.
return std::make_pair(Value(BigInt::parseString(buffer, length - 1).value()), true);
}
int lengthDummy;
double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::ALLOW_HEX
| double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES
| double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES,
0.0, double_conversion::Double::NaN(),
"Infinity", "NaN");
double ll = converter.StringToDouble(buffer, length, &lengthDummy);
// Cache the result so later calls skip the conversion.
this->valueNumber = ll;
this->hasNonComputedNumberLiteral = false;
}
return std::make_pair(Value(this->valueNumber), false);
}
/* Appends the character denoted by the escape sequence whose first character
 * (after the backslash) is `ch` to stringUTF16. The scanner is positioned just
 * past `ch` and is advanced over any further characters of the escape.
 * isEveryCharLatin1 is cleared when a code unit >= 256 (or a surrogate pair)
 * is appended; it is never set to true here. */
void Scanner::ScannerResult::constructStringLiteralHelperAppendUTF16(Scanner* scannerInstance, char16_t ch, UTF16StringDataNonGCStd& stringUTF16, bool& isEveryCharLatin1)
{
    // \u.... , \u{...} and \x.. escapes.
    if (ch == 'u' || ch == 'x') {
        char32_t codePoint;
        if (scannerInstance->peekChar() == '{') {
            ++scannerInstance->index;
            codePoint = scannerInstance->scanUnicodeCodePointEscape();
        } else {
            codePoint = scannerInstance->scanHexEscape(ch);
        }
        ParserCharPiece piece(codePoint);
        stringUTF16.append(piece.data, piece.data + piece.length);
        if (piece.length != 1 || piece.data[0] >= 256) {
            isEveryCharLatin1 = false;
        }
        return;
    }

    // Single-character escapes.
    switch (ch) {
    case 'n':
        stringUTF16 += '\n';
        return;
    case 'r':
        stringUTF16 += '\r';
        return;
    case 't':
        stringUTF16 += '\t';
        return;
    case 'b':
        stringUTF16 += '\b';
        return;
    case 'f':
        stringUTF16 += '\f';
        return;
    case 'v':
        stringUTF16 += '\x0B';
        return;
    default:
        break;
    }

    // Octal escape, or any other character taken literally.
    if (ch && isOctalDigit(ch)) {
        uint16_t octToDec = scannerInstance->octalToDecimal(ch, true);
        stringUTF16 += octToDec;
        ASSERT(octToDec < 256);
        return;
    }
    stringUTF16 += ch;
    if (ch >= 256) {
        isEveryCharLatin1 = false;
    }
}
/* Re-scans this string-literal token from its opening quote, decoding escape
 * sequences, and stores the resulting string in
 * valueStringLiteralData.m_stringIfNewlyAllocated. The scanner's index,
 * lineNumber and lineStart are saved on entry and restored before returning.
 * The result is a Latin-1 string when every appended code unit is below 256,
 * otherwise a UTF16String. */
void Scanner::ScannerResult::constructStringLiteral(Scanner* scannerInstance)
{
    const size_t savedIndex = scannerInstance->index;
    const size_t savedLineNumber = scannerInstance->lineNumber;
    const size_t savedLineStart = scannerInstance->lineStart;

    scannerInstance->index = this->start;
    // A string literal must start with a quote.
    const char16_t quote = scannerInstance->peekChar();
    ASSERT((quote == '\'' || quote == '"'));
    ++scannerInstance->index;

    bool isEveryCharLatin1 = true;
    UTF16StringDataNonGCStd decoded;
    for (;;) {
        char16_t ch = scannerInstance->peekChar();
        ++scannerInstance->index;
        if (ch == quote) {
            break;
        }
        if (UNLIKELY(ch == '\\')) {
            ch = scannerInstance->peekChar();
            ++scannerInstance->index;
            if (!ch || !isLineTerminator(ch)) {
                this->constructStringLiteralHelperAppendUTF16(scannerInstance, ch, decoded, isEveryCharLatin1);
                continue;
            }
            // Backslash followed by a line terminator: a line continuation,
            // nothing is appended. CR LF and LF CR count as one terminator.
            ++scannerInstance->lineNumber;
            const char16_t following = scannerInstance->peekChar();
            if ((ch == '\r' && following == '\n') || (ch == '\n' && following == '\r')) {
                ++scannerInstance->index;
            }
            scannerInstance->lineStart = scannerInstance->index;
            continue;
        }
        if (UNLIKELY(isLineTerminator(ch))) {
            break;
        }
        decoded += ch;
        if (ch >= 256) {
            isEveryCharLatin1 = false;
        }
    }

    scannerInstance->index = savedIndex;
    scannerInstance->lineNumber = savedLineNumber;
    scannerInstance->lineStart = savedLineStart;

    String* newStr = isEveryCharLatin1
        ? String::fromLatin1(decoded.data(), decoded.length())
        : new UTF16String(decoded.data(), decoded.length());
    this->valueStringLiteralData.m_stringIfNewlyAllocated = newStr;
}
// Creates a scanner over `code`, positioned at offset 0.
//   escargotContext - must not be null (asserted).
//   parserContext   - stored as-is for later use by the scanner.
//   isModule        - stored as-is.
//   startLine       - initial value of lineNumber.
//   startColumn     - initial value of lineStart.
Scanner::Scanner(::Escargot::Context* escargotContext, ::Escargot::esprima::ParserContext* parserContext, StringView code, bool isModule, size_t startLine, size_t startColumn)
: source(code, 0, code.length())
, sourceAsNormalView(code)
, escargotContext(escargotContext)
, parserContext(parserContext)
, sourceCodeAccessData(code.bufferAccessData())
, isModule(isModule)
, length(code.length())
, index(0)
, lineNumber(startLine)
, lineStart(startColumn)
{
ASSERT(escargotContext != nullptr);
// trackComment = false;
}
/* Points the scanner at a new source text and rewinds it: index 0,
 * line number 1, line start 0. */
void Scanner::resetSource(StringView code)
{
    // Replace every view of the source.
    sourceAsNormalView = code;
    source = ParserStringView(code, 0, code.length());
    sourceCodeAccessData = code.bufferAccessData();
    length = code.length();

    // Rewind the cursor.
    index = 0;
    lineNumber = 1;
    lineStart = 0;
}
/* Advances past the remainder of the current line, including its line
 * terminator (CR LF counts as one terminator), then bumps lineNumber and
 * sets lineStart to the new index. Stops silently at end of input. */
void Scanner::skipSingleLine()
{
    while (!this->eof()) {
        const char16_t ch = this->peekCharWithoutEOF();
        ++this->index;
        if (!isLineTerminator(ch)) {
            continue;
        }
        // The CR may be the very last character of the source, so eof() has
        // to be tested before peeking: peekCharWithoutEOF() presumably does
        // no bounds check of its own (every other call site guards it).
        if (ch == 13 && !this->eof() && this->peekCharWithoutEOF() == 10) {
            ++this->index;
        }
        ++this->lineNumber;
        this->lineStart = this->index;
        return;
    }
}
/* Skips the body of a single-line comment: consumes characters up to and
 * including the next line terminator (CR LF counts as one terminator), then
 * bumps lineNumber and sets lineStart to the new index. Stops silently at
 * end of input. */
void Scanner::skipSingleLineComment(void)
{
    while (!this->eof()) {
        const char16_t ch = this->peekCharWithoutEOF();
        ++this->index;
        if (!isLineTerminator(ch)) {
            continue;
        }
        // The CR may be the very last character of the source, so eof() has
        // to be tested before peeking: peekCharWithoutEOF() presumably does
        // no bounds check of its own (every other call site guards it).
        if (ch == 13 && !this->eof() && this->peekCharWithoutEOF() == 10) {
            ++this->index;
        }
        ++this->lineNumber;
        this->lineStart = this->index;
        return;
    }
}
/* Skips the body of a block comment up to and including the closing star-slash,
 * keeping lineNumber / lineStart up to date for every line terminator crossed
 * (CR LF counts as one terminator). If the input ends before the comment is
 * closed, throwUnexpectedToken() is called. */
void Scanner::skipMultiLineComment(void)
{
    while (!this->eof()) {
        const char16_t ch = this->peekCharWithoutEOF();
        ++this->index;
        // Both look-aheads below test eof() first: the CR or '*' just consumed
        // may be the last character of the source, and peekCharWithoutEOF()
        // presumably does no bounds check of its own.
        if (isLineTerminator(ch)) {
            if (ch == 0x0D && !this->eof() && this->peekCharWithoutEOF() == 0x0A) {
                ++this->index;
            }
            ++this->lineNumber;
            this->lineStart = this->index;
        } else if (ch == 0x2A && !this->eof() && this->peekCharWithoutEOF() == 0x2F) {
            // The comment terminator ('*' followed by '/') has been found.
            ++this->index;
            return;
        }
    }
    // Unterminated comment.
    throwUnexpectedToken();
}
/* Reads a fixed-width hexadecimal escape: 4 digits when prefix is 'u',
 * 2 digits otherwise. Returns the decoded value, or EMPTY_CODE_POINT if the
 * input ends or a non-hex character is met first (digits consumed up to
 * that point stay consumed). */
char32_t Scanner::scanHexEscape(char prefix)
{
    size_t remaining = (prefix == 'u') ? 4 : 2;
    char32_t value = 0;
    while (remaining-- > 0) {
        if (this->eof()) {
            return EMPTY_CODE_POINT;
        }
        const char16_t digit = this->peekCharWithoutEOF();
        if (!isHexDigit(digit)) {
            return EMPTY_CODE_POINT;
        }
        value = value * 16 + hexValue(digit);
        ++this->index;
    }
    return value;
}
/* Reads the "XXXX}" part of a "\u{XXXX}" escape; the scanner must be
 * positioned just after the opening brace. Returns the decoded code point.
 * Calls throwUnexpectedToken() when there is no digit at all, when the
 * sequence is not closed by '}', or when the value exceeds 0x10FFFF. */
char32_t Scanner::scanUnicodeCodePointEscape()
{
    // At least, one hex digit is required.
    if (this->eof() || this->peekCharWithoutEOF() == '}') {
        this->throwUnexpectedToken();
    }
    char32_t code = 0;
    char16_t ch = 0;
    while (!this->eof()) {
        ch = this->peekCharWithoutEOF();
        ++this->index;
        if (!isHexDigit(ch)) {
            break;
        }
        // Stop accumulating once the value is already out of range. Without
        // this guard a long digit run wraps the 32-bit accumulator and an
        // invalid escape could come out as a small, "valid" code point.
        if (code <= 0x10FFFF) {
            code = code * 16 + hexValue(ch);
        }
    }
    if (code > 0x10FFFF || ch != '}') {
        this->throwUnexpectedToken();
    }
    return code;
}
/* Fast path for identifiers. Consumes identifier-part characters starting at
 * the current index (the first character is accepted unconditionally) and
 * returns access data that points straight into the source buffer, paired
 * with a null string. As soon as a backslash or a code unit in the surrogate
 * range is seen, the scan is restarted from the beginning through
 * getComplexIdentifier(). */
Scanner::ScanIDResult Scanner::getIdentifier()
{
    const size_t start = this->index;
    ++this->index;
    while (UNLIKELY(!this->eof())) {
        const char16_t ch = this->peekCharWithoutEOF();
        // Backslash (U+005C) marks a Unicode escape sequence; surrogate pairs
        // also need the slow path. The range test uses the original bounds
        // (0xD800 inclusive, 0xDFFF exclusive).
        const bool needsSlowPath = (ch == 0x5C) || (ch >= 0xD800 && ch < 0xDFFF);
        if (UNLIKELY(needsSlowPath)) {
            this->index = start;
            return this->getComplexIdentifier();
        }
        if (!isIdentifierPart(ch)) {
            break;
        }
        ++this->index;
    }
    const auto& srcData = this->source.bufferAccessData();
    void* firstUnit = srcData.has8BitContent
        ? reinterpret_cast<void*>(((LChar*)srcData.buffer) + start)
        : reinterpret_cast<void*>(((char16_t*)srcData.buffer) + start);
    StringBufferAccessData ad(srcData.has8BitContent, this->index - start, firstUnit);
    return std::make_tuple(ad, nullptr);
}
// Slow path for identifiers that contain "\u" escapes or surrogate pairs.
// Builds the decoded identifier in a temporary UTF-16 buffer, allocates a
// UTF16String from it, and returns that string together with its buffer
// access data. Calls throwUnexpectedToken() for malformed escapes and, when
// parserContext->await is set, for an identifier that spells "await".
Scanner::ScanIDResult Scanner::getComplexIdentifier()
{
// NOTE(review): the result of codePointAt() is stored in a char16_t here; if
// codePointAt() can return values above 0xFFFF they would be truncated — confirm.
char16_t cp = this->codePointAt(this->index);
ParserCharPiece piece = ParserCharPiece(cp);
UTF16StringDataNonGCStd id(piece.data, piece.length);
this->index += id.length();
// '\u' (U+005C, U+0075) denotes an escaped character.
char32_t ch;
if (cp == 0x5C) {
if (this->peekChar() != 0x75) {
this->throwUnexpectedToken();
}
++this->index;
if (this->peekChar() == '{') {
// "\u{...}" form: no identifier-start check is applied in this branch.
++this->index;
ch = this->scanUnicodeCodePointEscape();
} else {
// "\uXXXX" form: must decode to a valid identifier-start character.
ch = this->scanHexEscape('u');
cp = ch;
if (ch == EMPTY_CODE_POINT || ch == '\\' || !isIdentifierStart(cp)) {
this->throwUnexpectedToken();
}
}
// Replace the backslash collected above with the decoded character.
// NOTE(review): ch is a char32_t assigned to a UTF-16 string — presumably only
// the low 16 bits survive for supplementary code points; confirm.
id = ch;
}
while (!this->eof()) {
cp = this->codePointAt(this->index);
if (!isIdentifierPart(cp)) {
break;
}
// ch = Character.fromCodePoint(cp);
ch = cp;
// Combine a lead/trail surrogate pair into one code point. The index is
// advanced only temporarily to peek at the second code unit.
// NOTE(review): the range test excludes 0xDFFF itself.
if (ch >= 128 && this->peekChar() >= 0xD800 && this->peekChar() < 0xDFFF) {
ch = peekChar();
++this->index;
char32_t ch2 = this->peekChar();
if (U16_IS_TRAIL(ch2)) {
ch = U16_GET_SUPPLEMENTARY(ch, ch2);
}
--this->index;
}
piece = ParserCharPiece(ch);
id += UTF16StringDataNonGCStd(piece.data, piece.length);
this->index += piece.length;
// '\u' (U+005C, U+0075) denotes an escaped character.
if (cp == 0x5C) {
// id = id.substr(0, id.length - 1);
// Drop the backslash that was just appended; the decoded character replaces it.
id.erase(id.length() - 1);
if (this->peekChar() != 0x75) {
this->throwUnexpectedToken();
}
++this->index;
if (this->peekChar() == '{') {
// "\u{...}" form: no identifier-part check is applied in this branch.
++this->index;
ch = this->scanUnicodeCodePointEscape();
} else {
// "\uXXXX" form: must decode to a valid identifier-part character.
ch = this->scanHexEscape('u');
cp = ch;
if (ch == EMPTY_CODE_POINT || ch == '\\' || !isIdentifierPart(cp)) {
this->throwUnexpectedToken();
}
}
piece = ParserCharPiece(ch);
id += UTF16StringDataNonGCStd(piece.data, piece.length);
}
}
String* str = new UTF16String(id.data(), id.length());
// This path is only taken for escaped / non-BMP identifiers, so "await" here
// is reported as a keyword containing escaped characters.
if (UNLIKELY(this->parserContext->await && id == u"await")) {
this->throwUnexpectedToken(Messages::KeywordMustNotContainEscapedCharacters);
}
return std::make_tuple(str->bufferAccessData(), str);
}
/* Decodes an octal escape whose first digit `ch` has already been consumed.
 * Up to two further octal digits are read; a third digit is only taken when
 * `ch` is '0'..'3'. Returns the decoded value, or NON_OCTAL_VALUE when the
 * sequence is a lone '0' and `octal` was passed as false (a lone "\0" is not
 * an octal escape). */
uint16_t Scanner::octalToDecimal(char16_t ch, bool octal)
{
    char16_t value = octalValue(ch);
    if (ch != '0') {
        octal = true;
    }
    if (!this->eof() && isOctalDigit(this->peekChar())) {
        octal = true;
        value = value * 8 + octalValue(this->peekChar());
        ++this->index;
        // A third digit keeps the result below 256 only if the first is 0..3.
        const bool thirdDigitAllowed = (ch >= '0' && ch <= '3');
        if (thirdDigitAllowed && !this->eof() && isOctalDigit(this->peekChar())) {
            value = value * 8 + octalValue(this->peekChar());
            ++this->index;
        }
    }
    ASSERT(!octal || value < NON_OCTAL_VALUE);
    if (!octal) {
        return NON_OCTAL_VALUE;
    }
    return value;
}
// Scans one punctuator beginning with `ch` (the character at the current
// index) and stores the result in `token` via setPunctuatorResult().
// The first character is consumed up front; each case then greedily extends
// the match to the longest operator it recognises (e.g. '>' -> '>>' -> '>>>'
// -> '>>>='). A character that starts no punctuator triggers throwUnexpectedToken().
void Scanner::scanPunctuator(Scanner::ScannerResult* token, char16_t ch)
{
const size_t start = this->index;
PunctuatorKind kind;
// Check for most common single-character punctuators.
++this->index;
switch (ch) {
case '(':
kind = LeftParenthesis;
break;
case '{':
kind = LeftBrace;
break;
case '.':
kind = Period;
if (this->peekChar() == '.' && this->sourceCharAt(this->index + 1) == '.') {
// Spread operator "..."
this->index += 2;
kind = PeriodPeriodPeriod;
}
break;
case '}':
kind = RightBrace;
break;
case ')':
kind = RightParenthesis;
break;
case ';':
kind = SemiColon;
break;
case ',':
kind = Comma;
break;
case '[':
kind = LeftSquareBracket;
break;
case ']':
kind = RightSquareBracket;
break;
case ':':
kind = Colon;
break;
case '?':
// '?', '??', '??=' or '?.'
kind = GuessMark;
ch = this->peekChar();
if (ch == '?') {
++this->index;
kind = NullishCoalescing;
if (this->peekChar() == '=') {
kind = LogicalNullishEqual;
++this->index;
}
} else if (ch == '.') {
++this->index;
kind = GuessDot;
}
break;
case '~':
kind = Wave;
break;
case '>':
// '>', '>=', '>>', '>>=', '>>>' or '>>>='
ch = this->peekChar();
kind = RightInequality;
if (ch == '>') {
++this->index;
ch = this->peekChar();
kind = RightShift;
if (ch == '>') {
++this->index;
kind = UnsignedRightShift;
if (this->peekChar() == '=') {
++this->index;
kind = UnsignedRightShiftEqual;
}
} else if (ch == '=') {
kind = RightShiftEqual;
++this->index;
}
} else if (ch == '=') {
kind = RightInequalityEqual;
++this->index;
}
break;
case '<':
// '<', '<=', '<<' or '<<='
ch = this->peekChar();
kind = LeftInequality;
if (ch == '<') {
++this->index;
kind = LeftShift;
if (this->peekChar() == '=') {
kind = LeftShiftEqual;
++this->index;
}
} else if (ch == '=') {
kind = LeftInequalityEqual;
++this->index;
}
break;
case '=':
// '=', '==', '===' or '=>'
ch = this->peekChar();
kind = Substitution;
if (ch == '=') {
++this->index;
kind = Equal;
if (this->peekChar() == '=') {
kind = StrictEqual;
++this->index;
}
} else if (ch == '>') {
kind = Arrow;
++this->index;
}
break;
case '!':
// '!', '!=' or '!=='
kind = ExclamationMark;
if (this->peekChar() == '=') {
++this->index;
kind = NotEqual;
if (this->peekChar() == '=') {
kind = NotStrictEqual;
++this->index;
}
}
break;
case '&':
// '&', '&=', '&&' or '&&='
ch = this->peekChar();
kind = BitwiseAnd;
if (ch == '&') {
kind = LogicalAnd;
++this->index;
if (this->peekChar() == '=') {
++this->index;
kind = LogicalAndEqual;
}
} else if (ch == '=') {
kind = BitwiseAndEqual;
++this->index;
}
break;
case '|':
// '|', '|=', '||' or '||='
ch = this->peekChar();
kind = BitwiseOr;
if (ch == '|') {
kind = LogicalOr;
++this->index;
if (this->peekChar() == '=') {
++this->index;
kind = LogicalOrEqual;
}
} else if (ch == '=') {
kind = BitwiseOrEqual;
++this->index;
}
break;
case '^':
kind = BitwiseXor;
if (this->peekChar() == '=') {
kind = BitwiseXorEqual;
++this->index;
}
break;
case '+':
// '+', '++' or '+='
ch = this->peekChar();
kind = Plus;
if (ch == '+') {
kind = PlusPlus;
++this->index;
} else if (ch == '=') {
kind = PlusEqual;
++this->index;
}
break;
case '-':
// '-', '--' or '-='
ch = this->peekChar();
kind = Minus;
if (ch == '-') {
kind = MinusMinus;
++this->index;
} else if (ch == '=') {
kind = MinusEqual;
++this->index;
}
break;
case '*':
// '*', '*=', '**' or '**='
ch = this->peekChar();
kind = Multiply;
if (ch == '=') {
kind = MultiplyEqual;
++this->index;
} else if (ch == '*') {
kind = Exponentiation;
++this->index;
if (this->peekChar() == '=') {
kind = ExponentiationEqual;
++this->index;
}
}
break;
case '/':
kind = Divide;
if (this->peekChar() == '=') {
kind = DivideEqual;
++this->index;
}
break;
case '%':
kind = Mod;
if (this->peekChar() == '=') {
kind = ModEqual;
++this->index;
}
break;
case '#':
kind = Hash;
// "#!" counts as a hashbang only when the '#' was the very first
// character of the source (index is 1 after consuming it).
if (this->index == 1 && this->peekChar() == '!') {
kind = HashBang;
++this->index;
}
break;
default:
this->throwUnexpectedToken();
kind = PunctuatorKindEnd;
break;
}
token->setPunctuatorResult(this->lineNumber, this->lineStart, start, this->index, kind);
}
/* Validates the '_' separators of the numeric literal spanning
 * [start, this->index). Errors are raised for:
 *   - two consecutive underscores,
 *   - an underscore right after the x / b / o radix letter (for the radix
 *     selected by isHex / isBinary / isOctal),
 *   - an underscore as the last character, or just before the BigInt 'n'. */
void Scanner::testNumericSeparator(size_t start, bool isBigInt, bool isHex, bool isBinary, bool isOctal)
{
    const size_t last = this->index - 1;
    for (size_t pos = start; pos < last; ++pos) {
        // Every rule in this loop concerns a character that is followed by '_'.
        if (this->sourceCharAt(pos + 1) != '_') {
            continue;
        }
        const char16_t current = this->sourceCharAt(pos);
        if (UNLIKELY(current == '_')) {
            ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Only one underscore is allowed as numeric separator"), ErrorObject::SyntaxError);
        }
        if (UNLIKELY(isHex && (current == 'x' || current == 'X'))) {
            this->throwUnexpectedToken();
        }
        if (UNLIKELY(isBinary && (current == 'b' || current == 'B'))) {
            this->throwUnexpectedToken();
        }
        if (UNLIKELY(isOctal && (current == 'o' || current == 'O'))) {
            this->throwUnexpectedToken();
        }
    }
    const bool endsWithSeparator = this->sourceCharAt(last) == '_';
    if (endsWithSeparator || (isBigInt && this->sourceCharAt(this->index - 2) == '_')) {
        ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Numeric separators are not allowed at the end of numeric literals"), ErrorObject::SyntaxError);
    }
}
/* Scans the digits of a hexadecimal literal (the scanner is positioned after
 * the "0x" prefix; `start` is the offset of the literal's first character)
 * and stores the result in `token`. The value is accumulated in a 64-bit
 * integer for the first 16 digits and in a double after that. '_'
 * separators are skipped and validated afterwards; a trailing 'n' marks a
 * BigInt. Raises an unexpected-token error when no digit is present or an
 * identifier-start character follows the literal. */
void Scanner::scanHexLiteral(Scanner::ScannerResult* token, size_t start)
{
    ASSERT(token != nullptr);
    uint64_t number = 0;
    double numberDouble = 0.0;
    bool useDouble = false;
    bool sawDigit = false;
    bool sawSeparator = false;
    size_t digitCount = 0;

    for (; !this->eof(); this->index++) {
        const char16_t ch = this->peekCharWithoutEOF();
        if (UNLIKELY(ch == '_')) {
            sawSeparator = true;
            continue;
        }
        if (!isHexDigit(ch)) {
            break;
        }
        const uint8_t nibble = toHexNumericValue(ch);
        if (useDouble) {
            numberDouble = numberDouble * 16 + nibble;
        } else {
            number = (number << 4) + nibble;
            // 16 hex digits fill the 64-bit accumulator; continue in double.
            if (++digitCount >= 16) {
                useDouble = true;
                numberDouble = number;
                number = 0;
            }
        }
        sawDigit = true;
    }
    if (!sawDigit) {
        this->throwUnexpectedToken();
    }

    const bool isEof = this->eof();
    const bool isBigInt = !isEof && this->peekChar() == 'n';
    if (UNLIKELY(isBigInt)) {
        ++this->index;
    }
    if (UNLIKELY(!isEof && isIdentifierStart(this->peekChar()))) {
        this->throwUnexpectedToken();
    }
    if (UNLIKELY(sawSeparator)) {
        testNumericSeparator(start, isBigInt, true, false, false);
    }

    if (useDouble) {
        ASSERT(number == 0);
        token->setNumericLiteralResult(numberDouble, this->lineNumber, this->lineStart, start, this->index, isBigInt, sawSeparator);
        return;
    }
    ASSERT(numberDouble == 0.0);
    token->setNumericLiteralResult(number, this->lineNumber, this->lineStart, start, this->index, isBigInt, sawSeparator);
}
/* Scans the digits of a binary literal (the scanner is positioned after the
 * "0b" prefix; `start` is the offset of the literal's first character) and
 * stores the result in `token`. '_' separators are skipped and validated
 * afterwards; a trailing 'n' marks a BigInt. Raises an unexpected-token error
 * when no digit is present or when an identifier-start character or decimal
 * digit follows the literal. */
void Scanner::scanBinaryLiteral(Scanner::ScannerResult* token, size_t start)
{
    ASSERT(token != nullptr);
    uint64_t number = 0;
    bool sawDigit = false;
    bool sawSeparator = false;

    for (; !this->eof(); this->index++) {
        const char16_t ch = this->peekCharWithoutEOF();
        if (ch == '_') {
            sawSeparator = true;
            continue;
        }
        if (ch != '0' && ch != '1') {
            break;
        }
        number = (number << 1) + ch - '0';
        sawDigit = true;
    }
    if (!sawDigit) {
        // only 0b or 0B
        this->throwUnexpectedToken();
    }

    const bool isEof = this->eof();
    const bool isBigInt = !isEof && this->peekChar() == 'n';
    if (UNLIKELY(isBigInt)) {
        ++this->index;
    }
    if (UNLIKELY(!isEof && (isIdentifierStart(this->peekChar()) || isDecimalDigit(this->peekChar())))) {
        this->throwUnexpectedToken();
    }
    if (UNLIKELY(sawSeparator)) {
        testNumericSeparator(start, isBigInt, false, true, false);
    }
    token->setNumericLiteralResult(number, this->lineNumber, this->lineStart, start, this->index, isBigInt, sawSeparator);
}
// Scans the digits of an octal literal, either the ES6 "0o"/"0O" form or the
// legacy form that starts with '0' followed by an octal digit.
//
// token:         receives the numeric literal result (must not be null).
// prefix:        the character after the leading '0' ('o'/'O', or the first
//                octal digit for a legacy literal, which has not been
//                consumed yet when scanNumericLiteral() calls this).
// start:         source index where the whole literal begins.
// isLegacyOctal: true for the legacy form; separators and the BigInt suffix
//                are only recognised when this is false.
//
// Throws through throwUnexpectedToken() when a "0o" prefix has no digits or
// when the literal is directly followed by an identifier start or a decimal
// digit.
void Scanner::scanOctalLiteral(Scanner::ScannerResult* token, char16_t prefix, size_t start, bool isLegacyOctal)
{
    ASSERT(token != nullptr);
    uint64_t number = 0;
    double numberDouble = 0.0;
    bool shouldUseDouble = false;
    bool scanned = false;
    bool octal = isOctalDigit(prefix);
    bool seenUnderscore = false;
    while (!this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        if (!isLegacyOctal) {
            if (UNLIKELY(ch == '_')) {
                this->index++;
                seenUnderscore = true;
                continue;
            }
        }
        if (!isOctalDigit(ch)) {
            break;
        }
        const unsigned digit = ch - '0';
        if (shouldUseDouble) {
            numberDouble = numberDouble * 8 + digit;
        } else if (UNLIKELY((number >> 61) != 0)) {
            // Shifting by three more bits would drop set bits from the 64-bit
            // accumulator. Continue in double precision instead of silently
            // wrapping, like scanHexLiteral does.
            shouldUseDouble = true;
            numberDouble = static_cast<double>(number) * 8 + digit;
            number = 0;
        } else {
            number = (number << 3) + digit;
        }
        this->index++;
        scanned = true;
    }
    if (!octal && !scanned) {
        // only 0o or 0O
        throwUnexpectedToken();
    }
    // isEof is sampled before the optional BigInt suffix is consumed.
    bool isEof = this->eof();
    bool isBigInt = !isEof && !isLegacyOctal && this->peekChar() == 'n';
    if (UNLIKELY(isBigInt)) {
        ++this->index;
    }
    char16_t ch = this->peekChar();
    if (isIdentifierStart(ch) || isDecimalDigit(ch)) {
        throwUnexpectedToken();
    }
    if (UNLIKELY(seenUnderscore)) {
        testNumericSeparator(start, isBigInt, false, false, true);
    }
    if (shouldUseDouble) {
        ASSERT(number == 0);
        token->setNumericLiteralResult(numberDouble, this->lineNumber, this->lineStart, start, this->index, isBigInt, seenUnderscore);
    } else {
        ASSERT(numberDouble == 0.0);
        token->setNumericLiteralResult(number, this->lineNumber, this->lineStart, start, this->index, isBigInt, seenUnderscore);
    }
    token->octal = octal;
}
// Looks ahead, without moving this->index, to decide whether the digits
// following the current position form an implicit (legacy) octal literal.
// (Annex B.1.1 on Numeric Literals)
// Returns false as soon as an '8' or '9' is found in the digit run, true when
// the run ends (or the source ends) without one.
bool Scanner::isImplicitOctalLiteral()
{
    size_t pos = this->index + 1;
    while (pos < this->length) {
        const char16_t digit = this->sourceCharAt(pos);
        if (digit == '8' || digit == '9') {
            // A non-octal digit makes the literal decimal.
            return false;
        }
        if (!isOctalDigit(digit)) {
            // The digit run ended while still purely octal.
            return true;
        }
        ++pos;
    }
    return true;
}
// Scans a numeric literal that starts at this->index with a decimal digit or
// with '.'. Literals prefixed by 0x / 0b / 0o and legacy octal literals are
// delegated to scanHexLiteral / scanBinaryLiteral / scanOctalLiteral; all
// other forms (integer, fraction, exponent, BigInt suffix) are handled here.
//
// token: receives the numeric literal result (must not be null).
//
// Throws through throwUnexpectedToken() or ErrorHandler::throwError() for a
// malformed exponent, an invalid BigInt suffix, an identifier start directly
// after the literal, or a misplaced numeric separator.
void Scanner::scanNumericLiteral(Scanner::ScannerResult* token)
{
    ASSERT(token != nullptr);
    const size_t start = this->index;
    char16_t ch = this->peekChar();
    char16_t startChar = ch;
    ASSERT(isDecimalDigit(ch) || (ch == '.'));
    // 'Numeric literal must start with a decimal digit or a decimal point');
    bool seenDotOrE = false;
    bool seenUnderscore = false;
    if (ch != '.') {
        auto number = this->peekChar();
        ++this->index;
        ch = this->peekChar();
        // Hex number starts with '0x'.
        // Octal number starts with '0'.
        // Octal number in ES6 starts with '0o'.
        // Binary number in ES6 starts with '0b'.
        if (number == '0') {
            if (ch == 'x' || ch == 'X') {
                ++this->index;
                return this->scanHexLiteral(token, start);
            }
            if (ch == 'b' || ch == 'B') {
                ++this->index;
                return this->scanBinaryLiteral(token, start);
            }
            if (ch == 'o' || ch == 'O') {
                ++this->index;
                return this->scanOctalLiteral(token, ch, start, false);
            }
            if (ch && isOctalDigit(ch) && this->isImplicitOctalLiteral()) {
                return this->scanOctalLiteral(token, ch, start, true);
            }
        }
        while (isDecimalDigitOrUnderscore(this->peekChar(), seenUnderscore)) {
            ++this->index;
        }
        ch = this->peekChar();
    }
    if (ch == '.') {
        seenDotOrE = true;
        ++this->index;
        while (isDecimalDigitOrUnderscore(this->peekChar(), seenUnderscore)) {
            ++this->index;
        }
        ch = this->peekChar();
    }
    if (ch == 'e' || ch == 'E') {
        seenDotOrE = true;
        ++this->index;
        ch = this->peekChar();
        if (ch == '+' || ch == '-') {
            ++this->index;
            ch = this->peekChar();
        }
        // The exponent must start with a digit; separators may only follow.
        if (isDecimalDigit(ch)) {
            do {
                ++this->index;
                ch = this->peekChar();
            } while (isDecimalDigitOrUnderscore(ch, seenUnderscore));
        } else {
            this->throwUnexpectedToken();
        }
    }
    // isEof is sampled before the optional BigInt suffix is consumed.
    bool isEof = this->eof();
    bool isBigInt = !isEof && this->peekChar() == 'n';
    if (UNLIKELY(isBigInt)) {
        // No BigInt with a fraction/exponent or with a leading zero ("01n").
        if (seenDotOrE || (startChar == '0' && (this->index - start) > 1)) {
            this->throwUnexpectedToken();
        }
        ++this->index;
    }
    if (UNLIKELY(!isEof && isIdentifierStart(this->peekChar()))) {
        this->throwUnexpectedToken();
    }
    if (UNLIKELY(seenUnderscore)) {
        if (startChar == '0') {
            // A leading zero only forbids separators inside the integer part
            // ("0_1", "08_1"). The fraction and exponent of "0.0_1" or
            // "0e1_0" may still contain them, so stop at the first character
            // that is neither a digit nor a separator.
            for (size_t i = start + 1; i < this->index; i++) {
                const char16_t integerPartChar = this->sourceCharAt(i);
                if (UNLIKELY(integerPartChar == '_')) {
                    ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Numeric separator can not be used after leading 0"), ErrorObject::SyntaxError);
                }
                if (!isDecimalDigit(integerPartChar)) {
                    break;
                }
            }
        }
        for (size_t i = start; i < this->index - 1; i++) {
            const char16_t current = this->sourceCharAt(i);
            const char16_t following = this->sourceCharAt(i + 1);
            if (UNLIKELY(current == '_' && following == '_')) {
                ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Only one underscore is allowed as numeric separator"), ErrorObject::SyntaxError);
            }
            if (UNLIKELY(current == '_' && (following == 'e' || following == 'E'))) {
                ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Numeric separator may not appear adjacent to ExponentPart"), ErrorObject::SyntaxError);
            }
            // A separator must sit between two digits, so it may touch the
            // decimal point on neither side ("1._5" and "1_.5").
            if (UNLIKELY(current == '.' && following == '_')) {
                this->throwUnexpectedToken();
            }
            if (UNLIKELY(current == '_' && following == '.')) {
                this->throwUnexpectedToken();
            }
        }
        if (this->sourceCharAt(this->index - 1) == '_' || (isBigInt && this->sourceCharAt(this->index - 2) == '_')) {
            ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Numeric separators are not allowed at the end of numeric literals"), ErrorObject::SyntaxError);
        }
    }
    // NOTE(review): the value 0 and the flag `true` are passed regardless of
    // isBigInt; presumably the value is computed later from the source range
    // [start, index) - confirm against setNumericLiteralResult.
    token->setNumericLiteralResult(0, this->lineNumber, this->lineStart, start, this->index, true, seenUnderscore);
    if (UNLIKELY(startChar == '0' && !seenDotOrE && (this->index - start) > (isBigInt ? 2 : 1))) {
        token->startWithZero = true;
    }
}
// Scans a single- or double-quoted string literal starting at this->index.
// Escape sequences are validated but not decoded here: a literal without any
// backslash is reported as a source range, otherwise a null String* is stored
// so the value can be built later.
//
// token: receives the string literal result (must not be null).
//
// Throws through throwUnexpectedToken() for an invalid hex/unicode escape or
// an unterminated literal (this->index is reset to the opening quote first).
void Scanner::scanStringLiteral(Scanner::ScannerResult* token)
{
    ASSERT(token != nullptr);
    const size_t start = this->index;
    const char16_t quote = this->peekChar();
    ASSERT((quote == '\'' || quote == '"'));
    // 'String literal must starts with a quote');
    ++this->index;

    bool sawOctalEscape = false;
    bool sawBackslash = false;
    bool closed = false;

    while (LIKELY(!this->eof())) {
        char16_t ch = this->peekCharWithoutEOF();
        ++this->index;

        if (ch == quote) {
            closed = true;
            break;
        }

        if (UNLIKELY(ch == '\\')) {
            ch = this->peekChar();
            ++this->index;
            sawBackslash = true;

            if (ch && isLineTerminator(ch)) {
                // Line continuation: a two-character terminator (CR LF or
                // LF CR) is consumed as a single line break.
                ++this->lineNumber;
                if ((ch == '\r' && this->peekChar() == '\n') || (ch == '\n' && this->peekChar() == '\r')) {
                    ++this->index;
                }
                this->lineStart = this->index;
                continue;
            }

            switch (ch) {
            case 'u':
                if (this->peekChar() == '{') {
                    ++this->index;
                    this->scanUnicodeCodePointEscape();
                } else if (this->scanHexEscape(ch) == EMPTY_CODE_POINT) {
                    this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence);
                }
                break;
            case 'x':
                if (this->scanHexEscape(ch) == EMPTY_CODE_POINT) {
                    this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence);
                }
                break;
            case 'n':
            case 'r':
            case 't':
            case 'b':
            case 'f':
            case 'v':
                // Single-character escapes need no validation.
                break;
            default:
                if (ch && isOctalDigit(ch)) {
                    if (this->octalToDecimal(ch, false) != NON_OCTAL_VALUE) {
                        sawOctalEscape = true;
                    }
                } else if (isDecimalDigit(ch)) {
                    sawOctalEscape = true;
                }
                break;
            }
            continue;
        }

        if (UNLIKELY(ch < 128 && (g_asciiRangeCharMap[ch] & LexerIsCharLineTerminator))) {
            // while parsing string literal, we should not end parsing string token with 0x2028 or 0x2029
            break;
        }
    }

    if (!closed) {
        this->index = start;
        this->throwUnexpectedToken();
    }

    if (sawBackslash) {
        // build string if needs
        token->setResult(Token::StringLiteralToken, (String*)nullptr, this->lineNumber, this->lineStart, start, this->index, sawOctalEscape);
    } else {
        token->setResult(Token::StringLiteralToken, start + 1, this->index - 1, this->lineNumber, this->lineStart, start, this->index, sawOctalEscape);
    }
}
bool Scanner::isFutureReservedWord(const ParserStringView& id)
{
const StringBufferAccessData& data = id.bufferAccessData();
switch (data.length) {
case 4:
return data.equalsSameLength("enum");
case 5:
return data.equalsSameLength("super");
case 6:
return data.equalsSameLength("export") || data.equalsSameLength("import");
}
return false;
}
// Returns true when `identifier` equals one of the strict-mode reserved words
// held in ctx->staticStrings(): let, yield, static, public, private, package,
// protected, interface, implements. Candidates are selected by length first.
bool Scanner::isStrictModeReservedWord(::Escargot::Context* ctx, const AtomicString& identifier)
{
    const auto len = identifier.string()->length();
    if (len == 3) {
        // let
        return identifier == ctx->staticStrings().let;
    }
    if (len == 5) {
        // yield
        return identifier == ctx->staticStrings().yield;
    }
    if (len == 6) {
        // static public
        return identifier == ctx->staticStrings().stringStatic || identifier == ctx->staticStrings().stringPublic;
    }
    if (len == 7) {
        // private package
        return identifier == ctx->staticStrings().stringPrivate || identifier == ctx->staticStrings().package;
    }
    if (len == 9) {
        // protected interface
        return identifier == ctx->staticStrings().stringProtected || identifier == ctx->staticStrings().interface;
    }
    if (len == 10) {
        // implements
        return identifier == ctx->staticStrings().implements;
    }
    return false;
}
// Scans one template-literal chunk, up to and including the closing '`' or
// the "${" that opens a substitution, and stores both its raw and cooked
// text in a ScanTemplateResult.
//
// token: receives the template token result (must not be null).
// head:  copied into the result; when true the reported token start is moved
//        back by one position.
//
// An esprima::Error raised while scanning (for example an invalid escape) is
// not propagated: it is copied into result->error, and scanning resumes from
// the last recorded position to collect only the raw text up to the chunk
// terminator. In that case valueCooked is left unset.
void Scanner::scanTemplate(Scanner::ScannerResult* token, bool head)
{
    ASSERT(token != nullptr);
    // TODO apply rope-string
    UTF16StringDataNonGCStd cooked;
    UTF16StringDataNonGCStd raw;
    bool terminated = false;
    Optional<esprima::Error*> error;
    size_t start = this->index;
    // Position the recovery loop in the catch block restarts from.
    size_t indexForError = this->index;
    bool tail = false;
    try {
        while (!this->eof()) {
            char16_t ch = this->peekCharWithoutEOF();
            ++this->index;
            indexForError = this->index;
            if (ch == '`') {
                tail = true;
                terminated = true;
                break;
            } else if (ch == '$') {
                if (this->peekChar() == '{') {
                    ++this->index;
                    indexForError = this->index;
                    terminated = true;
                    break;
                }
                cooked += ch;
                raw += ch;
            } else if (ch == '\\') {
                raw += ch;
                ch = this->peekChar();
                if (!isLineTerminator(ch)) {
                    // Remember where the escape body starts so its source
                    // characters can be appended to `raw` afterwards.
                    auto currentIndex = this->index;
                    ++this->index;
                    switch (ch) {
                    case 'n':
                        cooked += '\n';
                        break;
                    case 'r':
                        cooked += '\r';
                        break;
                    case 't':
                        cooked += '\t';
                        break;
                    case 'u':
                        if (this->peekChar() == '{') {
                            ++this->index;
                            // NOTE(review): if scanUnicodeCodePointEscape()
                            // returns a code point wider than 16 bits, this
                            // append would truncate it; the \uXXXX branch
                            // below goes through ParserCharPiece - confirm.
                            cooked += this->scanUnicodeCodePointEscape();
                        } else {
                            const char32_t unescaped = this->scanHexEscape(ch);
                            if (unescaped != EMPTY_CODE_POINT) {
                                ParserCharPiece piece(unescaped);
                                cooked += UTF16StringDataNonGCStd(piece.data, piece.length);
                            } else {
                                this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence);
                            }
                        }
                        break;
                    case 'x': {
                        const char32_t unescaped = this->scanHexEscape(ch);
                        if (unescaped == EMPTY_CODE_POINT) {
                            this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence);
                        }
                        ParserCharPiece piece(unescaped);
                        cooked += UTF16StringDataNonGCStd(piece.data, piece.length);
                        break;
                    }
                    case 'b':
                        cooked += '\b';
                        break;
                    case 'f':
                        cooked += '\f';
                        break;
                    case 'v':
                        cooked += '\v';
                        break;
                    default:
                        if (ch == '0') {
                            if (isDecimalDigit(this->peekChar())) {
                                // Illegal: \01 \02 and so on
                                this->throwUnexpectedToken(Messages::TemplateOctalLiteral);
                            }
                            cooked += (char16_t)'\0';
                        } else if (isOctalDigit(ch)) {
                            // Illegal: \1 \2
                            this->throwUnexpectedToken(Messages::TemplateOctalLiteral);
                        } else {
                            cooked += ch;
                        }
                        break;
                    }
                    auto endIndex = this->index;
                    for (size_t i = currentIndex; i < endIndex; i++) {
                        raw += this->sourceCharAt(i);
                    }
                } else {
                    // Backslash followed by a line terminator: nothing is
                    // added to `cooked`; `raw` gets the terminator (CR and
                    // CR LF are recorded as '\n').
                    ++this->index;
                    indexForError = this->index;
                    ++this->lineNumber;
                    if (ch == '\r' && this->peekChar() == '\n') {
                        ++this->index;
                        indexForError = this->index;
                    }
                    if (ch == 0x2028 || ch == 0x2029) {
                        raw += ch;
                    } else {
                        raw += '\n';
                    }
                    this->lineStart = this->index;
                }
            } else if (isLineTerminator(ch)) {
                ++this->lineNumber;
                if (ch == '\r' && this->peekChar() == '\n') {
                    ++this->index;
                    indexForError = this->index;
                }
                if (ch == 0x2028 || ch == 0x2029) {
                    raw += ch;
                    cooked += ch;
                } else {
                    raw += '\n';
                    cooked += '\n';
                }
                this->lineStart = this->index;
            } else {
                cooked += ch;
                raw += ch;
            }
        }
        if (!terminated) {
            this->throwUnexpectedToken();
        }
    } catch (esprima::Error* err) {
        // Keep a GC-allocated copy of the error, then rescan from the last
        // recorded position, collecting raw text only.
        error = new (GC) esprima::Error(*err);
        delete err;
        this->index = indexForError;
        while (!this->eof()) {
            char16_t ch = this->peekCharWithoutEOF();
            ++this->index;
            if (ch == '`') {
                tail = true;
                terminated = true;
                break;
            } else if (ch == '$') {
                if (this->peekChar() == '{') {
                    ++this->index;
                    terminated = true;
                    break;
                }
                cooked += ch;
                raw += ch;
            } else if (isLineTerminator(ch)) {
                ++this->lineNumber;
                if (ch == '\r' && this->peekChar() == '\n') {
                    ++this->index;
                }
                if (ch == 0x2028 || ch == 0x2029) {
                    raw += ch;
                } else {
                    raw += '\n';
                }
                this->lineStart = this->index;
            } else {
                raw += ch;
            }
        }
    }
    ScanTemplateResult* result = new ScanTemplateResult();
    result->head = head;
    result->tail = tail;
    result->valueRaw = UTF16StringData(raw.data(), raw.length());
    if (error) {
        result->error = error;
    } else {
        result->valueCooked = UTF16StringData(cooked.data(), cooked.length());
    }
    if (head) {
        start--;
    }
    token->setTemplateTokenResult(result, this->lineNumber, this->lineStart, start, this->index);
}
// Scans the body of a regular expression literal, from the opening '/' at
// this->index through the matching closing '/'. A '/' inside a character
// class ("[...]") or after a backslash does not terminate the body.
//
// Returns the text between the two slashes, as an ASCIIString when every
// character is ASCII and as a UTF16String otherwise.
//
// Throws through throwUnexpectedToken(Messages::UnterminatedRegExp) when a
// line terminator or the end of input is reached first.
String* Scanner::scanRegExpBody()
{
    const char16_t slash = this->peekChar();
    ASSERT(slash == '/');
    // assert(ch == '/', 'Regular expression literal must start with a slash');
    // TODO apply rope-string
    ++this->index;
    UTF16StringDataNonGCStd pattern(&slash, 1);
    bool insideClass = false;
    bool closed = false;

    while (!closed && !this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        ++this->index;
        pattern += ch;
        if (ch == '\\') {
            // The escaped character is copied verbatim.
            ch = this->peekChar();
            ++this->index;
            // ECMA-262 7.8.5
            if (isLineTerminator(ch)) {
                this->throwUnexpectedToken(Messages::UnterminatedRegExp);
            }
            pattern += ch;
        } else if (isLineTerminator(ch)) {
            this->throwUnexpectedToken(Messages::UnterminatedRegExp);
        } else if (insideClass) {
            insideClass = (ch != ']');
        } else if (ch == '/') {
            closed = true;
        } else if (ch == '[') {
            insideClass = true;
        }
    }

    if (!closed) {
        this->throwUnexpectedToken(Messages::UnterminatedRegExp);
    }

    // Exclude leading and trailing slash.
    const char16_t* body = pattern.data() + 1;
    const size_t bodyLength = pattern.length() - 2;
    if (isAllASCII(body, bodyLength)) {
        return new ASCIIString(body, bodyLength);
    }
    return new UTF16String(body, bodyLength);
}
// Collects the flag characters that follow a regular expression body: every
// consecutive character accepted by isIdentifierPart().
//
// Returns String::emptyString when there are no flags, otherwise a Latin-1
// string (all-ASCII flags) or a UTF16String.
//
// A backslash that is not the last character of the input always ends in
// throwUnexpectedToken(): escapes are not accepted in the flags.
String* Scanner::scanRegExpFlags()
{
    UTF16StringDataNonGCStd collected;
    while (!this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        if (!isIdentifierPart(ch)) {
            break;
        }
        ++this->index;

        const bool startsEscape = (ch == '\\') && !this->eof();
        if (!startsEscape) {
            collected += ch;
            continue;
        }

        ch = this->peekChar();
        if (ch != 'u') {
            this->throwUnexpectedToken();
            continue;
        }

        ++this->index;
        const size_t afterU = this->index;
        const char32_t codePoint = this->scanHexEscape('u');
        if (codePoint == EMPTY_CODE_POINT) {
            // Not a valid escape: rewind to just after the 'u'.
            this->index = afterU;
            collected += 'u';
        } else {
            ParserCharPiece piece(codePoint);
            collected += UTF16StringDataNonGCStd(piece.data, piece.length);
        }
        this->throwUnexpectedToken();
    }

    if (collected.length() == 0) {
        return String::emptyString;
    }
    if (isAllASCII(collected.data(), collected.length())) {
        return String::fromLatin1(collected.data(), collected.length());
    }
    return new UTF16String(collected.data(), collected.length());
}
// Scans a complete regular expression literal (body, then flags) starting at
// this->index and stores both parts in the token.
//
// token: receives a RegularExpressionToken result (must not be null).
void Scanner::scanRegExp(Scanner::ScannerResult* token)
{
    ASSERT(token != nullptr);
    const size_t literalStart = this->index;

    // The body must be scanned first; the flags start where it ends.
    String* const pattern = this->scanRegExpBody();
    String* const modifiers = this->scanRegExpFlags();
    // const value = this->testRegExp(body.value, flags.value);

    ScanRegExpResult regexp;
    regexp.body = pattern;
    regexp.flags = modifiers;

    token->setResult(Token::RegularExpressionToken, this->lineNumber, this->lineStart, literalStart, this->index);
    token->valueRegExp = regexp;
}
// ECMA-262 11.6.2.1 Keywords
// Maps the characters in `data` to a KeywordKind, or NotKeyword when they
// match none of the words below. Candidates are narrowed by first character,
// then by length, then (where several remain) by second character before the
// rest is compared with equalsSameLength().
// The first character is read unconditionally, so `data` must not be empty.
// NOTE(review): the second argument of equalsSameLength() looks like the
// index at which the comparison starts - confirm against its declaration.
static ALWAYS_INLINE KeywordKind getKeyword(const StringBufferAccessData& data)
{
    // 'const' is specialized as Keyword in V8.
    // 'yield' and 'let' are for compatibility with SpiderMonkey and ES.next.
    // Some others are from future reserved words.
    const size_t len = data.length;
    const char16_t lead = data.charAt(0);
    char16_t next;
    switch (lead) {
    case 'a':
        if (len == 2) {
            if (data.charAt(1) == 's') {
                return AsKeyword;
            }
        } else if (len == 5) {
            next = data.charAt(1);
            switch (next) {
            case 's':
                if (data.equalsSameLength("async", 2)) {
                    return AsyncKeyword;
                }
                break;
            case 'w':
                if (data.equalsSameLength("await", 2)) {
                    return AwaitKeyword;
                }
                break;
            }
        } else if (len == 9) {
            if (data.equalsSameLength("arguments", 1)) {
                return ArgumentsKeyword;
            }
        }
        break;
    case 'b':
        if (len != 5) {
            break;
        }
        if (data.equalsSameLength("break", 1)) {
            return BreakKeyword;
        }
        break;
    case 'c':
        switch (len) {
        case 4:
            if (data.equalsSameLength("case", 1)) {
                return CaseKeyword;
            }
            break;
        case 5:
            next = data.charAt(1);
            switch (next) {
            case 'a':
                if (data.equalsSameLength("catch", 2)) {
                    return CatchKeyword;
                }
                break;
            case 'o':
                if (data.equalsSameLength("const", 2)) {
                    return ConstKeyword;
                }
                break;
            case 'l':
                if (data.equalsSameLength("class", 2)) {
                    return ClassKeyword;
                }
                break;
            }
            break;
        case 8:
            if (data.equalsSameLength("continue", 1)) {
                return ContinueKeyword;
            }
            break;
        }
        break;
    case 'd':
        if (len == 2) {
            if (data.charAt(1) == 'o') {
                return DoKeyword;
            }
        } else if (len == 6) {
            if (data.equalsSameLength("delete", 1)) {
                return DeleteKeyword;
            }
        } else if (len == 7) {
            if (data.equalsSameLength("default", 1)) {
                return DefaultKeyword;
            }
        } else if (len == 8) {
            if (data.equalsSameLength("debugger", 1)) {
                return DebuggerKeyword;
            }
        }
        break;
    case 'e':
        if (len == 4) {
            next = data.charAt(1);
            switch (next) {
            case 'l':
                if (data.equalsSameLength("else", 2)) {
                    return ElseKeyword;
                }
                break;
            case 'n':
                if (data.equalsSameLength("enum", 2)) {
                    return EnumKeyword;
                }
                break;
            case 'v':
                if (data.equalsSameLength("eval", 2)) {
                    return EvalKeyword;
                }
                break;
            }
        } else if (len == 6) {
            if (data.equalsSameLength("export", 1)) {
                return ExportKeyword;
            }
        } else if (len == 7) {
            if (data.equalsSameLength("extends", 1)) {
                return ExtendsKeyword;
            }
        }
        break;
    case 'f':
        if (len == 3) {
            if (data.equalsSameLength("for", 1)) {
                return ForKeyword;
            }
        } else if (len == 4) {
            if (data.equalsSameLength("from", 1)) {
                return FromKeyword;
            }
        } else if (len == 5) {
            if (data.equalsSameLength("false", 1)) {
                return FalseKeyword;
            }
        } else if (len == 7) {
            if (data.equalsSameLength("finally", 1)) {
                return FinallyKeyword;
            }
        } else if (len == 8) {
            if (data.equalsSameLength("function", 1)) {
                return FunctionKeyword;
            }
        }
        break;
    case 'g':
        if (len != 3) {
            break;
        }
        if (data.equalsSameLength("get", 1)) {
            return GetKeyword;
        }
        break;
    case 'i':
        if (len == 2) {
            next = data.charAt(1);
            if (next == 'f') {
                return IfKeyword;
            }
            if (next == 'n') {
                return InKeyword;
            }
        } else if (len == 6) {
            if (data.equalsSameLength("import", 1)) {
                return ImportKeyword;
            }
        } else if (len == 9) {
            if (data.equalsSameLength("interface", 1)) {
                return InterfaceKeyword;
            }
        } else if (len == 10) {
            next = data.charAt(1);
            switch (next) {
            case 'n':
                if (data.equalsSameLength("instanceof", 2)) {
                    return InstanceofKeyword;
                }
                break;
            case 'm':
                if (data.equalsSameLength("implements", 2)) {
                    return ImplementsKeyword;
                }
                break;
            }
        }
        break;
    case 'l':
        if (len != 3) {
            break;
        }
        if (data.equalsSameLength("let", 1)) {
            return LetKeyword;
        }
        break;
    case 'n':
        switch (len) {
        case 3:
            if (data.equalsSameLength("new", 1)) {
                return NewKeyword;
            }
            break;
        case 4:
            if (data.equalsSameLength("null", 1)) {
                return NullKeyword;
            }
            break;
        }
        break;
    case 'o':
        if (len != 2) {
            break;
        }
        if (data.charAt(1) == 'f') {
            return OfKeyword;
        }
        break;
    case 'p':
        if (len == 6) {
            if (data.equalsSameLength("public", 1)) {
                return PublicKeyword;
            }
        } else if (len == 7) {
            next = data.charAt(1);
            switch (next) {
            case 'a':
                if (data.equalsSameLength("package", 2)) {
                    return PackageKeyword;
                }
                break;
            case 'r':
                if (data.equalsSameLength("private", 2)) {
                    return PrivateKeyword;
                }
                break;
            }
        } else if (len == 9) {
            if (data.equalsSameLength("protected", 1)) {
                return ProtectedKeyword;
            }
        }
        break;
    case 'r':
        if (len != 6) {
            break;
        }
        if (data.equalsSameLength("return", 1)) {
            return ReturnKeyword;
        }
        break;
    case 's':
        if (len == 3) {
            if (data.equalsSameLength("set", 1)) {
                return SetKeyword;
            }
        } else if (len == 5) {
            if (data.equalsSameLength("super", 1)) {
                return SuperKeyword;
            }
        } else if (len == 6) {
            next = data.charAt(1);
            switch (next) {
            case 'w':
                if (data.equalsSameLength("switch", 2)) {
                    return SwitchKeyword;
                }
                break;
            case 't':
                if (data.equalsSameLength("static", 2)) {
                    return StaticKeyword;
                }
                break;
            }
        }
        break;
    case 't':
        if (len == 3) {
            if (data.equalsSameLength("try", 1)) {
                return TryKeyword;
            }
        } else if (len == 4) {
            next = data.charAt(1);
            switch (next) {
            case 'h':
                if (data.equalsSameLength("this", 2)) {
                    return ThisKeyword;
                }
                break;
            case 'r':
                if (data.equalsSameLength("true", 2)) {
                    return TrueKeyword;
                }
                break;
            }
        } else if (len == 5) {
            if (data.equalsSameLength("throw", 1)) {
                return ThrowKeyword;
            }
        } else if (len == 6) {
            if (data.equalsSameLength("typeof", 1)) {
                return TypeofKeyword;
            }
        }
        break;
    case 'v':
        switch (len) {
        case 3:
            if (data.equalsSameLength("var", 1)) {
                return VarKeyword;
            }
            break;
        case 4:
            if (data.equalsSameLength("void", 1)) {
                return VoidKeyword;
            }
            break;
        }
        break;
    case 'w':
        switch (len) {
        case 4:
            if (data.equalsSameLength("with", 1)) {
                return WithKeyword;
            }
            break;
        case 5:
            if (data.equalsSameLength("while", 1)) {
                return WhileKeyword;
            }
            break;
        }
        break;
    case 'y':
        if (len != 5) {
            break;
        }
        if (data.equalsSameLength("yield", 1)) {
            return YieldKeyword;
        }
        break;
    }
    return NotKeyword;
}
// Scans an identifier starting at this->index and classifies it as an
// identifier, a null/boolean literal, or a keyword.
//
// token: receives the result (must not be null).
// ch0:   the first character of the identifier; a backslash selects the
//        escaped-identifier path (getComplexIdentifier).
//
// 'yield', 'let' and every keyword kind below StrictModeReservedWord are
// reported through setKeywordResult(). Kinds at or above
// StrictModeReservedWord stay identifier tokens, with the kind recorded in
// token->secondaryKeywordKind.
ALWAYS_INLINE void Scanner::scanIdentifier(Scanner::ScannerResult* token, char16_t ch0)
{
    ASSERT(token != nullptr);
    const size_t start = this->index;
    // Backslash (U+005C) starts an escaped character.
    ScanIDResult scanned = UNLIKELY(ch0 == 0x5C) ? this->getComplexIdentifier() : this->getIdentifier();
    const auto& text = std::get<0>(scanned);
    const size_t end = this->index;

    Token type = Token::IdentifierToken;
    // There is no keyword or literal with only one character.
    // Thus, it must be an identifier.
    if (text.length > 1) {
        const KeywordKind kind = getKeyword(text);
        token->secondaryKeywordKind = kind;

        if (kind == NullKeyword) {
            type = Token::NullLiteralToken;
        } else if (kind == TrueKeyword || kind == FalseKeyword) {
            type = Token::BooleanLiteralToken;
        } else if (kind == YieldKeyword || kind == LetKeyword || (kind != NotKeyword && kind < StrictModeReservedWord)) {
            token->setKeywordResult(this->lineNumber, this->lineStart, start, end, kind);
            return;
        }
    }

    auto* builtString = std::get<1>(scanned);
    if (UNLIKELY(builtString != nullptr)) {
        // The identifier text was materialised as a string object.
        token->setResult(type, builtString, this->lineNumber, this->lineStart, start, end);
    } else {
        // The identifier is reported as a plain source range.
        token->setResult(type, start, end, this->lineNumber, this->lineStart, start, end);
    }
}
// Produces the next token starting at this->index. The first character
// selects the scanner: identifier/keyword, string literal, numeric literal,
// template head, or punctuator. An EOFToken is produced at the end of input.
//
// token: reset, then filled with the scanned token (must not be null).
void Scanner::lex(Scanner::ScannerResult* token)
{
    ASSERT(token != nullptr);
    token->resetResult();

    if (UNLIKELY(this->eof())) {
        token->setResult(Token::EOFToken, this->lineNumber, this->lineStart, this->index, this->index);
        return;
    }

    char16_t cp = this->peekCharWithoutEOF();
    // NOTE(review): cp is a char16_t, so the value of U16_GET_SUPPLEMENTARY
    // is presumably truncated when stored, and this->index stays advanced
    // past the first unit of the pair - confirm this is intended.
    if (UNLIKELY(cp >= 128 && cp >= 0xD800 && cp < 0xDFFF)) {
        ++this->index;
        const char32_t trail = this->peekChar();
        if (!U16_IS_TRAIL(trail)) {
            this->throwUnexpectedToken();
        } else {
            cp = U16_GET_SUPPLEMENTARY(cp, trail);
        }
    }

    bool startsIdentifier = isIdentifierStart(cp);
    if (!startsIdentifier) {
        // String literal starts with single quote (U+0027) or double quote (U+0022).
        if (cp == 0x27 || cp == 0x22) {
            this->scanStringLiteral(token);
            return;
        }

        // Dot (.) U+002E can also start a floating-point number, hence the need
        // to check the next character.
        if (UNLIKELY(cp == 0x2E) && isDecimalDigit(this->sourceCharAt(this->index + 1))) {
            this->scanNumericLiteral(token);
            return;
        }

        if (isDecimalDigit(cp)) {
            this->scanNumericLiteral(token);
            return;
        }

        if (UNLIKELY(cp == '`')) {
            // The opening backtick is consumed before the template scanner runs.
            ++this->index;
            this->scanTemplate(token, true);
            return;
        }

        // Possible identifier start in a surrogate pair.
        startsIdentifier = UNLIKELY(cp >= 0xD800 && cp < 0xDFFF) && isIdentifierStart(this->codePointAt(this->index));
        if (!startsIdentifier) {
            this->scanPunctuator(token, cp);
            return;
        }
    }

    // Single call site for the identifier scanner, reached from either check.
    this->scanIdentifier(token, cp);
}
} // namespace Escargot