/* * Copyright (c) 2016-present Samsung Electronics Co., Ltd * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 * USA */ #include "Escargot.h" #include "parser/Lexer.h" #include "parser/UnicodeIdentifierTables.h" #include "parser/esprima_cpp/ParserContext.h" // These two must be the last because they overwrite the ASSERT macro. #include "double-conversion.h" #include "ieee.h" using namespace Escargot::EscargotLexer; namespace Escargot { #define IDENT_RANGE_LONG 200 /* The largest code-point that an UTF16 surrogate pair can represent is 0x10ffff, * so any codepoint above this can be a valid value for empty. The UINT32_MAX is * chosen because it is a valid immediate for machine instructions. */ #define EMPTY_CODE_POINT UINT32_MAX /* The largest octal value is 255, so any higher * value can represent an invalid octal value. 
*/ #define NON_OCTAL_VALUE 256 char EscargotLexer::g_asciiRangeCharMap[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, LexerIsCharWhiteSpace, LexerIsCharLineTerminator, LexerIsCharWhiteSpace, LexerIsCharWhiteSpace, LexerIsCharLineTerminator, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, LexerIsCharWhiteSpace, 0, 0, 0, LexerIsCharIdentStart | LexerIsCharIdent, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent, LexerIsCharIdent, 0, 0, 0, 0, 0, 0, 0, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, 0, LexerIsCharIdentStart | LexerIsCharIdent, 0, 0, LexerIsCharIdentStart | LexerIsCharIdent, 0, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, 
LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, LexerIsCharIdentStart | LexerIsCharIdent, 0, 0, 0, 0, 0 }; NEVER_INLINE bool EscargotLexer::isWhiteSpaceSlowCase(char16_t ch) { ASSERT(ch >= 0x80); if (LIKELY(ch < 0x1680)) { return (ch == 0xA0); } return (ch == 0x1680 || ch == 0x2000 || ch == 0x2001 || ch == 0x2002 || ch == 0x2003 || ch == 0x2004 || ch == 0x2005 || ch == 0x2006 || ch == 0x2007 || ch == 0x2008 || ch == 0x2009 || ch == 0x200A || ch == 0x202F || ch == 0x205F || ch == 0x3000 || ch == 0xFEFF); } static NEVER_INLINE bool isIdentifierPartSlow(char32_t ch) { int bottom = 0; int top = (EscargotLexer::basic_plane_length / sizeof(uint16_t)) - 1; while (true) { int middle = (bottom + top) >> 1; char32_t rangeStart = identRangeStart[middle]; if (ch >= rangeStart) { if (ch < identRangeStart[middle + 1]) { char32_t length = identRangeLength[middle]; if (UNLIKELY(length >= IDENT_RANGE_LONG)) { length = identRangeLongLength[length - IDENT_RANGE_LONG]; } return ch <= rangeStart + length; } bottom = middle + 1; } else { top = middle; } if (bottom == top) { return false; } } } static NEVER_INLINE bool isIdentifierPartSlowSupplementary(char32_t ch) { int bottom = 0; int top = 
(EscargotLexer::supplementary_plane_length / sizeof(uint32_t)) - 1; while (true) { int middle = (bottom + top) >> 1; char32_t rangeStart = identRangeStartSupplementaryPlane[middle]; if (ch >= rangeStart) { if (ch < identRangeStartSupplementaryPlane[middle + 1]) { char32_t length = identRangeLengthSupplementaryPlane[middle]; return ch <= rangeStart + length; } bottom = middle + 1; } else { top = middle; } if (bottom == top) { return false; } } } static ALWAYS_INLINE bool isIdentifierPart(char32_t ch) { if (LIKELY(ch < 128)) { return g_asciiRangeCharMap[ch] & LexerIsCharIdent; } return isIdentifierPartSlow(ch) || isIdentifierPartSlowSupplementary(ch); } static ALWAYS_INLINE bool isIdentifierStart(char32_t ch) { if (LIKELY(ch < 128)) { return g_asciiRangeCharMap[ch] & LexerIsCharIdentStart; } return isIdentifierPartSlow(ch) || isIdentifierPartSlowSupplementary(ch); } static ALWAYS_INLINE bool isDecimalDigit(char16_t ch) { return (ch >= '0' && ch <= '9'); } static ALWAYS_INLINE bool isDecimalDigitOrUnderscore(char16_t ch, bool& seenUnderScore) { if (UNLIKELY(ch == '_')) { seenUnderScore = true; return true; } return (ch >= '0' && ch <= '9'); } static ALWAYS_INLINE bool isHexDigit(char16_t ch) { return isDecimalDigit(ch) || ((ch | 0x20) >= 'a' && (ch | 0x20) <= 'f'); } static ALWAYS_INLINE bool isHexDigitOrUnderscore(char16_t ch, bool& seenUnderScore) { return isDecimalDigitOrUnderscore(ch, seenUnderScore) || ((ch | 0x20) >= 'a' && (ch | 0x20) <= 'f'); } static ALWAYS_INLINE bool isOctalDigit(char16_t ch) { return (ch >= '0' && ch <= '7'); } static ALWAYS_INLINE char16_t octalValue(char16_t ch) { ASSERT(isOctalDigit(ch)); return ch - '0'; } static ALWAYS_INLINE uint8_t toHexNumericValue(char16_t ch) { return ch < 'A' ? 
ch - '0' : ((ch - 'A' + 10) & 0xF); // letters: the low nibble of (ch - 'A' + 10) is 10..15 for both 'A'..'F' and 'a'..'f'
}

// Returns the numeric value (0..15) of the hexadecimal digit `ch`.
// Decimal digits map to 0..9; letters are folded to lower case with `| 0x20`
// before being mapped to 10..15. Validity of a letter is only checked by ASSERT.
static int hexValue(char16_t ch)
{
    if (ch >= '0' && ch <= '9') {
        return ch - '0';
    }

    ASSERT((ch | 0x20) >= 'a' && (ch | 0x20) <= 'f');

    return (ch | 0x20) - ('a' - 10);
}

// UTF-16 encoding of a single code point.
// Values below 0x10000 occupy one code unit; anything else is split into a
// high (0xD800-based) and low (0xDC00-based) surrogate. A zero code unit is
// stored right after the `length` used units.
struct ParserCharPiece {
    char16_t data[3];
    size_t length;

    ParserCharPiece(const char32_t a)
    {
        if (a < 0x10000) {
            data[0] = a;
            data[1] = 0;
            length = 1;
        } else {
            // Upper 10 bits go to the first unit, lower 10 bits to the second.
            data[0] = (char16_t)(0xD800 + ((a - 0x10000) >> 10));
            data[1] = (char16_t)(0xDC00 + ((a - 0x10000) & 1023));
            data[2] = 0;
            length = 2;
        }
    }
};

// Maps a KeywordKind to the corresponding AtomicString stored in the
// context's static strings. Unknown kinds hit ASSERT_NOT_REACHED() and
// fall back to staticStrings().error.
AtomicString keywordToString(::Escargot::Context* ctx, KeywordKind keyword)
{
    switch (keyword) {
    case IfKeyword:
        return ctx->staticStrings().stringIf;
    case InKeyword:
        return ctx->staticStrings().stringIn;
    case DoKeyword:
        return ctx->staticStrings().stringDo;
    case VarKeyword:
        return ctx->staticStrings().var;
    case ForKeyword:
        return ctx->staticStrings().stringFor;
    case NewKeyword:
        return ctx->staticStrings().stringNew;
    case TryKeyword:
        return ctx->staticStrings().stringTry;
    case ThisKeyword:
        return ctx->staticStrings().stringThis;
    case ElseKeyword:
        return ctx->staticStrings().stringElse;
    case CaseKeyword:
        return ctx->staticStrings().stringCase;
    case VoidKeyword:
        return ctx->staticStrings().stringVoid;
    case WithKeyword:
        return ctx->staticStrings().with;
    case EnumKeyword:
        return ctx->staticStrings().stringEnum;
    case WhileKeyword:
        return ctx->staticStrings().stringWhile;
    case BreakKeyword:
        return ctx->staticStrings().stringBreak;
    case CatchKeyword:
        return ctx->staticStrings().stringCatch;
    case ThrowKeyword:
        return ctx->staticStrings().stringThrow;
    case ConstKeyword:
        return ctx->staticStrings().stringConst;
    case ClassKeyword:
        return ctx->staticStrings().stringClass;
    case SuperKeyword:
        return ctx->staticStrings().super;
    case ReturnKeyword:
        return ctx->staticStrings().stringReturn;
    case TypeofKeyword:
        return ctx->staticStrings().stringTypeof;
    case DeleteKeyword:
        return ctx->staticStrings().stringDelete;
    case SwitchKeyword:
        return ctx->staticStrings().stringSwitch;
    case ExportKeyword:
        return ctx->staticStrings().stringExport;
    case ImportKeyword:
        return ctx->staticStrings().stringImport;
    case DefaultKeyword:
        return ctx->staticStrings().stringDefault;
    case FinallyKeyword:
        return ctx->staticStrings().finally;
    case ExtendsKeyword:
        return ctx->staticStrings().extends;
    case FunctionKeyword:
        return ctx->staticStrings().function;
    case ContinueKeyword:
        return ctx->staticStrings().stringContinue;
    case DebuggerKeyword:
        return ctx->staticStrings().debugger;
    case InstanceofKeyword:
        return ctx->staticStrings().instanceof ;
    case ImplementsKeyword:
        return ctx->staticStrings().implements;
    case InterfaceKeyword:
        return ctx->staticStrings().interface;
    case PackageKeyword:
        return ctx->staticStrings().package;
    case PrivateKeyword:
        return ctx->staticStrings().stringPrivate;
    case ProtectedKeyword:
        return ctx->staticStrings().stringProtected;
    case PublicKeyword:
        return ctx->staticStrings().stringPublic;
    case StaticKeyword:
        return ctx->staticStrings().stringStatic;
    case YieldKeyword:
        return ctx->staticStrings().yield;
    case LetKeyword:
        return ctx->staticStrings().let;
    case NullKeyword:
        return ctx->staticStrings().null;
    case TrueKeyword:
        return ctx->staticStrings().stringTrue;
    case FalseKeyword:
        return ctx->staticStrings().stringFalse;
    case GetKeyword:
        return ctx->staticStrings().get;
    case SetKeyword:
        return ctx->staticStrings().set;
    case EvalKeyword:
        return ctx->staticStrings().eval;
    case ArgumentsKeyword:
        return ctx->staticStrings().arguments;
    case OfKeyword:
        return ctx->staticStrings().of;
    case AsyncKeyword:
        return ctx->staticStrings().async;
    case AwaitKeyword:
        return ctx->staticStrings().await;
    case AsKeyword:
        return ctx->staticStrings().as;
    case FromKeyword:
        return ctx->staticStrings().from;
    default:
        ASSERT_NOT_REACHED();
        return ctx->staticStrings().error;
    }
}

// Builds the message "Line <line>: <description>" and throws a newly
// allocated esprima::Error* carrying the position, description and error code.
// This function never returns normally.
void ErrorHandler::throwError(size_t index, size_t line, size_t col, String* description, ErrorObject::Code code)
{
    UTF16StringDataNonGCStd msg = u"Line ";
    // Scratch buffer for the decimal line number followed by ": ".
    const size_t bufferLength = 64;
    char
lineStringBuf[bufferLength]; char* bufPtr = lineStringBuf + bufferLength - 2; /* Adds ": " at the end. */ bufPtr[0] = ':'; bufPtr[1] = ' '; size_t value = line; do { ASSERT(bufPtr > lineStringBuf); --bufPtr; *bufPtr = value % 10 + '0'; value /= 10; } while (value > 0); msg += UTF16StringDataNonGCStd(bufPtr, lineStringBuf + bufferLength); if (description->length()) { msg += UTF16StringDataNonGCStd(description->toUTF16StringData().data()); } esprima::Error* error = new (NoGC) esprima::Error(new UTF16String(msg.data(), msg.length())); error->index = index; error->lineNumber = line; error->column = col; error->description = description; error->errorCode = code; throw error; }; ParserStringView Scanner::SmallScannerResult::relatedSource(const ParserStringView& source) const { return ParserStringView(source, this->start, this->end); } StringView Scanner::SmallScannerResult::relatedSource(const StringView& source) const { return StringView(source, this->start, this->end); } ParserStringView Scanner::ScannerResult::relatedSource(const ParserStringView& source) { return ParserStringView(source, this->start, this->end); } StringView Scanner::ScannerResult::relatedSource(const StringView& source) { return StringView(source, this->start, this->end); } Value Scanner::ScannerResult::valueStringLiteralToValue(Scanner* scannerInstance) { ASSERT(this->type == Token::StringLiteralToken); if (UNLIKELY(this->hasAllocatedString)) { if (!this->valueStringLiteralData.m_stringIfNewlyAllocated) { constructStringLiteral(scannerInstance); } return this->valueStringLiteralData.m_stringIfNewlyAllocated; } // check if string is one of typeof strings // we only consider the most common cases which are undefined, object, function size_t start = this->valueStringLiteralData.m_start; size_t end = this->valueStringLiteralData.m_end; size_t length = end - start; if (length > 5 && length < 10) { ParserStringView str(scannerInstance->source, start, end); switch (str.bufferedCharAt(0)) { case 'o': { if 
(length == 6 && str.equalsSameLength("object", 1)) { return scannerInstance->escargotContext->staticStrings().object.string(); } break; } case 'f': { if (length == 8 && str.equalsSameLength("function", 1)) { return scannerInstance->escargotContext->staticStrings().function.string(); } break; } case 'u': { if (length == 9 && str.equalsSameLength("undefined", 1)) { return scannerInstance->escargotContext->staticStrings().undefined.string(); } break; } default: { return new StringView(scannerInstance->sourceAsNormalView, start, end); } } } return new StringView(scannerInstance->sourceAsNormalView, start, end); } ParserStringView Scanner::ScannerResult::valueStringLiteral(Scanner* scannerInstance) { if (this->type == Token::KeywordToken) { AtomicString as = keywordToString(scannerInstance->escargotContext, this->valueKeywordKind); return ParserStringView(as.string(), 0, as.string()->length()); } if (this->hasAllocatedString) { if (!this->valueStringLiteralData.m_stringIfNewlyAllocated) { constructStringLiteral(scannerInstance); } return ParserStringView(this->valueStringLiteralData.m_stringIfNewlyAllocated); } return ParserStringView(scannerInstance->source, this->valueStringLiteralData.m_start, this->valueStringLiteralData.m_end); } std::pair Scanner::ScannerResult::valueNumberLiteral(Scanner* scannerInstance) { if (this->hasNonComputedNumberLiteral) { const auto& bd = scannerInstance->source.bufferAccessData(); char* buffer; int length = this->end - this->start; if (UNLIKELY(this->hasNumberSeparatorOnNumberLiteral)) { buffer = ALLOCA(this->end - this->start, char, ec); int underScoreCount = 0; for (int i = 0; i < length; i++) { auto c = bd.charAt(i + this->start); if (c == '_') { underScoreCount++; } else { buffer[i - underScoreCount] = c; } } length -= underScoreCount; ASSERT(underScoreCount != 0); } else { if (bd.has8BitContent) { buffer = ((char*)bd.buffer) + this->start; } else { buffer = ALLOCA(this->end - this->start, char, ec); for (int i = 0; i < length; i++) 
{
                    buffer[i] = bd.uncheckedCharAtFor16Bit(i + this->start);
                }
            }
        }

        // bigint case
        // A trailing 'n' marks a BigInt literal; it is parsed without the suffix.
        if (UNLIKELY(buffer[length - 1] == 'n')) {
            return std::make_pair(Value(BigInt::parseString(buffer, length - 1).value()), true);
        }

        int lengthDummy;
        double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::ALLOW_HEX
                                                                 | double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES
                                                                 | double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES,
                                                             0.0, double_conversion::Double::NaN(),
                                                             "Infinity", "NaN");
        double ll = converter.StringToDouble(buffer, length, &lengthDummy);

        // Cache the result so the literal is parsed only once.
        this->valueNumber = ll;
        this->hasNonComputedNumberLiteral = false;
    }
    return std::make_pair(Value(this->valueNumber), false);
}

// Appends the character(s) denoted by one escape sequence to `stringUTF16`.
//
// `ch` is the character that followed the backslash; the scanner is positioned
// right after it and is advanced past any further characters the escape uses.
// `isEveryCharLatin1` is cleared when something outside 0..255 is appended.
void Scanner::ScannerResult::constructStringLiteralHelperAppendUTF16(Scanner* scannerInstance, char16_t ch, UTF16StringDataNonGCStd& stringUTF16, bool& isEveryCharLatin1)
{
    switch (ch) {
    case 'u':
    case 'x': {
        // "\u{...}" code point escape, or a fixed-width "\uXXXX" / "\xXX" hex escape.
        char32_t param;
        if (scannerInstance->peekChar() == '{') {
            ++scannerInstance->index;
            param = scannerInstance->scanUnicodeCodePointEscape();
        } else {
            param = scannerInstance->scanHexEscape(ch);
        }
        ParserCharPiece piece(param);
        stringUTF16.append(piece.data, piece.data + piece.length);
        // A surrogate pair or a unit >= 256 cannot be stored as Latin-1.
        if (piece.length != 1 || piece.data[0] >= 256) {
            isEveryCharLatin1 = false;
        }
        return;
    }
    case 'n':
        stringUTF16 += '\n';
        return;
    case 'r':
        stringUTF16 += '\r';
        return;
    case 't':
        stringUTF16 += '\t';
        return;
    case 'b':
        stringUTF16 += '\b';
        return;
    case 'f':
        stringUTF16 += '\f';
        return;
    case 'v':
        stringUTF16 += '\x0B';
        return;

    default:
        if (ch && isOctalDigit(ch)) {
            // Octal escape: octalToDecimal() consumes the remaining digits.
            uint16_t octToDec = scannerInstance->octalToDecimal(ch, true);
            stringUTF16 += octToDec;
            ASSERT(octToDec < 256);
        } else {
            // Any other character stands for itself.
            stringUTF16 += ch;
            if (ch >= 256) {
                isEveryCharLatin1 = false;
            }
        }
        return;
    }
}

// Builds the decoded contents of this string-literal token and stores the
// result in valueStringLiteralData.m_stringIfNewlyAllocated.
//
// The scanner is temporarily rewound to the token start and its index,
// line number and line start are restored before returning.
void Scanner::ScannerResult::constructStringLiteral(Scanner* scannerInstance)
{
    // Remember the scanner position; it is restored once the literal is decoded.
    size_t indexBackup = scannerInstance->index;
    size_t lineNumberBackup = scannerInstance->lineNumber;
    size_t lineStartBackup = scannerInstance->lineStart;

    scannerInstance->index = this->start;
    char16_t quote = scannerInstance->peekChar();
    // A string literal must start with a quote.
    ASSERT((quote == '\'' || quote == '"'));

    ++scannerInstance->index;
    bool isEveryCharLatin1 = true;

    UTF16StringDataNonGCStd stringUTF16;
    while (true) {
        char16_t ch = scannerInstance->peekChar();
        ++scannerInstance->index;
        if (ch == quote) {
            // Closing quote reached.
            quote = '\0';
            break;
        } else if (UNLIKELY(ch == '\\')) {
            ch = scannerInstance->peekChar();
            ++scannerInstance->index;
            if (!ch || !isLineTerminator(ch)) {
                this->constructStringLiteralHelperAppendUTF16(scannerInstance, ch, stringUTF16, isEveryCharLatin1);
            } else {
                // Backslash followed by a line terminator: a line continuation,
                // nothing is appended. "\r\n" and "\n\r" count as one terminator.
                ++scannerInstance->lineNumber;
                char16_t bufferedChar = scannerInstance->peekChar();
                if ((ch == '\r' && bufferedChar == '\n') || (ch == '\n' && bufferedChar == '\r')) {
                    ++scannerInstance->index;
                }
                scannerInstance->lineStart = scannerInstance->index;
            }
        } else if (UNLIKELY(isLineTerminator(ch))) {
            break;
        } else {
            stringUTF16 += ch;
            if (ch >= 256) {
                isEveryCharLatin1 = false;
            }
        }
    }

    scannerInstance->index = indexBackup;
    scannerInstance->lineNumber = lineNumberBackup;
    scannerInstance->lineStart = lineStartBackup;

    // Prefer the Latin-1 representation when every character fits.
    String* newStr;
    if (isEveryCharLatin1) {
        newStr = String::fromLatin1(stringUTF16.data(), stringUTF16.length());
    } else {
        newStr = new UTF16String(stringUTF16.data(), stringUTF16.length());
    }
    this->valueStringLiteralData.m_stringIfNewlyAllocated = newStr;
}

// Creates a scanner over `code`, positioned at index 0.
// `startLine` initialises lineNumber and `startColumn` initialises lineStart.
// `escargotContext` must not be null.
Scanner::Scanner(::Escargot::Context* escargotContext, ::Escargot::esprima::ParserContext* parserContext, StringView code, bool isModule, size_t startLine, size_t startColumn)
    : source(code, 0, code.length())
    , sourceAsNormalView(code)
    , escargotContext(escargotContext)
    , parserContext(parserContext)
    , sourceCodeAccessData(code.bufferAccessData())
    , isModule(isModule)
    , length(code.length())
    , index(0)
    , lineNumber(startLine)
    , lineStart(startColumn)
{
    ASSERT(escargotContext != nullptr);
    // trackComment = false;
}

// Points the scanner at a new source text and rewinds it to the beginning.
void Scanner::resetSource(StringView code)
{
this->source = ParserStringView(code, 0, code.length()); this->sourceAsNormalView = code; this->sourceCodeAccessData = code.bufferAccessData(); this->length = code.length(); this->index = 0; this->lineNumber = 1; this->lineStart = 0; } void Scanner::skipSingleLine() { while (!this->eof()) { char16_t ch = this->peekCharWithoutEOF(); ++this->index; if (isLineTerminator(ch)) { if (ch == 13 && this->peekCharWithoutEOF() == 10) { ++this->index; } ++this->lineNumber; this->lineStart = this->index; return; } } } void Scanner::skipSingleLineComment(void) { while (!this->eof()) { char16_t ch = this->peekCharWithoutEOF(); ++this->index; if (isLineTerminator(ch)) { if (ch == 13 && this->peekCharWithoutEOF() == 10) { ++this->index; } ++this->lineNumber; this->lineStart = this->index; // return comments; return; } } } void Scanner::skipMultiLineComment(void) { while (!this->eof()) { char16_t ch = this->peekCharWithoutEOF(); ++this->index; if (isLineTerminator(ch)) { if (ch == 0x0D && this->peekCharWithoutEOF() == 0x0A) { ++this->index; } ++this->lineNumber; this->lineStart = this->index; } else if (ch == 0x2A && this->peekCharWithoutEOF() == 0x2F) { // Block comment ends with '*/'. ++this->index; return; } } throwUnexpectedToken(); } char32_t Scanner::scanHexEscape(char prefix) { size_t len = (prefix == 'u') ? 4 : 2; char32_t code = 0; for (size_t i = 0; i < len; ++i) { if (!this->eof() && isHexDigit(this->peekCharWithoutEOF())) { code = code * 16 + hexValue(this->peekCharWithoutEOF()); ++this->index; } else { return EMPTY_CODE_POINT; } } return code; } char32_t Scanner::scanUnicodeCodePointEscape() { // At least, one hex digit is required. 
if (this->eof() || this->peekCharWithoutEOF() == '}') { this->throwUnexpectedToken(); } char32_t code = 0; char16_t ch; while (!this->eof()) { ch = this->peekCharWithoutEOF(); ++this->index; if (!isHexDigit(ch)) { break; } code = code * 16 + hexValue(ch); } if (code > 0x10FFFF || ch != '}') { this->throwUnexpectedToken(); } return code; } Scanner::ScanIDResult Scanner::getIdentifier() { const size_t start = this->index; ++this->index; while (UNLIKELY(!this->eof())) { const char16_t ch = this->peekCharWithoutEOF(); if (UNLIKELY(ch == 0x5C)) { // Blackslash (U+005C) marks Unicode escape sequence. this->index = start; return this->getComplexIdentifier(); } else if (UNLIKELY(ch >= 0xD800 && ch < 0xDFFF)) { // Need to handle surrogate pairs. this->index = start; return this->getComplexIdentifier(); } if (isIdentifierPart(ch)) { ++this->index; } else { break; } } const auto& srcData = this->source.bufferAccessData(); StringBufferAccessData ad(srcData.has8BitContent, this->index - start, srcData.has8BitContent ? reinterpret_cast(((LChar*)srcData.buffer) + start) : reinterpret_cast(((char16_t*)srcData.buffer) + start)); return std::make_tuple(ad, nullptr); } Scanner::ScanIDResult Scanner::getComplexIdentifier() { char16_t cp = this->codePointAt(this->index); ParserCharPiece piece = ParserCharPiece(cp); UTF16StringDataNonGCStd id(piece.data, piece.length); this->index += id.length(); // '\u' (U+005C, U+0075) denotes an escaped character. 
char32_t ch; if (cp == 0x5C) { if (this->peekChar() != 0x75) { this->throwUnexpectedToken(); } ++this->index; if (this->peekChar() == '{') { ++this->index; ch = this->scanUnicodeCodePointEscape(); } else { ch = this->scanHexEscape('u'); cp = ch; if (ch == EMPTY_CODE_POINT || ch == '\\' || !isIdentifierStart(cp)) { this->throwUnexpectedToken(); } } id = ch; } while (!this->eof()) { cp = this->codePointAt(this->index); if (!isIdentifierPart(cp)) { break; } // ch = Character.fromCodePoint(cp); ch = cp; if (ch >= 128 && this->peekChar() >= 0xD800 && this->peekChar() < 0xDFFF) { ch = peekChar(); ++this->index; char32_t ch2 = this->peekChar(); if (U16_IS_TRAIL(ch2)) { ch = U16_GET_SUPPLEMENTARY(ch, ch2); } --this->index; } piece = ParserCharPiece(ch); id += UTF16StringDataNonGCStd(piece.data, piece.length); this->index += piece.length; // '\u' (U+005C, U+0075) denotes an escaped character. if (cp == 0x5C) { // id = id.substr(0, id.length - 1); id.erase(id.length() - 1); if (this->peekChar() != 0x75) { this->throwUnexpectedToken(); } ++this->index; if (this->peekChar() == '{') { ++this->index; ch = this->scanUnicodeCodePointEscape(); } else { ch = this->scanHexEscape('u'); cp = ch; if (ch == EMPTY_CODE_POINT || ch == '\\' || !isIdentifierPart(cp)) { this->throwUnexpectedToken(); } } piece = ParserCharPiece(ch); id += UTF16StringDataNonGCStd(piece.data, piece.length); } } String* str = new UTF16String(id.data(), id.length()); if (UNLIKELY(this->parserContext->await && id == u"await")) { this->throwUnexpectedToken(Messages::KeywordMustNotContainEscapedCharacters); } return std::make_tuple(str->bufferAccessData(), str); } uint16_t Scanner::octalToDecimal(char16_t ch, bool octal) { // \0 is not octal escape sequence char16_t code = octalValue(ch); octal |= (ch != '0'); if (!this->eof() && isOctalDigit(this->peekChar())) { octal = true; code = code * 8 + octalValue(this->peekChar()); ++this->index; // 3 digits are only allowed when string starts // with 0, 1, 2, 3 // if 
('0123'.indexOf(ch) >= 0 && !this->eof() && Character.isOctalDigit(this->source.charCodeAt(this->index))) { if ((ch >= '0' && ch <= '3') && !this->eof() && isOctalDigit(this->peekChar())) { code = code * 8 + octalValue(this->peekChar()); ++this->index; } } ASSERT(!octal || code < NON_OCTAL_VALUE); return octal ? code : NON_OCTAL_VALUE; }; void Scanner::scanPunctuator(Scanner::ScannerResult* token, char16_t ch) { const size_t start = this->index; PunctuatorKind kind; // Check for most common single-character punctuators. ++this->index; switch (ch) { case '(': kind = LeftParenthesis; break; case '{': kind = LeftBrace; break; case '.': kind = Period; if (this->peekChar() == '.' && this->sourceCharAt(this->index + 1) == '.') { // Spread operator "..." this->index += 2; kind = PeriodPeriodPeriod; } break; case '}': kind = RightBrace; break; case ')': kind = RightParenthesis; break; case ';': kind = SemiColon; break; case ',': kind = Comma; break; case '[': kind = LeftSquareBracket; break; case ']': kind = RightSquareBracket; break; case ':': kind = Colon; break; case '?': kind = GuessMark; ch = this->peekChar(); if (ch == '?') { ++this->index; kind = NullishCoalescing; if (this->peekChar() == '=') { kind = LogicalNullishEqual; ++this->index; } } else if (ch == '.') { ++this->index; kind = GuessDot; } break; case '~': kind = Wave; break; case '>': ch = this->peekChar(); kind = RightInequality; if (ch == '>') { ++this->index; ch = this->peekChar(); kind = RightShift; if (ch == '>') { ++this->index; kind = UnsignedRightShift; if (this->peekChar() == '=') { ++this->index; kind = UnsignedRightShiftEqual; } } else if (ch == '=') { kind = RightShiftEqual; ++this->index; } } else if (ch == '=') { kind = RightInequalityEqual; ++this->index; } break; case '<': ch = this->peekChar(); kind = LeftInequality; if (ch == '<') { ++this->index; kind = LeftShift; if (this->peekChar() == '=') { kind = LeftShiftEqual; ++this->index; } } else if (ch == '=') { kind = LeftInequalityEqual; 
++this->index; } break; case '=': ch = this->peekChar(); kind = Substitution; if (ch == '=') { ++this->index; kind = Equal; if (this->peekChar() == '=') { kind = StrictEqual; ++this->index; } } else if (ch == '>') { kind = Arrow; ++this->index; } break; case '!': kind = ExclamationMark; if (this->peekChar() == '=') { ++this->index; kind = NotEqual; if (this->peekChar() == '=') { kind = NotStrictEqual; ++this->index; } } break; case '&': ch = this->peekChar(); kind = BitwiseAnd; if (ch == '&') { kind = LogicalAnd; ++this->index; if (this->peekChar() == '=') { ++this->index; kind = LogicalAndEqual; } } else if (ch == '=') { kind = BitwiseAndEqual; ++this->index; } break; case '|': ch = this->peekChar(); kind = BitwiseOr; if (ch == '|') { kind = LogicalOr; ++this->index; if (this->peekChar() == '=') { ++this->index; kind = LogicalOrEqual; } } else if (ch == '=') { kind = BitwiseOrEqual; ++this->index; } break; case '^': kind = BitwiseXor; if (this->peekChar() == '=') { kind = BitwiseXorEqual; ++this->index; } break; case '+': ch = this->peekChar(); kind = Plus; if (ch == '+') { kind = PlusPlus; ++this->index; } else if (ch == '=') { kind = PlusEqual; ++this->index; } break; case '-': ch = this->peekChar(); kind = Minus; if (ch == '-') { kind = MinusMinus; ++this->index; } else if (ch == '=') { kind = MinusEqual; ++this->index; } break; case '*': ch = this->peekChar(); kind = Multiply; if (ch == '=') { kind = MultiplyEqual; ++this->index; } else if (ch == '*') { kind = Exponentiation; ++this->index; if (this->peekChar() == '=') { kind = ExponentiationEqual; ++this->index; } } break; case '/': kind = Divide; if (this->peekChar() == '=') { kind = DivideEqual; ++this->index; } break; case '%': kind = Mod; if (this->peekChar() == '=') { kind = ModEqual; ++this->index; } break; case '#': kind = Hash; if (this->index == 1 && this->peekChar() == '!') { kind = HashBang; ++this->index; } break; default: this->throwUnexpectedToken(); kind = PunctuatorKindEnd; break; } 
token->setPunctuatorResult(this->lineNumber, this->lineStart, start, this->index, kind); } void Scanner::testNumericSeparator(size_t start, bool isBigInt, bool isHex, bool isBinary, bool isOctal) { for (size_t i = start; i < this->index - 1; i++) { char16_t ch = this->sourceCharAt(i); if (UNLIKELY(ch == '_' && this->sourceCharAt(i + 1) == '_')) { ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Only one underscore is allowed as numeric separator"), ErrorObject::SyntaxError); } if (UNLIKELY(isHex && (ch == 'x' || ch == 'X') && this->sourceCharAt(i + 1) == '_')) { this->throwUnexpectedToken(); } if (UNLIKELY(isBinary && (ch == 'b' || ch == 'B') && this->sourceCharAt(i + 1) == '_')) { this->throwUnexpectedToken(); } if (UNLIKELY(isOctal && (ch == 'o' || ch == 'O') && this->sourceCharAt(i + 1) == '_')) { this->throwUnexpectedToken(); } } if (this->sourceCharAt(this->index - 1) == '_' || (isBigInt && this->sourceCharAt(this->index - 2) == '_')) { ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Numeric separators are not allowed at the end of numeric literals"), ErrorObject::SyntaxError); } } void Scanner::scanHexLiteral(Scanner::ScannerResult* token, size_t start) { ASSERT(token != nullptr); uint64_t number = 0; double numberDouble = 0.0; bool shouldUseDouble = false; bool scanned = false; bool seenUnderscore = false; size_t shiftCount = 0; while (!this->eof()) { char16_t ch = this->peekCharWithoutEOF(); if (!isHexDigitOrUnderscore(ch, seenUnderscore)) { break; } if (UNLIKELY(ch == '_')) { this->index++; continue; } if (shouldUseDouble) { numberDouble = numberDouble * 16 + toHexNumericValue(ch); } else { number = (number << 4) + toHexNumericValue(ch); if (++shiftCount >= 16) { shouldUseDouble = true; numberDouble = number; number = 0; } } this->index++; scanned = true; } if (!scanned) { this->throwUnexpectedToken(); } bool isEof = this->eof(); bool isBigInt = !isEof && 
this->peekChar() == 'n'; if (UNLIKELY(isBigInt)) { ++this->index; } if (UNLIKELY(!isEof && isIdentifierStart(this->peekChar()))) { this->throwUnexpectedToken(); } if (UNLIKELY(seenUnderscore)) { testNumericSeparator(start, isBigInt, true, false, false); } if (shouldUseDouble) { ASSERT(number == 0); token->setNumericLiteralResult(numberDouble, this->lineNumber, this->lineStart, start, this->index, isBigInt, seenUnderscore); } else { ASSERT(numberDouble == 0.0); token->setNumericLiteralResult(number, this->lineNumber, this->lineStart, start, this->index, isBigInt, seenUnderscore); } } void Scanner::scanBinaryLiteral(Scanner::ScannerResult* token, size_t start) { ASSERT(token != nullptr); uint64_t number = 0; bool scanned = false; bool seenUnderscore = false; while (!this->eof()) { char16_t ch = this->peekCharWithoutEOF(); if (ch == '0' || ch == '1') { number = (number << 1) + ch - '0'; this->index++; scanned = true; } else if (ch == '_') { this->index++; seenUnderscore = true; } else { break; } } if (!scanned) { // only 0b or 0B this->throwUnexpectedToken(); } bool isEof = this->eof(); bool isBigInt = !isEof && this->peekChar() == 'n'; if (UNLIKELY(isBigInt)) { ++this->index; } if (UNLIKELY(!isEof && (isIdentifierStart(this->peekChar()) || isDecimalDigit(this->peekChar())))) { this->throwUnexpectedToken(); } if (UNLIKELY(seenUnderscore)) { testNumericSeparator(start, isBigInt, false, true, false); } token->setNumericLiteralResult(number, this->lineNumber, this->lineStart, start, this->index, isBigInt, seenUnderscore); } void Scanner::scanOctalLiteral(Scanner::ScannerResult* token, char16_t prefix, size_t start, bool isLegacyOctal) { ASSERT(token != nullptr); uint64_t number = 0; bool scanned = false; bool octal = isOctalDigit(prefix); bool seenUnderscore = false; while (!this->eof()) { char16_t ch = this->peekCharWithoutEOF(); if (!isLegacyOctal) { if (UNLIKELY(ch == '_')) { this->index++; seenUnderscore = true; continue; } } if (!isOctalDigit(ch)) { break; } number 
= (number << 3) + ch - '0'; this->index++; scanned = true; } if (!octal && !scanned) { // only 0o or 0O throwUnexpectedToken(); } bool isEof = this->eof(); bool isBigInt = !isEof && !isLegacyOctal && this->peekChar() == 'n'; if (UNLIKELY(isBigInt)) { ++this->index; } char16_t ch = this->peekChar(); if (isIdentifierStart(ch) || isDecimalDigit(ch)) { throwUnexpectedToken(); } if (UNLIKELY(seenUnderscore)) { testNumericSeparator(start, isBigInt, false, false, true); } token->setNumericLiteralResult(number, this->lineNumber, this->lineStart, start, this->index, isBigInt, seenUnderscore); token->octal = octal; } bool Scanner::isImplicitOctalLiteral() { // Implicit octal, unless there is a non-octal digit. // (Annex B.1.1 on Numeric Literals) for (size_t i = this->index + 1; i < this->length; ++i) { const char16_t ch = this->sourceCharAt(i); if (ch == '8' || ch == '9') { return false; } if (!isOctalDigit(ch)) { return true; } } return true; } void Scanner::scanNumericLiteral(Scanner::ScannerResult* token) { ASSERT(token != nullptr); const size_t start = this->index; char16_t ch = this->peekChar(); char16_t startChar = ch; ASSERT(isDecimalDigit(ch) || (ch == '.')); // 'Numeric literal must start with a decimal digit or a decimal point'); bool seenDotOrE = false; bool seenUnderscore = false; if (ch != '.') { auto number = this->peekChar(); ++this->index; ch = this->peekChar(); // Hex number starts with '0x'. // Octal number starts with '0'. // Octal number in ES6 starts with '0o'. // Binary number in ES6 starts with '0b'. 
if (number == '0') { if (ch == 'x' || ch == 'X') { ++this->index; return this->scanHexLiteral(token, start); } if (ch == 'b' || ch == 'B') { ++this->index; return this->scanBinaryLiteral(token, start); } if (ch == 'o' || ch == 'O') { ++this->index; return this->scanOctalLiteral(token, ch, start, false); } if (ch && isOctalDigit(ch) && this->isImplicitOctalLiteral()) { return this->scanOctalLiteral(token, ch, start, true); } } while (isDecimalDigitOrUnderscore(this->peekChar(), seenUnderscore)) { ++this->index; } ch = this->peekChar(); } if (ch == '.') { seenDotOrE = true; ++this->index; while (isDecimalDigitOrUnderscore(this->peekChar(), seenUnderscore)) { ++this->index; } ch = this->peekChar(); } if (ch == 'e' || ch == 'E') { seenDotOrE = true; ++this->index; ch = this->peekChar(); if (ch == '+' || ch == '-') { ++this->index; ch = this->peekChar(); } if (isDecimalDigit(ch)) { do { ++this->index; ch = this->peekChar(); } while (isDecimalDigitOrUnderscore(ch, seenUnderscore)); } else { this->throwUnexpectedToken(); } } bool isEof = this->eof(); bool isBigInt = !isEof && this->peekChar() == 'n'; if (UNLIKELY(isBigInt)) { if (seenDotOrE || (startChar == '0' && (this->index - start) > 1)) { this->throwUnexpectedToken(); } ++this->index; } if (UNLIKELY(!isEof && isIdentifierStart(this->peekChar()))) { this->throwUnexpectedToken(); } if (UNLIKELY(seenUnderscore)) { if (this->sourceCharAt(start) == '0') { ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Numeric separator can not be used after leading 0"), ErrorObject::SyntaxError); } for (size_t i = start; i < this->index - 1; i++) { char16_t ch = this->sourceCharAt(i); if (UNLIKELY(ch == '_' && this->sourceCharAt(i + 1) == '_')) { ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Only one underscore is allowed as numeric separator"), ErrorObject::SyntaxError); } if (UNLIKELY(ch == '_' && (this->sourceCharAt(i + 1) == 'e' || 
this->sourceCharAt(i + 1) == 'E'))) { ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Numeric separator may not appear adjacent to ExponentPart"), ErrorObject::SyntaxError); } if (UNLIKELY(ch == '.' && this->sourceCharAt(i + 1) == '_')) { this->throwUnexpectedToken(); } } if (this->sourceCharAt(this->index - 1) == '_' || (isBigInt && this->sourceCharAt(this->index - 2) == '_')) { ErrorHandler::throwError(start, this->lineNumber, start - this->lineStart + 1, new ASCIIString("Numeric separators are not allowed at the end of numeric literals"), ErrorObject::SyntaxError); } } token->setNumericLiteralResult(0, this->lineNumber, this->lineStart, start, this->index, true, seenUnderscore); if (UNLIKELY(startChar == '0' && !seenDotOrE && (this->index - start) > (isBigInt ? 2 : 1))) { token->startWithZero = true; } } void Scanner::scanStringLiteral(Scanner::ScannerResult* token) { ASSERT(token != nullptr); const size_t start = this->index; char16_t quote = this->peekChar(); ASSERT((quote == '\'' || quote == '"')); // 'String literal must starts with a quote'); ++this->index; bool octal = false; bool isPlainCase = true; while (LIKELY(!this->eof())) { char16_t ch = this->peekCharWithoutEOF(); ++this->index; if (ch == quote) { quote = '\0'; break; } else if (UNLIKELY(ch == '\\')) { ch = this->peekChar(); ++this->index; isPlainCase = false; if (!ch || !isLineTerminator(ch)) { switch (ch) { case 'u': if (this->peekChar() == '{') { ++this->index; this->scanUnicodeCodePointEscape(); } else if (this->scanHexEscape(ch) == EMPTY_CODE_POINT) { this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence); } break; case 'x': if (this->scanHexEscape(ch) == EMPTY_CODE_POINT) { this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence); } break; case 'n': case 'r': case 't': case 'b': case 'f': case 'v': break; default: if (ch && isOctalDigit(ch)) { octal |= (this->octalToDecimal(ch, false) != NON_OCTAL_VALUE); } else if 
(isDecimalDigit(ch)) { octal = true; } break; } } else { ++this->lineNumber; if (ch == '\r' && this->peekChar() == '\n') { ++this->index; } else if (ch == '\n' && this->peekChar() == '\r') { ++this->index; } this->lineStart = this->index; } } else if (UNLIKELY(ch < 128 && (g_asciiRangeCharMap[ch] & LexerIsCharLineTerminator))) { // while parsing string literal, we should not end parsing string token with 0x2028 or 0x2029 break; } } if (quote != '\0') { this->index = start; this->throwUnexpectedToken(); } if (isPlainCase) { token->setResult(Token::StringLiteralToken, start + 1, this->index - 1, this->lineNumber, this->lineStart, start, this->index, octal); } else { // build string if needs token->setResult(Token::StringLiteralToken, (String*)nullptr, this->lineNumber, this->lineStart, start, this->index, octal); } } bool Scanner::isFutureReservedWord(const ParserStringView& id) { const StringBufferAccessData& data = id.bufferAccessData(); switch (data.length) { case 4: return data.equalsSameLength("enum"); case 5: return data.equalsSameLength("super"); case 6: return data.equalsSameLength("export") || data.equalsSameLength("import"); } return false; } bool Scanner::isStrictModeReservedWord(::Escargot::Context* ctx, const AtomicString& identifier) { switch (identifier.string()->length()) { case 3: // let return identifier == ctx->staticStrings().let; case 5: // yield return identifier == ctx->staticStrings().yield; case 6: // static public return identifier == ctx->staticStrings().stringStatic || identifier == ctx->staticStrings().stringPublic; case 7: // private package return identifier == ctx->staticStrings().stringPrivate || identifier == ctx->staticStrings().package; case 9: // protected interface return identifier == ctx->staticStrings().stringProtected || identifier == ctx->staticStrings().interface; case 10: // implements return identifier == ctx->staticStrings().implements; } return false; } void Scanner::scanTemplate(Scanner::ScannerResult* token, bool 
head) { ASSERT(token != nullptr); // TODO apply rope-string UTF16StringDataNonGCStd cooked; UTF16StringDataNonGCStd raw; bool terminated = false; Optional error; size_t start = this->index; size_t indexForError = this->index; bool tail = false; try { while (!this->eof()) { char16_t ch = this->peekCharWithoutEOF(); ++this->index; indexForError = this->index; if (ch == '`') { tail = true; terminated = true; break; } else if (ch == '$') { if (this->peekChar() == '{') { ++this->index; indexForError = this->index; terminated = true; break; } cooked += ch; raw += ch; } else if (ch == '\\') { raw += ch; ch = this->peekChar(); if (!isLineTerminator(ch)) { auto currentIndex = this->index; ++this->index; switch (ch) { case 'n': cooked += '\n'; break; case 'r': cooked += '\r'; break; case 't': cooked += '\t'; break; case 'u': if (this->peekChar() == '{') { ++this->index; cooked += this->scanUnicodeCodePointEscape(); } else { const size_t restore = this->index; const char32_t unescaped = this->scanHexEscape(ch); if (unescaped != EMPTY_CODE_POINT) { ParserCharPiece piece(unescaped); cooked += UTF16StringDataNonGCStd(piece.data, piece.length); } else { this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence); } } break; case 'x': { const char32_t unescaped = this->scanHexEscape(ch); if (unescaped == EMPTY_CODE_POINT) { this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence); } ParserCharPiece piece(unescaped); cooked += UTF16StringDataNonGCStd(piece.data, piece.length); break; } case 'b': cooked += '\b'; break; case 'f': cooked += '\f'; break; case 'v': cooked += '\v'; break; default: if (ch == '0') { if (isDecimalDigit(this->peekChar())) { // Illegal: \01 \02 and so on this->throwUnexpectedToken(Messages::TemplateOctalLiteral); } cooked += (char16_t)'\0'; } else if (isOctalDigit(ch)) { // Illegal: \1 \2 this->throwUnexpectedToken(Messages::TemplateOctalLiteral); } else { cooked += ch; } break; } auto endIndex = this->index; for (size_t i = currentIndex; i < 
endIndex; i++) { raw += this->sourceCharAt(i); } } else { ++this->index; indexForError = this->index; ++this->lineNumber; if (ch == '\r' && this->peekChar() == '\n') { ++this->index; indexForError = this->index; } if (ch == 0x2028 || ch == 0x2029) { raw += ch; } else { raw += '\n'; } this->lineStart = this->index; } } else if (isLineTerminator(ch)) { ++this->lineNumber; if (ch == '\r' && this->peekChar() == '\n') { ++this->index; indexForError = this->index; } if (ch == 0x2028 || ch == 0x2029) { raw += ch; cooked += ch; } else { raw += '\n'; cooked += '\n'; } this->lineStart = this->index; } else { cooked += ch; raw += ch; } } if (!terminated) { this->throwUnexpectedToken(); } } catch (esprima::Error* err) { error = new (GC) esprima::Error(*err); delete err; this->index = indexForError; while (!this->eof()) { char16_t ch = this->peekCharWithoutEOF(); ++this->index; if (ch == '`') { tail = true; terminated = true; break; } else if (ch == '$') { if (this->peekChar() == '{') { ++this->index; terminated = true; break; } cooked += ch; raw += ch; } else if (isLineTerminator(ch)) { ++this->lineNumber; if (ch == '\r' && this->peekChar() == '\n') { ++this->index; } if (ch == 0x2028 || ch == 0x2029) { raw += ch; } else { raw += '\n'; } this->lineStart = this->index; } else { raw += ch; } } } ScanTemplateResult* result = new ScanTemplateResult(); result->head = head; result->tail = tail; result->valueRaw = UTF16StringData(raw.data(), raw.length()); if (error) { result->error = error; } else { result->valueCooked = UTF16StringData(cooked.data(), cooked.length()); } if (head) { start--; } token->setTemplateTokenResult(result, this->lineNumber, this->lineStart, start, this->index); } String* Scanner::scanRegExpBody() { char16_t ch = this->peekChar(); ASSERT(ch == '/'); // assert(ch == '/', 'Regular expression literal must start with a slash'); // TODO apply rope-string char16_t ch0 = this->peekChar(); ++this->index; UTF16StringDataNonGCStd str(&ch0, 1); bool classMarker = false; 
bool terminated = false; while (!this->eof()) { ch = this->peekCharWithoutEOF(); ++this->index; str += ch; if (ch == '\\') { ch = this->peekChar(); ++this->index; // ECMA-262 7.8.5 if (isLineTerminator(ch)) { this->throwUnexpectedToken(Messages::UnterminatedRegExp); } str += ch; } else if (isLineTerminator(ch)) { this->throwUnexpectedToken(Messages::UnterminatedRegExp); } else if (classMarker) { if (ch == ']') { classMarker = false; } } else { if (ch == '/') { terminated = true; break; } else if (ch == '[') { classMarker = true; } } } if (!terminated) { this->throwUnexpectedToken(Messages::UnterminatedRegExp); } // Exclude leading and trailing slash. str = str.substr(1, str.length() - 2); if (isAllASCII(str.data(), str.length())) { return new ASCIIString(str.data(), str.length()); } return new UTF16String(str.data(), str.length()); } String* Scanner::scanRegExpFlags() { // UTF16StringData str = ''; UTF16StringDataNonGCStd flags; while (!this->eof()) { char16_t ch = this->peekCharWithoutEOF(); if (!isIdentifierPart(ch)) { break; } ++this->index; if (ch == '\\' && !this->eof()) { ch = this->peekChar(); if (ch == 'u') { ++this->index; const size_t restore = this->index; char32_t ch32 = this->scanHexEscape('u'); if (ch32 != EMPTY_CODE_POINT) { ParserCharPiece piece(ch32); flags += UTF16StringDataNonGCStd(piece.data, piece.length); /* for (str += '\\u'; restore < this->index; ++restore) { str += this->source[restore]; }*/ } else { this->index = restore; flags += 'u'; // str += '\\u'; } this->throwUnexpectedToken(); } else { // str += '\\'; this->throwUnexpectedToken(); } } else { flags += ch; // str += ch; } } if (!flags.length()) { return String::emptyString; } if (isAllASCII(flags.data(), flags.length())) { return String::fromLatin1(flags.data(), flags.length()); } return new UTF16String(flags.data(), flags.length()); } void Scanner::scanRegExp(Scanner::ScannerResult* token) { ASSERT(token != nullptr); const size_t start = this->index; String* body = 
this->scanRegExpBody(); String* flags = this->scanRegExpFlags(); // const value = this->testRegExp(body.value, flags.value); ScanRegExpResult result; result.body = body; result.flags = flags; token->setResult(Token::RegularExpressionToken, this->lineNumber, this->lineStart, start, this->index); token->valueRegExp = result; } // ECMA-262 11.6.2.1 Keywords static ALWAYS_INLINE KeywordKind getKeyword(const StringBufferAccessData& data) { // 'const' is specialized as Keyword in V8. // 'yield' and 'let' are for compatibility with SpiderMonkey and ES.next. // Some others are from future reserved words. size_t length = data.length; char16_t first = data.charAt(0); char16_t second; switch (first) { case 'a': switch (length) { case 2: if (data.charAt(1) == 's') { return AsKeyword; } break; case 5: second = data.charAt(1); if (second == 's' && data.equalsSameLength("async", 2)) { return AsyncKeyword; } else if (second == 'w' && data.equalsSameLength("await", 2)) { return AwaitKeyword; } break; case 9: if (data.equalsSameLength("arguments", 1)) { return ArgumentsKeyword; } break; } break; case 'b': if (length == 5 && data.equalsSameLength("break", 1)) { return BreakKeyword; } break; case 'c': if (length == 4) { if (data.equalsSameLength("case", 1)) { return CaseKeyword; } } else if (length == 5) { second = data.charAt(1); if (second == 'a' && data.equalsSameLength("catch", 2)) { return CatchKeyword; } else if (second == 'o' && data.equalsSameLength("const", 2)) { return ConstKeyword; } else if (second == 'l' && data.equalsSameLength("class", 2)) { return ClassKeyword; } } else if (length == 8 && data.equalsSameLength("continue", 1)) { return ContinueKeyword; } break; case 'd': switch (length) { case 2: if (data.charAt(1) == 'o') { return DoKeyword; } break; case 6: if (data.equalsSameLength("delete", 1)) { return DeleteKeyword; } break; case 7: if (data.equalsSameLength("default", 1)) { return DefaultKeyword; } break; case 8: if (data.equalsSameLength("debugger", 1)) { return 
DebuggerKeyword; } break; } break; case 'e': switch (length) { case 4: second = data.charAt(1); if (second == 'l' && data.equalsSameLength("else", 2)) { return ElseKeyword; } else if (second == 'n' && data.equalsSameLength("enum", 2)) { return EnumKeyword; } else if (second == 'v' && data.equalsSameLength("eval", 2)) { return EvalKeyword; } break; case 6: if (data.equalsSameLength("export", 1)) { return ExportKeyword; } break; case 7: if (data.equalsSameLength("extends", 1)) { return ExtendsKeyword; } break; } break; case 'f': switch (length) { case 3: if (data.equalsSameLength("for", 1)) { return ForKeyword; } break; case 4: if (data.equalsSameLength("from", 1)) { return FromKeyword; } break; case 5: if (data.equalsSameLength("false", 1)) { return FalseKeyword; } break; case 7: if (data.equalsSameLength("finally", 1)) { return FinallyKeyword; } break; case 8: if (data.equalsSameLength("function", 1)) { return FunctionKeyword; } break; } break; case 'g': if (length == 3 && data.equalsSameLength("get", 1)) { return GetKeyword; } break; case 'i': switch (length) { case 2: second = data.charAt(1); if (second == 'f') { return IfKeyword; } else if (second == 'n') { return InKeyword; } break; case 6: if (data.equalsSameLength("import", 1)) { return ImportKeyword; } break; case 9: if (data.equalsSameLength("interface", 1)) { return InterfaceKeyword; } break; case 10: second = data.charAt(1); if (second == 'n' && data.equalsSameLength("instanceof", 2)) { return InstanceofKeyword; } else if (second == 'm' && data.equalsSameLength("implements", 2)) { return ImplementsKeyword; } break; } break; case 'l': if (length == 3 && data.equalsSameLength("let", 1)) { return LetKeyword; } break; case 'n': if (length == 3 && data.equalsSameLength("new", 1)) { return NewKeyword; } else if (length == 4 && data.equalsSameLength("null", 1)) { return NullKeyword; } break; case 'o': if (length == 2 && data.charAt(1) == 'f') { return OfKeyword; } break; case 'p': switch (length) { case 6: if 
(data.equalsSameLength("public", 1)) { return PublicKeyword; } break; case 7: second = data.charAt(1); if (second == 'a' && data.equalsSameLength("package", 2)) { return PackageKeyword; } else if (second == 'r' && data.equalsSameLength("private", 2)) { return PrivateKeyword; } break; case 9: if (data.equalsSameLength("protected", 1)) { return ProtectedKeyword; } break; } break; case 'r': if (length == 6 && data.equalsSameLength("return", 1)) { return ReturnKeyword; } break; case 's': switch (length) { case 3: if (data.equalsSameLength("set", 1)) { return SetKeyword; } break; case 5: if (data.equalsSameLength("super", 1)) { return SuperKeyword; } break; case 6: second = data.charAt(1); if (second == 'w' && data.equalsSameLength("switch", 2)) { return SwitchKeyword; } else if (second == 't' && data.equalsSameLength("static", 2)) { return StaticKeyword; } break; } break; case 't': switch (length) { case 3: if (data.equalsSameLength("try", 1)) { return TryKeyword; } break; case 4: second = data.charAt(1); if (second == 'h' && data.equalsSameLength("this", 2)) { return ThisKeyword; } else if (second == 'r' && data.equalsSameLength("true", 2)) { return TrueKeyword; } break; case 5: if (data.equalsSameLength("throw", 1)) { return ThrowKeyword; } break; case 6: if (data.equalsSameLength("typeof", 1)) { return TypeofKeyword; } break; } break; case 'v': if (length == 3 && data.equalsSameLength("var", 1)) { return VarKeyword; } else if (length == 4 && data.equalsSameLength("void", 1)) { return VoidKeyword; } break; case 'w': if (length == 4 && data.equalsSameLength("with", 1)) { return WithKeyword; } else if (length == 5 && data.equalsSameLength("while", 1)) { return WhileKeyword; } break; case 'y': if (length == 5 && data.equalsSameLength("yield", 1)) { return YieldKeyword; } break; } return NotKeyword; } ALWAYS_INLINE void Scanner::scanIdentifier(Scanner::ScannerResult* token, char16_t ch0) { ASSERT(token != nullptr); Token type = Token::IdentifierToken; const size_t start 
= this->index; // Backslash (U+005C) starts an escaped character. ScanIDResult id = UNLIKELY(ch0 == 0x5C) ? this->getComplexIdentifier() : this->getIdentifier(); const auto& data = std::get<0>(id); const size_t end = this->index; // There is no keyword or literal with only one character. // Thus, it must be an identifier. if (data.length > 1) { KeywordKind keywordKind = getKeyword(data); token->secondaryKeywordKind = keywordKind; switch (keywordKind) { case NotKeyword: break; case NullKeyword: type = Token::NullLiteralToken; break; case TrueKeyword: case FalseKeyword: type = BooleanLiteralToken; break; case YieldKeyword: case LetKeyword: token->setKeywordResult(this->lineNumber, this->lineStart, start, this->index, keywordKind); return; default: if (keywordKind >= StrictModeReservedWord) { break; } token->setKeywordResult(this->lineNumber, this->lineStart, start, this->index, keywordKind); return; } } if (UNLIKELY(std::get<1>(id) != nullptr)) { token->setResult(type, std::get<1>(id), this->lineNumber, this->lineStart, start, end); } else { token->setResult(type, start, end, this->lineNumber, this->lineStart, start, end); } } void Scanner::lex(Scanner::ScannerResult* token) { ASSERT(token != nullptr); token->resetResult(); if (UNLIKELY(this->eof())) { token->setResult(Token::EOFToken, this->lineNumber, this->lineStart, this->index, this->index); return; } char16_t cp = this->peekCharWithoutEOF(); if (UNLIKELY(cp >= 128 && cp >= 0xD800 && cp < 0xDFFF)) { ++this->index; char32_t ch2 = this->peekChar(); if (U16_IS_TRAIL(ch2)) { cp = U16_GET_SUPPLEMENTARY(cp, ch2); } else { this->throwUnexpectedToken(); } } if (isIdentifierStart(cp)) { goto ScanID; } // String literal starts with single quote (U+0027) or double quote (U+0022). if (cp == 0x27 || cp == 0x22) { this->scanStringLiteral(token); return; } // Dot (.) U+002E can also start a floating-point number, hence the need // to check the next character. 
if (UNLIKELY(cp == 0x2E) && isDecimalDigit(this->sourceCharAt(this->index + 1))) { this->scanNumericLiteral(token); return; } if (isDecimalDigit(cp)) { this->scanNumericLiteral(token); return; } if (UNLIKELY(cp == '`')) { ++this->index; this->scanTemplate(token, true); return; } // Possible identifier start in a surrogate pair. if (UNLIKELY(cp >= 0xD800 && cp < 0xDFFF) && isIdentifierStart(this->codePointAt(this->index))) { goto ScanID; } this->scanPunctuator(token, cp); return; ScanID: this->scanIdentifier(token, cp); return; } } // namespace Escargot