escargot/src/parser/Lexer.cpp

/*
 * Copyright (c) 2016-present Samsung Electronics Co., Ltd
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301
 *  USA
 */

#include "Escargot.h"
#include "parser/Lexer.h"

// These two must be the last because they overwrite the ASSERT macro.
#include "double-conversion.h"
#include "ieee.h"

using namespace Escargot::EscargotLexer;

namespace Escargot {

/* Definitions of the diagnostic message texts declared as static members of
 * Messages. These are runtime strings; their wording must not be changed
 * casually. */
const char* Messages::InvalidHexEscapeSequence = "Invalid hexadecimal escape sequence";
const char* Messages::UnexpectedTokenIllegal = "Unexpected token ILLEGAL";
const char* Messages::UnterminatedRegExp = "Invalid regular expression: missing /";
const char* Messages::TemplateOctalLiteral = "Octal literals are not allowed in template strings.";

/* Threshold used by the identifier range tables: an identRangeLength entry
 * that is greater than or equal to this value is not a length itself;
 * subtracting IDENT_RANGE_LONG from it yields an index into
 * identRangeLongLength, where the real (16 bit) length is stored. */
#define IDENT_RANGE_LONG 200

/* The largest code point that a UTF-16 surrogate pair can represent is 0x10ffff,
 * so any value above it can serve as the "empty" (no code point) marker.
 * UINT32_MAX is chosen because it is a valid immediate for machine instructions. */
#define EMPTY_CODE_POINT UINT32_MAX

/* The largest value an octal escape can produce is 255, so any higher
 * value can represent "not an octal escape". */
#define NON_OCTAL_VALUE 256

/* Character class table for the ASCII range, indexed by character code
 * (0..127). Each entry is a bit set built from the LexerIsChar* flags; the
 * helpers in this file test it with expressions such as
 * g_asciiRangeCharMap[ch] & LexerIsCharIdent. The comments below mark the
 * index at which each group of entries starts. */
char EscargotLexer::g_asciiRangeCharMap[128] = {
    /* 0x00 - 0x08 */
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    /* 0x09 TAB, 0x0A LF, 0x0B VT, 0x0C FF, 0x0D CR */
    LexerIsCharWhiteSpace,
    LexerIsCharLineTerminator,
    LexerIsCharWhiteSpace,
    LexerIsCharWhiteSpace,
    LexerIsCharLineTerminator,
    /* 0x0E - 0x1F */
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    /* 0x20 space */
    LexerIsCharWhiteSpace,
    /* 0x21 - 0x23 */
    0,
    0,
    0,
    /* 0x24 dollar sign */
    LexerIsCharIdentStart | LexerIsCharIdent,
    /* 0x25 - 0x2F */
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    /* 0x30 - 0x39: digits 0-9 may continue an identifier but not start one */
    LexerIsCharIdent,
    LexerIsCharIdent,
    LexerIsCharIdent,
    LexerIsCharIdent,
    LexerIsCharIdent,
    LexerIsCharIdent,
    LexerIsCharIdent,
    LexerIsCharIdent,
    LexerIsCharIdent,
    LexerIsCharIdent,
    /* 0x3A - 0x40 */
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    /* 0x41 - 0x5A: upper case letters A-Z */
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    /* 0x5B left square bracket */
    0,
    /* 0x5C backslash is flagged as an identifier character; presumably so that
     * the start of a unicode escape is routed into identifier scanning
     * (TODO confirm against the callers). */
    LexerIsCharIdentStart | LexerIsCharIdent,
    /* 0x5D, 0x5E */
    0,
    0,
    /* 0x5F underscore */
    LexerIsCharIdentStart | LexerIsCharIdent,
    /* 0x60 */
    0,
    /* 0x61 - 0x7A: lower case letters a-z */
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    LexerIsCharIdentStart | LexerIsCharIdent,
    /* 0x7B - 0x7F */
    0,
    0,
    0,
    0,
    0
};

/* Out-of-line white space test for characters outside the ASCII range.
 * The caller must only pass ch >= 0x80 (asserted below). */
NEVER_INLINE bool EscargotLexer::isWhiteSpaceSlowCase(char16_t ch)
{
    ASSERT(ch >= 0x80);

    // Below U+1680 the only accepted character is U+00A0.
    if (LIKELY(ch < 0x1680)) {
        return (ch == 0xA0);
    }

    // U+2000 .. U+200A form a contiguous run of accepted characters.
    if (ch >= 0x2000 && ch <= 0x200A) {
        return true;
    }

    switch (ch) {
    case 0x1680:
    case 0x180E:
    case 0x202F:
    case 0x205F:
    case 0x3000:
    case 0xFEFF:
        return true;
    default:
        return false;
    }
}

/* Starting code points of the non-ASCII identifier ranges, in ascending order
 * (isIdentifierPartSlow binary-searches this table). Entry i pairs with
 * identRangeLength[i]. The table has one more entry than identRangeLength:
 * the final value (65535) has no length and only serves as the upper bound
 * when the search reads identRangeStart[middle + 1]. */
static const uint16_t identRangeStart[429] = {
    170, 181, 183, 186, 192, 216, 248, 710, 736, 748, 750, 768, 886, 890, 895, 902, 908, 910, 931, 1015, 1155, 1162,
    1329, 1369, 1377, 1425, 1471, 1473, 1476, 1479, 1488, 1520, 1552, 1568, 1646, 1749, 1759, 1770, 1791, 1808, 1869,
    1984, 2042, 2048, 2112, 2208, 2276, 2406, 2417, 2437, 2447, 2451, 2474, 2482, 2486, 2492, 2503, 2507, 2519, 2524,
    2527, 2534, 2561, 2565, 2575, 2579, 2602, 2610, 2613, 2616, 2620, 2622, 2631, 2635, 2641, 2649, 2654, 2662, 2689,
    2693, 2703, 2707, 2730, 2738, 2741, 2748, 2759, 2763, 2768, 2784, 2790, 2817, 2821, 2831, 2835, 2858, 2866, 2869,
    2876, 2887, 2891, 2902, 2908, 2911, 2918, 2929, 2946, 2949, 2958, 2962, 2969, 2972, 2974, 2979, 2984, 2990, 3006,
    3014, 3018, 3024, 3031, 3046, 3072, 3077, 3086, 3090, 3114, 3133, 3142, 3146, 3157, 3160, 3168, 3174, 3201, 3205,
    3214, 3218, 3242, 3253, 3260, 3270, 3274, 3285, 3294, 3296, 3302, 3313, 3329, 3333, 3342, 3346, 3389, 3398, 3402,
    3415, 3424, 3430, 3450, 3458, 3461, 3482, 3507, 3517, 3520, 3530, 3535, 3542, 3544, 3558, 3570, 3585, 3648, 3664,
    3713, 3716, 3719, 3722, 3725, 3732, 3737, 3745, 3749, 3751, 3754, 3757, 3771, 3776, 3782, 3784, 3792, 3804, 3840,
    3864, 3872, 3893, 3895, 3897, 3902, 3913, 3953, 3974, 3993, 4038, 4096, 4176, 4256, 4295, 4301, 4304, 4348, 4682,
    4688, 4696, 4698, 4704, 4746, 4752, 4786, 4792, 4800, 4802, 4808, 4824, 4882, 4888, 4957, 4969, 4992, 5024, 5121,
    5743, 5761, 5792, 5870, 5888, 5902, 5920, 5952, 5984, 5998, 6002, 6016, 6103, 6108, 6112, 6155, 6160, 6176, 6272,
    6320, 6400, 6432, 6448, 6470, 6512, 6528, 6576, 6608, 6656, 6688, 6752, 6783, 6800, 6823, 6832, 6912, 6992, 7019,
    7040, 7168, 7232, 7245, 7376, 7380, 7416, 7424, 7676, 7960, 7968, 8008, 8016, 8025, 8027, 8029, 8031, 8064, 8118,
    8126, 8130, 8134, 8144, 8150, 8160, 8178, 8182, 8204, 8255, 8276, 8305, 8319, 8336, 8400, 8417, 8421, 8450, 8455,
    8458, 8469, 8472, 8484, 8486, 8488, 8490, 8508, 8517, 8526, 8544, 11264, 11312, 11360, 11499, 11520, 11559, 11565,
    11568, 11631, 11647, 11680, 11688, 11696, 11704, 11712, 11720, 11728, 11736, 11744, 12293, 12321, 12337, 12344,
    12353, 12441, 12449, 12540, 12549, 12593, 12704, 12784, 13312, 19968, 40960, 42192, 42240, 42512, 42560, 42612,
    42623, 42655, 42775, 42786, 42891, 42896, 42928, 42999, 43072, 43136, 43216, 43232, 43259, 43264, 43312, 43360,
    43392, 43471, 43488, 43520, 43584, 43600, 43616, 43642, 43739, 43744, 43762, 43777, 43785, 43793, 43808, 43816,
    43824, 43868, 43876, 43968, 44012, 44016, 44032, 55216, 55243, 63744, 64112, 64256, 64275, 64285, 64298, 64312,
    64318, 64320, 64323, 64326, 64467, 64848, 64914, 65008, 65024, 65056, 65075, 65101, 65136, 65142, 65296, 65313,
    65343, 65345, 65382, 65474, 65482, 65490, 65498, 65535
};

/* Lengths of the identifier ranges; entry i belongs to identRangeStart[i]
 * and range i covers the code points [start, start + length). Because the
 * entries are only 8 bits wide, a value greater than or equal to
 * IDENT_RANGE_LONG is not a length: (value - IDENT_RANGE_LONG) is an index
 * into identRangeLongLength, which holds the real length. */
static const uint8_t identRangeLength[428] = {
    1, 1, 1, 1, 23, 31, 200, 12, 5, 1, 1, 117, 2, 4, 1, 5, 1, 20, 83, 139, 5, 166, 38, 1, 39, 45, 1, 2, 2, 1, 27, 3,
    11, 74, 102, 8, 10, 19, 1, 59, 101, 54, 1, 46, 28, 19, 128, 10, 19, 8, 2, 22, 7, 1, 4, 9, 2, 4, 1, 2, 5, 12, 3, 6,
    2, 22, 7, 2, 2, 2, 1, 5, 2, 3, 1, 4, 1, 16, 3, 9, 3, 22, 7, 2, 5, 10, 3, 3, 1, 4, 10, 3, 8, 2, 22, 7, 2, 5, 9, 2,
    3, 2, 2, 5, 10, 1, 2, 6, 3, 4, 2, 1, 2, 2, 3, 12, 5, 3, 4, 1, 1, 10, 4, 8, 3, 23, 16, 8, 3, 4, 2, 2, 4, 10, 3, 8,
    3, 23, 10, 5, 9, 3, 4, 2, 1, 4, 10, 2, 3, 8, 3, 41, 8, 3, 5, 1, 4, 10, 6, 2, 18, 24, 9, 1, 7, 1, 6, 1, 8, 10, 2,
    58, 15, 10, 2, 1, 2, 1, 1, 4, 7, 3, 1, 1, 2, 13, 3, 5, 1, 6, 10, 4, 1, 2, 10, 1, 1, 1, 10, 36, 20, 18, 36, 1, 74,
    78, 38, 1, 1, 43, 201, 4, 7, 1, 4, 41, 4, 33, 4, 7, 1, 4, 15, 57, 4, 67, 3, 9, 16, 85, 202, 17, 26, 75, 11, 13, 7,
    21, 20, 13, 3, 2, 84, 1, 2, 10, 3, 10, 88, 43, 70, 31, 12, 12, 40, 5, 44, 26, 11, 28, 63, 29, 11, 10, 1, 14, 76,
    10, 9, 116, 56, 10, 49, 3, 35, 2, 203, 204, 6, 38, 6, 8, 1, 1, 1, 31, 53, 7, 1, 3, 7, 4, 6, 13, 3, 7, 2, 2, 1, 1,
    1, 13, 13, 1, 12, 1, 1, 10, 1, 6, 1, 1, 1, 16, 4, 5, 1, 41, 47, 47, 133, 9, 38, 1, 1, 56, 1, 24, 7, 7, 7, 7, 7, 7,
    7, 7, 32, 3, 15, 5, 5, 86, 7, 90, 4, 41, 94, 27, 16, 205, 206, 207, 46, 208, 28, 48, 10, 31, 83, 9, 103, 4, 30, 2,
    49, 52, 69, 10, 24, 1, 46, 36, 29, 65, 11, 31, 55, 14, 10, 23, 73, 3, 16, 5, 6, 6, 6, 7, 7, 43, 4, 2, 43, 2, 10,
    209, 23, 49, 210, 106, 7, 5, 12, 13, 5, 1, 2, 2, 108, 211, 64, 54, 12, 16, 14, 2, 3, 5, 135, 10, 26, 1, 26, 89, 6,
    6, 6, 3
};

/* Lengths of the identifier ranges that do not fit into the 8 bit entries of
 * identRangeLength. An identRangeLength value v >= IDENT_RANGE_LONG selects
 * identRangeLongLength[v - IDENT_RANGE_LONG]. */
static const uint16_t identRangeLongLength[12] = {
    458, 333, 620, 246, 282, 6582, 20941, 1165, 269, 11172, 366, 363
};

/* Table driven identifier test for characters outside the ASCII range.
 * Binary-searches identRangeStart for the range whose start is the greatest
 * one not above ch, then checks whether ch lies within that range's length.
 * Returns false when ch is below the first range start or at/above the last
 * table entry. */
static NEVER_INLINE bool isIdentifierPartSlow(char32_t ch)
{
    // Search window over range indices; "high" is exclusive. The start table
    // has one extra trailing entry, so reading index mid + 1 is always valid.
    int low = 0;
    int high = (sizeof(identRangeStart) / sizeof(uint16_t)) - 1;

    do {
        const int mid = (low + high) >> 1;
        const char32_t first = identRangeStart[mid];

        if (ch < first) {
            // The wanted range, if any, starts before mid.
            high = mid;
            continue;
        }

        if (ch >= identRangeStart[mid + 1]) {
            // ch is at or beyond the start of the next range.
            low = mid + 1;
            continue;
        }

        // ch is between the start of range mid and the start of range mid + 1.
        char32_t span = identRangeLength[mid];
        if (UNLIKELY(span >= IDENT_RANGE_LONG)) {
            // Long ranges store an index into the 16 bit length table instead.
            span = identRangeLongLength[span - IDENT_RANGE_LONG];
        }
        return ch < first + span;
    } while (low != high);

    return false;
}

/* Returns true if ch may appear inside an identifier. ASCII characters are
 * answered from g_asciiRangeCharMap; everything else goes through the range
 * tables in isIdentifierPartSlow(). */
static ALWAYS_INLINE bool isIdentifierPart(char32_t ch)
{
    return LIKELY(ch < 128) ? ((g_asciiRangeCharMap[ch] & LexerIsCharIdent) != 0)
                            : isIdentifierPartSlow(ch);
}

/* Returns true if ch may begin an identifier. ASCII characters are answered
 * from g_asciiRangeCharMap. For ch >= 128 the same range tables as for
 * isIdentifierPart() are consulted, so no start/part distinction is made
 * outside the ASCII range. */
static ALWAYS_INLINE bool isIdentifierStart(char32_t ch)
{
    return LIKELY(ch < 128) ? ((g_asciiRangeCharMap[ch] & LexerIsCharIdentStart) != 0)
                            : isIdentifierPartSlow(ch);
}

/* Returns true for the characters '0' .. '9'. */
static ALWAYS_INLINE bool isDecimalDigit(char16_t ch)
{
    // Characters below '0' give a negative difference, which becomes a large
    // unsigned value and therefore fails the single comparison.
    return static_cast<unsigned>(ch - '0') < 10u;
}

/* Returns true for '0' .. '9', 'a' .. 'f' and 'A' .. 'F'. */
static ALWAYS_INLINE bool isHexDigit(char16_t ch)
{
    if (isDecimalDigit(ch)) {
        return true;
    }

    // Setting bit 0x20 maps the upper case ASCII letters onto lower case.
    const int lowered = ch | 0x20;
    return lowered >= 'a' && lowered <= 'f';
}

/* Returns true for the characters '0' .. '7'. */
static ALWAYS_INLINE bool isOctalDigit(char16_t ch)
{
    // Same single-comparison range test as for decimal digits, with 8 values.
    return static_cast<unsigned>(ch - '0') < 8u;
}

/* Converts an octal digit character to its numeric value (0 .. 7).
 * The argument must satisfy isOctalDigit(). */
static ALWAYS_INLINE char16_t octalValue(char16_t ch)
{
    ASSERT(isOctalDigit(ch));
    const char16_t value = ch - '0';
    return value;
}

/* Converts a hexadecimal digit character to its numeric value. No validation
 * is performed: the result is only meaningful when ch is a hex digit. */
static ALWAYS_INLINE uint8_t toHexNumericValue(char16_t ch)
{
    if (ch < 'A') {
        // Decimal digits sort below the letters.
        return ch - '0';
    }

    // The mask folds both 'A'..'F' and 'a'..'f' onto 10 .. 15.
    return (ch - 'A' + 10) & 0xF;
}

/* Returns the numeric value (0 .. 15) of a hexadecimal digit character.
 * The argument must be a hex digit; this is asserted for letters. */
static int hexValue(char16_t ch)
{
    const int decimal = ch - '0';
    if (decimal >= 0 && decimal <= 9) {
        return decimal;
    }

    // Setting bit 0x20 maps the upper case ASCII letters onto lower case.
    const int lowered = ch | 0x20;
    ASSERT(lowered >= 'a' && lowered <= 'f');

    return lowered - 'a' + 10;
}

/* UTF-16 encoding of a single code point.
 * data holds `length` code units followed by a terminating zero: one unit for
 * values below 0x10000, otherwise a high/low surrogate pair. */
struct ParserCharPiece {
    char16_t data[3];
    size_t length;

    ParserCharPiece(const char32_t a)
    {
        if (a >= 0x10000) {
            // Split the 20 bit offset into the two surrogate halves.
            const char32_t offset = a - 0x10000;
            data[0] = (char16_t)(0xD800 + (offset >> 10));
            data[1] = (char16_t)(0xDC00 + (offset & 0x3FF));
            data[2] = 0;
            length = 2;
            return;
        }

        data[0] = a;
        data[1] = 0;
        length = 1;
    }
};

/* Maps a keyword kind to the corresponding pre-built AtomicString stored in
 * the context's static strings. Unknown kinds trip ASSERT_NOT_REACHED() and
 * yield stringError. */
AtomicString keywordToString(::Escargot::Context* ctx, KeywordKind keyword)
{
    const auto& names = ctx->staticStrings();

    switch (keyword) {
    // Two letter keywords.
    case IfKeyword:
        return names.stringIf;
    case InKeyword:
        return names.stringIn;
    case DoKeyword:
        return names.stringDo;

    // Three letter keywords.
    case VarKeyword:
        return names.stringVar;
    case ForKeyword:
        return names.stringFor;
    case NewKeyword:
        return names.stringNew;
    case TryKeyword:
        return names.stringTry;

    // Four letter keywords.
    case ThisKeyword:
        return names.stringThis;
    case ElseKeyword:
        return names.stringElse;
    case CaseKeyword:
        return names.stringCase;
    case VoidKeyword:
        return names.stringVoid;
    case WithKeyword:
        return names.stringWith;
    case EnumKeyword:
        return names.stringEnum;

    // Five letter keywords.
    case WhileKeyword:
        return names.stringWhile;
    case BreakKeyword:
        return names.stringBreak;
    case CatchKeyword:
        return names.stringCatch;
    case ThrowKeyword:
        return names.stringThrow;
    case ConstKeyword:
        return names.stringConst;
    case ClassKeyword:
        return names.stringClass;
    case SuperKeyword:
        return names.stringSuper;

    // Six letter keywords.
    case ReturnKeyword:
        return names.stringReturn;
    case TypeofKeyword:
        return names.stringTypeof;
    case DeleteKeyword:
        return names.stringDelete;
    case SwitchKeyword:
        return names.stringSwitch;
    case ExportKeyword:
        return names.stringExport;
    case ImportKeyword:
        return names.stringImport;

    // Seven letter keywords.
    case DefaultKeyword:
        return names.stringDefault;
    case FinallyKeyword:
        return names.stringFinally;
    case ExtendsKeyword:
        return names.stringExtends;

    // Longer keywords.
    case FunctionKeyword:
        return names.function;
    case ContinueKeyword:
        return names.stringContinue;
    case DebuggerKeyword:
        return names.stringDebugger;
    case InstanceofKeyword:
        return names.stringInstanceof;

    // Remaining keyword kinds.
    case ImplementsKeyword:
        return names.implements;
    case InterfaceKeyword:
        return names.interface;
    case PackageKeyword:
        return names.package;
    case PrivateKeyword:
        return names.stringPrivate;
    case ProtectedKeyword:
        return names.stringProtected;
    case PublicKeyword:
        return names.stringPublic;
    case StaticKeyword:
        return names.stringStatic;
    case YieldKeyword:
        return names.yield;
    case LetKeyword:
        return names.let;

    default:
        ASSERT_NOT_REACHED();
        return names.stringError;
    }
}

void ErrorHandler::throwError(size_t index, size_t line, size_t col, String* description, ErrorObject::Code code)
{
    UTF16StringDataNonGCStd msg = u"Line ";
    const size_t bufferLength = 64;
    char lineStringBuf[bufferLength];
    char* bufPtr = lineStringBuf + bufferLength - 2;

    /* Adds ": " at the end. */
    bufPtr[0] = ':';
    bufPtr[1] = ' ';

    size_t value = line;
    do {
        ASSERT(bufPtr > lineStringBuf);
        --bufPtr;
        *bufPtr = value % 10 + '0';
        value /= 10;
    } while (value > 0);

    msg += UTF16StringDataNonGCStd(bufPtr, lineStringBuf + bufferLength);

    if (description->length()) {
        msg += UTF16StringDataNonGCStd(description->toUTF16StringData().data());
    }

    esprima::Error* error = new (NoGC) esprima::Error(new UTF16String(msg.data(), msg.length()));
    error->index = index;
    error->lineNumber = line;
    error->column = col;
    error->description = description;
    error->errorCode = code;

    throw * error;
};

/* Returns the part of `source` delimited by this token's start and end
 * offsets. */
ParserStringView Scanner::SmallScannerResult::relatedSource(const ParserStringView& source) const
{
    return ParserStringView(source, start, end);
}

/* StringView flavour: returns the part of `source` delimited by this token's
 * start and end offsets. */
StringView Scanner::SmallScannerResult::relatedSource(const StringView& source) const
{
    return StringView(source, start, end);
}

/* Returns the part of `source` delimited by this token's start and end
 * offsets. */
ParserStringView Scanner::ScannerResult::relatedSource(const ParserStringView& source)
{
    return ParserStringView(source, start, end);
}

/* StringView flavour: returns the part of `source` delimited by this token's
 * start and end offsets. */
StringView Scanner::ScannerResult::relatedSource(const StringView& source)
{
    return StringView(source, start, end);
}

/* Returns the string value of this token as a Value.
 *  - keyword tokens: the keyword's static string;
 *  - tokens flagged hasAllocatedString: the string built (on first use) by
 *    constructStringLiteral();
 *  - otherwise: a new StringView over the recorded range of the source. */
Value Scanner::ScannerResult::valueStringLiteralToValue(Scanner* scannerInstance)
{
    if (this->type == Token::KeywordToken) {
        return keywordToString(scannerInstance->escargotContext, this->valueKeywordKind).string();
    }

    if (!this->hasAllocatedString) {
        return new StringView(scannerInstance->sourceAsNormalView, this->valueStringLiteralData.m_start, this->valueStringLiteralData.m_end);
    }

    // Build the string lazily and keep it for later calls.
    if (!this->valueStringLiteralData.m_stringIfNewlyAllocated) {
        constructStringLiteral(scannerInstance);
    }
    return this->valueStringLiteralData.m_stringIfNewlyAllocated;
}

/* Returns the string value of this token as a ParserStringView.
 *  - keyword tokens: a view over the keyword's static string;
 *  - tokens flagged hasAllocatedString: a view over the string built (on
 *    first use) by constructStringLiteral();
 *  - otherwise: a view over the recorded range of the scanner's source. */
ParserStringView Scanner::ScannerResult::valueStringLiteral(Scanner* scannerInstance)
{
    if (this->type == Token::KeywordToken) {
        AtomicString keywordName = keywordToString(scannerInstance->escargotContext, this->valueKeywordKind);
        auto keywordText = keywordName.string();
        return ParserStringView(keywordText, 0, keywordText->length());
    }

    if (!this->hasAllocatedString) {
        return ParserStringView(scannerInstance->source, this->valueStringLiteralData.m_start, this->valueStringLiteralData.m_end);
    }

    // Build the string lazily and keep it for later calls.
    if (!this->valueStringLiteralData.m_stringIfNewlyAllocated) {
        constructStringLiteral(scannerInstance);
    }
    return ParserStringView(this->valueStringLiteralData.m_stringIfNewlyAllocated);
}

/* Returns the numeric value of this token. When the value has not been
 * computed yet (hasNonComputedNumberLiteral), the token's source text
 * [start, end) is converted with double-conversion, the result is cached in
 * valueNumber and the flag is cleared. */
double Scanner::ScannerResult::valueNumberLiteral(Scanner* scannerInstance)
{
    if (!this->hasNonComputedNumberLiteral) {
        return this->valueNumber;
    }

    const auto& sourceData = scannerInstance->source.bufferAccessData();
    int length = this->end - this->start;
    char* buffer;

    if (!sourceData.has8BitContent) {
        // 16 bit source: copy the token's characters into a temporary 8 bit
        // buffer (each character is narrowed to char).
        buffer = ALLOCA(this->end - this->start, char, ec);

        for (int i = 0; i < length; i++) {
            buffer[i] = sourceData.uncheckedCharAtFor16Bit(i + this->start);
        }
    } else {
        // 8 bit source: the converter can read the source buffer directly.
        buffer = ((char*)sourceData.buffer) + this->start;
    }

    double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::ALLOW_HEX
                                                             | double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES
                                                             | double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES,
                                                         0.0, double_conversion::Double::NaN(),
                                                         "Infinity", "NaN");

    // Out-parameter required by StringToDouble(); its value is not used here.
    int lengthDummy;
    this->valueNumber = converter.StringToDouble(buffer, length, &lengthDummy);
    this->hasNonComputedNumberLiteral = false;

    return this->valueNumber;
}

/* Appends the character(s) denoted by an escape sequence to stringUTF16.
 * `ch` is the character that followed the backslash; the scanner is
 * positioned right after it and is advanced over any further characters the
 * escape consumes. isEveryCharLatin1 is cleared when a code unit >= 256 (or a
 * surrogate pair) is appended. */
void Scanner::ScannerResult::constructStringLiteralHelperAppendUTF16(Scanner* scannerInstance, char16_t ch, UTF16StringDataNonGCStd& stringUTF16, bool& isEveryCharLatin1)
{
    // Hexadecimal and unicode escapes.
    if (ch == 'u' || ch == 'x') {
        char32_t codePoint;
        if (scannerInstance->peekChar() == '{') {
            ++scannerInstance->index;
            codePoint = scannerInstance->scanUnicodeCodePointEscape();
        } else {
            codePoint = scannerInstance->scanHexEscape(ch);
        }

        ParserCharPiece encoded(codePoint);
        stringUTF16.append(encoded.data, encoded.data + encoded.length);
        if (encoded.length != 1 || encoded.data[0] >= 256) {
            isEveryCharLatin1 = false;
        }
        return;
    }

    // Single character escapes that map to one control character.
    char16_t control;
    switch (ch) {
    case 'n':
        control = '\n';
        break;
    case 'r':
        control = '\r';
        break;
    case 't':
        control = '\t';
        break;
    case 'b':
        control = '\b';
        break;
    case 'f':
        control = '\f';
        break;
    case 'v':
        control = '\x0B';
        break;

    default:
        if (ch && isOctalDigit(ch)) {
            // Octal escape; further digits are consumed by octalToDecimal().
            uint16_t octToDec = scannerInstance->octalToDecimal(ch, true);
            stringUTF16 += octToDec;
            ASSERT(octToDec < 256);
        } else {
            // Any other escaped character stands for itself.
            stringUTF16 += ch;
            if (ch >= 256) {
                isEveryCharLatin1 = false;
            }
        }
        return;
    }

    stringUTF16 += control;
}

/* Re-scans the string literal token starting at this->start, decoding escape
 * sequences and line continuations, and stores the resulting string in
 * valueStringLiteralData.m_stringIfNewlyAllocated (Latin1String when every
 * code unit is below 256, UTF16String otherwise). The scanner's index,
 * lineNumber and lineStart are saved before and restored after the re-scan. */
void Scanner::ScannerResult::constructStringLiteral(Scanner* scannerInstance)
{
    const size_t savedIndex = scannerInstance->index;
    const size_t savedLineNumber = scannerInstance->lineNumber;
    const size_t savedLineStart = scannerInstance->lineStart;

    // A string literal must start with a quote character.
    scannerInstance->index = this->start;
    const char16_t quote = scannerInstance->peekChar();
    ASSERT((quote == '\'' || quote == '"'));
    ++scannerInstance->index;

    bool isEveryCharLatin1 = true;
    UTF16StringDataNonGCStd decoded;

    for (;;) {
        char16_t ch = scannerInstance->peekChar();
        ++scannerInstance->index;

        if (ch == quote) {
            // Closing quote.
            break;
        }

        if (UNLIKELY(ch == '\\')) {
            ch = scannerInstance->peekChar();
            ++scannerInstance->index;

            if (ch && isLineTerminator(ch)) {
                // Line continuation: contributes nothing to the value, but the
                // line bookkeeping is updated. CR LF and LF CR count as one.
                ++scannerInstance->lineNumber;
                const char16_t following = scannerInstance->peekChar();
                if ((ch == '\r' && following == '\n') || (ch == '\n' && following == '\r')) {
                    ++scannerInstance->index;
                }
                scannerInstance->lineStart = scannerInstance->index;
            } else {
                this->constructStringLiteralHelperAppendUTF16(scannerInstance, ch, decoded, isEveryCharLatin1);
            }
            continue;
        }

        if (UNLIKELY(isLineTerminator(ch))) {
            // An unescaped line terminator ends the scan.
            break;
        }

        decoded += ch;
        if (ch >= 256) {
            isEveryCharLatin1 = false;
        }
    }

    scannerInstance->index = savedIndex;
    scannerInstance->lineNumber = savedLineNumber;
    scannerInstance->lineStart = savedLineStart;

    String* newStr;
    if (isEveryCharLatin1) {
        newStr = new Latin1String(decoded.data(), decoded.length());
    } else {
        newStr = new UTF16String(decoded.data(), decoded.length());
    }
    this->valueStringLiteralData.m_stringIfNewlyAllocated = newStr;
}

/* Creates a scanner positioned at the beginning of `code`.
 *  - escargotContext: must not be null (asserted);
 *  - code: the source text; it is kept both as `source` (constructed over the
 *    whole range 0 .. code.length()) and as `sourceAsNormalView`;
 *  - startLine: initial value of lineNumber;
 *  - startColumn: initial value of lineStart. */
Scanner::Scanner(::Escargot::Context* escargotContext, StringView code, size_t startLine, size_t startColumn)
    : source(code, 0, code.length())
    , sourceAsNormalView(code)
    , escargotContext(escargotContext)
    , sourceCodeAccessData(code.bufferAccessData())
    , length(code.length())
    , index(0)
    , lineNumber(startLine)
    , lineStart(startColumn)
{
    ASSERT(escargotContext != nullptr);
    // trackComment = false;
}

/* Skips the remainder of a single line comment. Scanning stops after the
 * first line terminator (CR LF is consumed as one terminator), at which point
 * lineNumber and lineStart are updated, or at the end of the input. */
void Scanner::skipSingleLineComment(void)
{
    while (!this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        ++this->index;

        if (isLineTerminator(ch)) {
            // index has just been advanced and may now be at the end of the
            // input, so eof() must be re-checked before looking at the
            // character that follows a CR.
            if (ch == 13 && !this->eof() && this->peekCharWithoutEOF() == 10) {
                ++this->index;
            }
            ++this->lineNumber;
            this->lineStart = this->index;
            return;
        }
    }
}

/* Skips the remainder of a block comment up to and including the closing
 * star-slash, keeping lineNumber and lineStart up to date for every line
 * terminator inside the comment (CR LF is consumed as one terminator).
 * If the input ends before the comment is closed, the error is reported
 * through throwUnexpectedToken(). */
void Scanner::skipMultiLineComment(void)
{
    while (!this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        ++this->index;

        // index has just been advanced and may now be at the end of the input,
        // so every look-ahead below is guarded by eof().
        if (isLineTerminator(ch)) {
            if (ch == 0x0D && !this->eof() && this->peekCharWithoutEOF() == 0x0A) {
                ++this->index;
            }
            ++this->lineNumber;
            this->lineStart = this->index;
        } else if (ch == 0x2A && !this->eof() && this->peekCharWithoutEOF() == 0x2F) {
            // Block comment ends with '*/'.
            ++this->index;
            return;
        }
    }

    throwUnexpectedToken();
}

/* Reads a fixed length hexadecimal escape at the current position: four
 * digits when prefix is 'u', two digits otherwise. Returns the decoded value,
 * or EMPTY_CODE_POINT if the input ends or a non hex digit is met first (the
 * digits read up to that point stay consumed). */
char32_t Scanner::scanHexEscape(char prefix)
{
    char32_t code = 0;

    for (size_t remaining = (prefix == 'u') ? 4 : 2; remaining > 0; --remaining) {
        if (this->eof()) {
            return EMPTY_CODE_POINT;
        }

        const char16_t digit = this->peekCharWithoutEOF();
        if (!isHexDigit(digit)) {
            return EMPTY_CODE_POINT;
        }

        code = code * 16 + hexValue(digit);
        ++this->index;
    }

    return code;
}

/* Reads the body of a braced unicode escape; the scanner must be positioned
 * right after the opening '{'. Consumes the hex digits and the closing '}'
 * and returns the code point. Reports an error via throwUnexpectedToken()
 * when there is no digit, when the value exceeds 0x10FFFF, or when the digits
 * are not followed by '}'. */
char32_t Scanner::scanUnicodeCodePointEscape()
{
    // At least, one hex digit is required.
    if (this->eof() || this->peekCharWithoutEOF() == '}') {
        this->throwUnexpectedToken();
    }

    char32_t code = 0;
    char16_t ch = 0;
    bool outOfRange = false;

    while (!this->eof()) {
        ch = this->peekCharWithoutEOF();
        ++this->index;
        if (!isHexDigit(ch)) {
            break;
        }

        // Stop accumulating once the value has left the valid range. Otherwise
        // a long digit sequence would wrap around the 32 bit accumulator and
        // could end up looking like a valid code point again. The remaining
        // digits are still consumed so the scanner position is unaffected.
        if (!outOfRange) {
            code = code * 16 + hexValue(ch);
            outOfRange = code > 0x10FFFF;
        }
    }

    if (outOfRange || ch != '}') {
        this->throwUnexpectedToken();
    }

    return code;
}

/* Fast path for scanning an identifier that needs no decoding. The first
 * character is accepted without inspection; following characters are consumed
 * while isIdentifierPart() holds. On success the result refers directly to
 * the identifier's characters inside the source buffer and carries no
 * allocated string. As soon as a backslash or a character in the range
 * 0xD800 .. 0xDFFE is met, scanning restarts from the beginning in
 * getComplexIdentifier(). */
Scanner::ScanIDResult Scanner::getIdentifier()
{
    const size_t start = this->index;
    ++this->index;

    while (UNLIKELY(!this->eof())) {
        const char16_t ch = this->peekCharWithoutEOF();

        // Backslash (U+005C) marks a Unicode escape sequence.
        const bool startsEscape = (ch == 0x5C);
        // Surrogate pairs need to be combined by the slow path.
        const bool inSurrogateRange = (ch >= 0xD800 && ch < 0xDFFF);

        if (UNLIKELY(startsEscape || inSurrogateRange)) {
            this->index = start;
            return this->getComplexIdentifier();
        }

        if (!isIdentifierPart(ch)) {
            break;
        }
        ++this->index;
    }

    // Describe the identifier as a slice of the existing source buffer.
    const auto& srcData = this->source.bufferAccessData();
    void* identifierBegin;
    if (srcData.has8BitContent) {
        identifierBegin = reinterpret_cast<void*>(((LChar*)srcData.buffer) + start);
    } else {
        identifierBegin = reinterpret_cast<void*>(((char16_t*)srcData.buffer) + start);
    }
    StringBufferAccessData ad(srcData.has8BitContent, this->index - start, identifierBegin);

    return std::make_tuple(ad, nullptr);
}

/* Slow path for scanning an identifier that contains unicode escapes or
 * surrogate pairs. The decoded identifier is collected in a temporary UTF-16
 * buffer and returned as a newly allocated UTF16String together with its
 * buffer access data. Malformed escapes are reported through
 * throwUnexpectedToken(). */
Scanner::ScanIDResult Scanner::getComplexIdentifier()
{
    // NOTE(review): cp is only 16 bits wide; if codePointAt() can return
    // values above 0xFFFF they are truncated here - confirm this is intended.
    char16_t cp = this->codePointAt(this->index);
    ParserCharPiece piece = ParserCharPiece(cp);
    UTF16StringDataNonGCStd id(piece.data, piece.length);
    this->index += id.length();

    // '\u' (U+005C, U+0075) denotes an escaped character.
    char32_t ch;
    if (cp == 0x5C) {
        if (this->peekChar() != 0x75) {
            this->throwUnexpectedToken();
        }
        ++this->index;
        if (this->peekChar() == '{') {
            // NOTE(review): unlike the four digit form below, the braced form
            // is not checked with isIdentifierStart() - confirm.
            ++this->index;
            ch = this->scanUnicodeCodePointEscape();
        } else {
            ch = this->scanHexEscape('u');
            cp = ch;
            if (ch == EMPTY_CODE_POINT || ch == '\\' || !isIdentifierStart(cp)) {
                this->throwUnexpectedToken();
            }
        }
        // Replace the backslash with the UTF-16 encoding of the escaped code
        // point. Assigning the 32 bit value directly would keep only its low
        // 16 bits for code points above 0xFFFF.
        piece = ParserCharPiece(ch);
        id.assign(piece.data, piece.length);
    }

    while (!this->eof()) {
        cp = this->codePointAt(this->index);
        if (!isIdentifierPart(cp)) {
            break;
        }

        // ch = Character.fromCodePoint(cp);
        ch = cp;

        // Combine a leading surrogate with the trailing one that follows it.
        if (this->peekChar() >= 0xD800 && this->peekChar() < 0xDFFF) {
            ch = peekChar();
            ++this->index;
            char32_t ch2 = this->peekChar();
            if (U16_IS_TRAIL(ch2)) {
                ch = U16_GET_SUPPLEMENTARY(ch, ch2);
            }
            --this->index;
        }
        piece = ParserCharPiece(ch);
        id += UTF16StringDataNonGCStd(piece.data, piece.length);
        this->index += piece.length;

        // '\u' (U+005C, U+0075) denotes an escaped character.
        if (cp == 0x5C) {
            // Drop the backslash that was just appended.
            // id = id.substr(0, id.length - 1);
            id.erase(id.length() - 1);

            if (this->peekChar() != 0x75) {
                this->throwUnexpectedToken();
            }
            ++this->index;
            if (this->peekChar() == '{') {
                ++this->index;
                ch = this->scanUnicodeCodePointEscape();
            } else {
                ch = this->scanHexEscape('u');
                cp = ch;
                if (ch == EMPTY_CODE_POINT || ch == '\\' || !isIdentifierPart(cp)) {
                    this->throwUnexpectedToken();
                }
            }
            piece = ParserCharPiece(ch);
            id += UTF16StringDataNonGCStd(piece.data, piece.length);
        }
    }

    String* str = new UTF16String(id.data(), id.length());
    return std::make_tuple(str->bufferAccessData(), str);
}

/* Decodes an octal escape whose first digit `ch` has already been consumed,
 * reading up to two more octal digits from the input (a third digit is only
 * accepted when the first one is 0 .. 3).
 * Returns the decoded value, or NON_OCTAL_VALUE when the sequence is not
 * treated as octal: `octal` was passed as false, ch is '0' and no octal digit
 * follows. */
uint16_t Scanner::octalToDecimal(char16_t ch, bool octal)
{
    char16_t code = octalValue(ch);

    // \0 is not octal escape sequence
    if (ch != '0') {
        octal = true;
    }

    if (!this->eof() && isOctalDigit(this->peekChar())) {
        octal = true;
        code = code * 8 + octalValue(this->peekChar());
        ++this->index;

        // 3 digits are only allowed when string starts
        // with 0, 1, 2, 3
        const bool mayHaveThirdDigit = (ch >= '0' && ch <= '3');
        if (mayHaveThirdDigit && !this->eof() && isOctalDigit(this->peekChar())) {
            code = code * 8 + octalValue(this->peekChar());
            ++this->index;
        }
    }

    ASSERT(!octal || code < NON_OCTAL_VALUE);
    if (!octal) {
        return NON_OCTAL_VALUE;
    }
    return code;
}

/* Scans the punctuator that begins with `ch` (the character at the current
 * index) using longest-match, and stores it in `token` together with its
 * start/end offsets. A character that does not begin any punctuator is
 * reported through throwUnexpectedToken(). */
void Scanner::scanPunctuator(Scanner::ScannerResult* token, char16_t ch)
{
    const size_t start = this->index;
    PunctuatorKind kind;

    // Step over `ch` itself; everything below looks at what follows it.
    ++this->index;

    // Consumes the next character if, and only if, it equals `expected`.
    auto consumeIf = [this](char16_t expected) -> bool {
        if (this->peekChar() != expected) {
            return false;
        }
        ++this->index;
        return true;
    };

    switch (ch) {
    // Punctuators that are always exactly one character long.
    case '(':
        kind = LeftParenthesis;
        break;
    case ')':
        kind = RightParenthesis;
        break;
    case '{':
        kind = LeftBrace;
        break;
    case '}':
        kind = RightBrace;
        break;
    case '[':
        kind = LeftSquareBracket;
        break;
    case ']':
        kind = RightSquareBracket;
        break;
    case ';':
        kind = SemiColon;
        break;
    case ',':
        kind = Comma;
        break;
    case ':':
        kind = Colon;
        break;
    case '?':
        kind = GuessMark;
        break;
    case '~':
        kind = Wave;
        break;

    case '.':
        // "." or the spread operator "..."; ".." is just a single period here.
        kind = Period;
        if (this->peekChar() == '.' && this->sourceCharAt(this->index + 1) == '.') {
            this->index += 2;
            kind = PeriodPeriodPeriod;
        }
        break;

    case '>':
        // >  >=  >>  >>=  >>>  >>>=
        kind = RightInequality;
        if (consumeIf('>')) {
            kind = RightShift;
            if (consumeIf('>')) {
                kind = UnsignedRightShift;
                if (consumeIf('=')) {
                    kind = UnsignedRightShiftEqual;
                }
            } else if (consumeIf('=')) {
                kind = RightShiftEqual;
            }
        } else if (consumeIf('=')) {
            kind = RightInequalityEqual;
        }
        break;

    case '<':
        // <  <=  <<  <<=
        kind = LeftInequality;
        if (consumeIf('<')) {
            kind = LeftShift;
            if (consumeIf('=')) {
                kind = LeftShiftEqual;
            }
        } else if (consumeIf('=')) {
            kind = LeftInequalityEqual;
        }
        break;

    case '=':
        // =  ==  ===  =>
        kind = Substitution;
        if (consumeIf('=')) {
            kind = Equal;
            if (consumeIf('=')) {
                kind = StrictEqual;
            }
        } else if (consumeIf('>')) {
            kind = Arrow;
        }
        break;

    case '!':
        // !  !=  !==
        kind = ExclamationMark;
        if (consumeIf('=')) {
            kind = NotEqual;
            if (consumeIf('=')) {
                kind = NotStrictEqual;
            }
        }
        break;

    case '&':
        // &  &&  &=
        kind = BitwiseAnd;
        if (consumeIf('&')) {
            kind = LogicalAnd;
        } else if (consumeIf('=')) {
            kind = BitwiseAndEqual;
        }
        break;

    case '|':
        // |  ||  |=
        kind = BitwiseOr;
        if (consumeIf('|')) {
            kind = LogicalOr;
        } else if (consumeIf('=')) {
            kind = BitwiseOrEqual;
        }
        break;

    case '^':
        // ^  ^=
        kind = BitwiseXor;
        if (consumeIf('=')) {
            kind = BitwiseXorEqual;
        }
        break;

    case '+':
        // +  ++  +=
        kind = Plus;
        if (consumeIf('+')) {
            kind = PlusPlus;
        } else if (consumeIf('=')) {
            kind = PlusEqual;
        }
        break;

    case '-':
        // -  --  -=
        kind = Minus;
        if (consumeIf('-')) {
            kind = MinusMinus;
        } else if (consumeIf('=')) {
            kind = MinusEqual;
        }
        break;

    case '*':
        // *  *=  **  **=
        kind = Multiply;
        if (consumeIf('=')) {
            kind = MultiplyEqual;
        } else if (consumeIf('*')) {
            kind = Exponentiation;
            if (consumeIf('=')) {
                kind = ExponentiationEqual;
            }
        }
        break;

    case '/':
        // /  /=
        kind = Divide;
        if (consumeIf('=')) {
            kind = DivideEqual;
        }
        break;

    case '%':
        // %  %=
        kind = Mod;
        if (consumeIf('=')) {
            kind = ModEqual;
        }
        break;

    default:
        this->throwUnexpectedToken();
        kind = PunctuatorKindEnd;
        break;
    }

    token->setPunctuatorResult(this->lineNumber, this->lineStart, start, this->index, kind);
}

/* Scans the digits of a hexadecimal literal; the "0x"/"0X" prefix has already
 * been consumed and `start` is the offset where the whole literal begins.
 *
 * The value is accumulated in a 64-bit integer for the first 16 digits and in
 * a double afterwards. A literal without digits, or one directly followed by
 * an identifier-start character, is reported as an unexpected token. */
void Scanner::scanHexLiteral(Scanner::ScannerResult* token, size_t start)
{
    ASSERT(token != nullptr);

    const size_t digitsBegin = this->index;
    uint64_t integerValue = 0;
    double doubleValue = 0.0;
    bool useDouble = false;
    size_t integerDigits = 0;

    for (; !this->eof(); this->index++) {
        const char16_t ch = this->peekCharWithoutEOF();
        if (!isHexDigit(ch)) {
            break;
        }

        if (useDouble) {
            doubleValue = doubleValue * 16 + toHexNumericValue(ch);
            continue;
        }

        integerValue = (integerValue << 4) + toHexNumericValue(ch);
        if (++integerDigits >= 16) {
            // 16 hex digits fill all 64 bits: continue in floating point.
            useDouble = true;
            doubleValue = integerValue;
            integerValue = 0;
        }
    }

    const bool sawDigit = (this->index != digitsBegin);
    if (!sawDigit) {
        this->throwUnexpectedToken();
    }

    if (isIdentifierStart(this->peekChar())) {
        this->throwUnexpectedToken();
    }

    if (!useDouble) {
        ASSERT(doubleValue == 0.0);
        token->setNumericLiteralResult(integerValue, this->lineNumber, this->lineStart, start, this->index, false);
    } else {
        ASSERT(integerValue == 0);
        token->setNumericLiteralResult(doubleValue, this->lineNumber, this->lineStart, start, this->index, false);
    }
}

/* Scans the digits of a binary literal; the "0b"/"0B" prefix has already been
 * consumed and `start` is the offset where the whole literal begins.
 *
 * The value is accumulated in a 64-bit integer for as long as it fits. Once
 * another shift would push the top bit out, accumulation continues in a
 * double (the same fallback scanHexLiteral uses) instead of silently wrapping
 * around. A literal without digits, or one directly followed by an
 * identifier-start character or a decimal digit, is reported as an
 * unexpected token. */
void Scanner::scanBinaryLiteral(Scanner::ScannerResult* token, size_t start)
{
    ASSERT(token != nullptr);
    uint64_t number = 0;
    double numberDouble = 0.0;
    bool shouldUseDouble = false;
    bool scanned = false;

    while (!this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        if (ch != '0' && ch != '1') {
            break;
        }

        const unsigned digit = ch - '0';
        if (shouldUseDouble) {
            numberDouble = numberDouble * 2 + digit;
        } else if ((number >> 63) != 0) {
            // Bit 63 is occupied: shifting left once more would lose it.
            shouldUseDouble = true;
            numberDouble = static_cast<double>(number) * 2 + digit;
        } else {
            number = (number << 1) + digit;
        }

        this->index++;
        scanned = true;
    }

    if (!scanned) {
        // only 0b or 0B
        this->throwUnexpectedToken();
    }

    if (!this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        /* istanbul ignore else */
        if (isIdentifierStart(ch) || isDecimalDigit(ch)) {
            this->throwUnexpectedToken();
        }
    }

    if (shouldUseDouble) {
        token->setNumericLiteralResult(numberDouble, this->lineNumber, this->lineStart, start, this->index, false);
    } else {
        token->setNumericLiteralResult(number, this->lineNumber, this->lineStart, start, this->index, false);
    }
}

/* Scans the digits of an octal literal starting at the current index.
 *
 * `prefix` is the character that followed the leading '0': 'o'/'O' for the
 * ES6 form (already consumed by the caller), or an octal digit for the legacy
 * implicit form (not consumed yet, so it is read here as the first digit).
 * `start` is the offset where the whole literal begins.
 *
 * The value is accumulated in a 64-bit integer for as long as it fits. Once
 * another three-bit shift would push set bits out, accumulation continues in
 * a double (the same fallback scanHexLiteral uses) instead of silently
 * wrapping around. The token's `octal` flag is set only for the legacy form. */
void Scanner::scanOctalLiteral(Scanner::ScannerResult* token, char16_t prefix, size_t start)
{
    ASSERT(token != nullptr);
    uint64_t number = 0;
    double numberDouble = 0.0;
    bool shouldUseDouble = false;
    bool scanned = false;
    bool octal = isOctalDigit(prefix);

    while (!this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        if (!isOctalDigit(ch)) {
            break;
        }

        const unsigned digit = ch - '0';
        if (shouldUseDouble) {
            numberDouble = numberDouble * 8 + digit;
        } else if ((number >> 61) != 0) {
            // One of the top three bits is set: shifting by 3 would lose it.
            shouldUseDouble = true;
            numberDouble = static_cast<double>(number) * 8 + digit;
        } else {
            number = (number << 3) + digit;
        }

        this->index++;
        scanned = true;
    }

    if (!octal && !scanned) {
        // only 0o or 0O
        this->throwUnexpectedToken();
    }

    char16_t ch = this->peekChar();
    if (isIdentifierStart(ch) || isDecimalDigit(ch)) {
        this->throwUnexpectedToken();
    }

    if (shouldUseDouble) {
        token->setNumericLiteralResult(numberDouble, this->lineNumber, this->lineStart, start, this->index, false);
    } else {
        token->setNumericLiteralResult(number, this->lineNumber, this->lineStart, start, this->index, false);
    }
    token->octal = octal;
}

/* Decides whether the digit run after the current index is a legacy
 * (Annex B.1.1) octal literal: it is, unless an '8' or '9' shows up before
 * the first character that is not an octal digit. */
bool Scanner::isImplicitOctalLiteral()
{
    size_t pos = this->index + 1;
    while (pos < this->length) {
        const char16_t digit = this->sourceCharAt(pos);
        ++pos;

        if (digit == '8' || digit == '9') {
            return false;
        }
        if (!isOctalDigit(digit)) {
            // End of the digit run without meeting 8 or 9.
            break;
        }
    }
    return true;
}

/* Scans a numeric literal that starts at the current index with a decimal
 * digit or a '.'.
 *
 * Literals introduced by 0x / 0b / 0o, and legacy implicit octals, are handed
 * to the dedicated scanners. For decimal literals only the extent is
 * determined here: the token is stored with a placeholder value of 0 and the
 * final `true` flag. `startWithZero` is set for a multi-character integer
 * that begins with '0' and has neither a fraction nor an exponent. */
void Scanner::scanNumericLiteral(Scanner::ScannerResult* token)
{
    ASSERT(token != nullptr);
    const size_t start = this->index;
    const char16_t startChar = this->peekChar();
    // A numeric literal must start with a decimal digit or a decimal point.
    ASSERT(isDecimalDigit(startChar) || (startChar == '.'));

    // Advances the index over a (possibly empty) run of decimal digits.
    auto skipDecimalDigits = [this]() {
        while (isDecimalDigit(this->peekChar())) {
            ++this->index;
        }
    };

    char16_t ch = startChar;
    bool seenDotOrE = false;

    if (startChar != '.') {
        ++this->index;
        ch = this->peekChar();

        if (startChar == '0') {
            // "0x" hex, "0b" binary (ES6), "0o" octal (ES6).
            switch (ch) {
            case 'x':
            case 'X':
                ++this->index;
                this->scanHexLiteral(token, start);
                return;
            case 'b':
            case 'B':
                ++this->index;
                this->scanBinaryLiteral(token, start);
                return;
            case 'o':
            case 'O':
                ++this->index;
                this->scanOctalLiteral(token, ch, start);
                return;
            default:
                break;
            }

            // Legacy octal: '0' followed directly by octal digits.
            if (ch && isOctalDigit(ch) && this->isImplicitOctalLiteral()) {
                this->scanOctalLiteral(token, ch, start);
                return;
            }
        }

        skipDecimalDigits();
        ch = this->peekChar();
    }

    // Fraction part.
    if (ch == '.') {
        seenDotOrE = true;
        ++this->index;
        skipDecimalDigits();
        ch = this->peekChar();
    }

    // Exponent part: e/E, optional sign, at least one digit.
    if (ch == 'e' || ch == 'E') {
        seenDotOrE = true;
        ++this->index;
        ch = this->peekChar();

        if (ch == '+' || ch == '-') {
            ++this->index;
            ch = this->peekChar();
        }

        if (!isDecimalDigit(ch)) {
            this->throwUnexpectedToken();
        } else {
            while (isDecimalDigit(ch)) {
                ++this->index;
                ch = this->peekChar();
            }
        }
    }

    // An identifier may not start right after a numeric literal.
    if (!this->eof() && isIdentifierStart(this->peekChar())) {
        this->throwUnexpectedToken();
    }

    token->setNumericLiteralResult(0, this->lineNumber, this->lineStart, start, this->index, true);

    const bool isMultiCharInteger = !seenDotOrE && (this->index - start) > 1;
    if (startChar == '0' && isMultiCharInteger) {
        token->startWithZero = true;
    }
}

/* Scans a single- or double-quoted string literal starting at the current
 * index.
 *
 * Only the extent of the literal is determined here and escapes are
 * validated; the value is not built. A literal without any backslash is
 * stored as a source range (excluding the quotes). Otherwise a null String*
 * is stored so the value can be built later. `octal` records whether a legacy
 * octal escape, or `\8` / `\9`, was seen. An unterminated literal resets the
 * index to the opening quote and reports an unexpected token. */
void Scanner::scanStringLiteral(Scanner::ScannerResult* token)
{
    ASSERT(token != nullptr);
    const size_t start = this->index;
    char16_t quote = this->peekChar();
    // A string literal must start with a quote.
    ASSERT((quote == '\'' || quote == '"'));

    ++this->index;
    bool octal = false;
    bool isPlainCase = true;

    while (LIKELY(!this->eof())) {
        const char16_t current = this->peekCharWithoutEOF();
        ++this->index;

        if (current == quote) {
            // Closing quote found; '\0' marks the literal as terminated.
            quote = '\0';
            break;
        }

        if (LIKELY(current != '\\')) {
            if (UNLIKELY(isLineTerminator(current))) {
                // A raw line break ends the scan with the literal unterminated.
                break;
            }
            continue;
        }

        // Escape sequence: look at the character after the backslash.
        const char16_t escaped = this->peekChar();
        ++this->index;
        isPlainCase = false;

        if (escaped && isLineTerminator(escaped)) {
            // Line continuation; "\r\n" and "\n\r" count as one line break.
            ++this->lineNumber;
            if ((escaped == '\r' && this->peekChar() == '\n') || (escaped == '\n' && this->peekChar() == '\r')) {
                ++this->index;
            }
            this->lineStart = this->index;
            continue;
        }

        switch (escaped) {
        case 'u':
            if (this->peekChar() == '{') {
                ++this->index;
                this->scanUnicodeCodePointEscape();
                break;
            }
            if (this->scanHexEscape(escaped) == EMPTY_CODE_POINT) {
                this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence);
            }
            break;

        case 'x':
            if (this->scanHexEscape(escaped) == EMPTY_CODE_POINT) {
                this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence);
            }
            break;

        case 'n':
        case 'r':
        case 't':
        case 'b':
        case 'f':
        case 'v':
            // Single-character escapes need no further scanning.
            break;

        default:
            if (escaped && isOctalDigit(escaped)) {
                if (this->octalToDecimal(escaped, false) != NON_OCTAL_VALUE) {
                    octal = true;
                }
            } else if (isDecimalDigit(escaped)) {
                // \8 and \9 are flagged the same way as octal escapes.
                octal = true;
            }
            break;
        }
    }

    if (quote != '\0') {
        this->index = start;
        this->throwUnexpectedToken();
    }

    if (!isPlainCase) {
        // The value is built later, only if it is needed.
        token->setResult(Token::StringLiteralToken, static_cast<String*>(nullptr), this->lineNumber, this->lineStart, start, this->index, octal);
    } else {
        token->setResult(Token::StringLiteralToken, start + 1, this->index - 1, this->lineNumber, this->lineStart, start, this->index, octal);
    }
}

bool Scanner::isFutureReservedWord(const ParserStringView& id)
{
    const StringBufferAccessData& data = id.bufferAccessData();
    switch (data.length) {
    case 4:
        return data.equalsSameLength("enum");
    case 5:
        return data.equalsSameLength("super");
    case 6:
        return data.equalsSameLength("export") || data.equalsSameLength("import");
    }
    return false;
}

/* Scans one template-literal chunk starting at the current index and running
 * up to (and including) the next "${" or the closing backtick.
 *
 * `head` tells whether this is the first chunk of the literal. In that case
 * the opening backtick was consumed by the caller, so the reported start
 * offset is moved back by one to include it.
 *
 * Two strings are built: `raw` (source text, with line terminators other than
 * U+2028 / U+2029 normalised to '\n') and `cooked` (escape sequences
 * decoded). An unterminated chunk, an invalid hex escape or an octal escape
 * is reported through throwUnexpectedToken(). */
void Scanner::scanTemplate(Scanner::ScannerResult* token, bool head)
{
    ASSERT(token != nullptr);
    // TODO apply rope-string
    UTF16StringDataNonGCStd cooked;
    UTF16StringDataNonGCStd raw;
    bool terminated = false;
    size_t start = this->index;

    bool tail = false;

    while (!this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        ++this->index;
        if (ch == '`') {
            // Closing backtick: this is the last chunk of the literal.
            tail = true;
            terminated = true;
            break;
        } else if (ch == '$') {
            if (this->peekChar() == '{') {
                // "${" starts a substitution and ends this chunk.
                ++this->index;
                terminated = true;
                break;
            }
            cooked += ch;
            raw += ch;
        } else if (ch == '\\') {
            raw += ch;
            ch = this->peekChar();
            if (!isLineTerminator(ch)) {
                // Remember where the escape body starts so that its source
                // text can be copied into `raw` after it has been decoded.
                auto currentIndex = this->index;
                ++this->index;
                switch (ch) {
                case 'n':
                    cooked += '\n';
                    break;
                case 'r':
                    cooked += '\r';
                    break;
                case 't':
                    cooked += '\t';
                    break;
                case 'u':
                    if (this->peekChar() == '{') {
                        ++this->index;
                        // Encode through ParserCharPiece, like the other
                        // escape paths, so that a code point which needs a
                        // surrogate pair is not cut down to one char16_t.
                        const char32_t unescaped = this->scanUnicodeCodePointEscape();
                        ParserCharPiece piece(unescaped);
                        cooked += UTF16StringDataNonGCStd(piece.data, piece.length);
                    } else {
                        const char32_t unescaped = this->scanHexEscape(ch);
                        if (unescaped != EMPTY_CODE_POINT) {
                            ParserCharPiece piece(unescaped);
                            cooked += UTF16StringDataNonGCStd(piece.data, piece.length);
                        } else {
                            this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence);
                        }
                    }
                    break;
                case 'x': {
                    const char32_t unescaped = this->scanHexEscape(ch);
                    if (unescaped == EMPTY_CODE_POINT) {
                        this->throwUnexpectedToken(Messages::InvalidHexEscapeSequence);
                    }
                    ParserCharPiece piece(unescaped);
                    cooked += UTF16StringDataNonGCStd(piece.data, piece.length);
                    break;
                }
                case 'b':
                    cooked += '\b';
                    break;
                case 'f':
                    cooked += '\f';
                    break;
                case 'v':
                    cooked += '\v';
                    break;
                default:
                    if (ch == '0') {
                        if (isDecimalDigit(this->peekChar())) {
                            // Illegal: \01 \02 and so on
                            this->throwUnexpectedToken(Messages::TemplateOctalLiteral);
                        }
                        cooked += (char16_t)'\0';
                    } else if (isOctalDigit(ch)) {
                        // Illegal: \1 \2
                        this->throwUnexpectedToken(Messages::TemplateOctalLiteral);
                    } else {
                        // Any other escaped character stands for itself.
                        cooked += ch;
                    }
                    break;
                }
                // Copy the source text of the escape body into `raw`.
                auto endIndex = this->index;
                for (size_t i = currentIndex; i < endIndex; i++) {
                    raw += this->sourceCharAt(i);
                }
            } else {
                // Backslash followed by a line terminator: nothing is added
                // to `cooked`; `raw` keeps the (normalised) line break.
                ++this->index;
                ++this->lineNumber;
                if (ch == '\r' && this->peekChar() == '\n') {
                    ++this->index;
                }
                if (ch == 0x2028 || ch == 0x2029) {
                    raw += ch;
                } else {
                    raw += '\n';
                }
                this->lineStart = this->index;
            }
        } else if (isLineTerminator(ch)) {
            // Unescaped line break inside the template.
            ++this->lineNumber;
            if (ch == '\r' && this->peekChar() == '\n') {
                ++this->index;
            }
            if (ch == 0x2028 || ch == 0x2029) {
                raw += ch;
                cooked += ch;
            } else {
                raw += '\n';
                cooked += '\n';
            }
            this->lineStart = this->index;
        } else {
            cooked += ch;
            raw += ch;
        }
    }

    if (!terminated) {
        this->throwUnexpectedToken();
    }

    ScanTemplateResult* result = new ScanTemplateResult();
    result->head = head;
    result->tail = tail;
    result->valueRaw = UTF16StringData(raw.data(), raw.length());
    result->valueCooked = UTF16StringData(cooked.data(), cooked.length());

    if (head) {
        // Include the opening backtick in the token range.
        start--;
    }

    token->setTemplateTokenResult(result, this->lineNumber, this->lineStart, start, this->index);
}

/* Scans the body of a regular-expression literal, from the opening '/' at the
 * current index through the matching closing '/', and returns the text
 * between the two slashes as a new string (ASCIIString when every character
 * is ASCII, UTF16String otherwise).
 *
 * A '/' inside a [...] character class or after a backslash does not end the
 * body. A line terminator or the end of input before the closing slash is
 * reported with Messages::UnterminatedRegExp. */
String* Scanner::scanRegExpBody()
{
    const char16_t opening = this->peekChar();
    // A regular expression literal must start with a slash.
    ASSERT(opening == '/');
    ++this->index;

    // TODO apply rope-string
    UTF16StringDataNonGCStd pattern(&opening, 1);
    bool inCharacterClass = false;
    bool terminated = false;

    while (!this->eof()) {
        char16_t ch = this->peekCharWithoutEOF();
        ++this->index;
        pattern += ch;

        if (ch == '\\') {
            // The escaped character is copied verbatim and never ends the body.
            ch = this->peekChar();
            ++this->index;
            // ECMA-262 7.8.5
            if (isLineTerminator(ch)) {
                this->throwUnexpectedToken(Messages::UnterminatedRegExp);
            }
            pattern += ch;
        } else if (isLineTerminator(ch)) {
            this->throwUnexpectedToken(Messages::UnterminatedRegExp);
        } else if (inCharacterClass) {
            // Inside [...] only ']' is significant.
            inCharacterClass = (ch != ']');
        } else if (ch == '/') {
            terminated = true;
            break;
        } else if (ch == '[') {
            inCharacterClass = true;
        }
    }

    if (!terminated) {
        this->throwUnexpectedToken(Messages::UnterminatedRegExp);
    }

    // Drop the leading and the trailing slash.
    const UTF16StringDataNonGCStd body = pattern.substr(1, pattern.length() - 2);
    if (!isAllASCII(body.data(), body.length())) {
        return new UTF16String(body.data(), body.length());
    }
    return new ASCIIString(body.data(), body.length());
}

/* Scans the flags that follow a regular-expression body: the run of
 * identifier-part characters at the current index. The flags are returned as
 * a new string (ASCIIString when every character is ASCII, UTF16String
 * otherwise).
 *
 * A backslash that is not the last character of the input is always reported
 * as an unexpected token, whether or not it starts a valid \u escape. */
String* Scanner::scanRegExpFlags()
{
    UTF16StringDataNonGCStd flags;

    for (;;) {
        if (this->eof()) {
            break;
        }

        char16_t ch = this->peekCharWithoutEOF();
        if (!isIdentifierPart(ch)) {
            break;
        }
        ++this->index;

        if (ch != '\\' || this->eof()) {
            // Ordinary flag character.
            flags += ch;
            continue;
        }

        ch = this->peekChar();
        if (ch != 'u') {
            this->throwUnexpectedToken();
            continue;
        }

        // "\u": try to decode the escape, then report it in either case.
        ++this->index;
        const size_t restore = this->index;
        const char32_t decoded = this->scanHexEscape('u');
        if (decoded == EMPTY_CODE_POINT) {
            // Not a valid escape: rewind to just after the 'u'.
            this->index = restore;
            flags += 'u';
        } else {
            ParserCharPiece piece(decoded);
            flags += UTF16StringDataNonGCStd(piece.data, piece.length);
        }
        this->throwUnexpectedToken();
    }

    if (!isAllASCII(flags.data(), flags.length())) {
        return new UTF16String(flags.data(), flags.length());
    }
    return new ASCIIString(flags.data(), flags.length());
}

/* Scans a complete regular-expression literal (body, then flags) starting at
 * the current index, and stores it in `token` as a RegularExpressionToken
 * whose `valueRegexp` holds both parts. */
void Scanner::scanRegExp(Scanner::ScannerResult* token)
{
    ASSERT(token != nullptr);
    const size_t start = this->index;

    // The body must be scanned first: the flags start where it ends.
    String* const scannedBody = this->scanRegExpBody();
    String* const scannedFlags = this->scanRegExpFlags();

    ScanRegExpResult regexp;
    regexp.body = scannedBody;
    regexp.flags = scannedFlags;

    token->setResult(Token::RegularExpressionToken, this->lineNumber, this->lineStart, start, this->index);
    token->valueRegexp = regexp;
}

// ECMA-262 11.6.2.1 Keywords
//
// Maps the identifier text in `data` to its KeywordKind, or NotKeyword when
// it is not a keyword. The lookup dispatches on the first character, then on
// the length (and where needed on the second character), so that at most one
// string comparison is made. Each comparison starts after the characters
// that were already matched.
static ALWAYS_INLINE KeywordKind isKeyword(const StringBufferAccessData& data)
{
    // 'const' is specialized as Keyword in V8.
    // 'yield' and 'let' are for compatibility with SpiderMonkey and ES.next.
    // Some others are from future reserved words.

    const size_t len = data.length;

    switch (data.charAt(0)) {
    case 'b':
        if (len == 5 && data.equalsSameLength("break", 1)) {
            return BreakKeyword;
        }
        break;

    case 'c':
        switch (len) {
        case 4:
            if (data.equalsSameLength("case", 1)) {
                return CaseKeyword;
            }
            break;
        case 5:
            switch (data.charAt(1)) {
            case 'a':
                if (data.equalsSameLength("catch", 2)) {
                    return CatchKeyword;
                }
                break;
            case 'o':
                if (data.equalsSameLength("const", 2)) {
                    return ConstKeyword;
                }
                break;
            case 'l':
                if (data.equalsSameLength("class", 2)) {
                    return ClassKeyword;
                }
                break;
            }
            break;
        case 8:
            if (data.equalsSameLength("continue", 1)) {
                return ContinueKeyword;
            }
            break;
        }
        break;

    case 'd':
        switch (len) {
        case 2:
            if (data.equalsSameLength("do", 1)) {
                return DoKeyword;
            }
            break;
        case 6:
            if (data.equalsSameLength("delete", 1)) {
                return DeleteKeyword;
            }
            break;
        case 7:
            if (data.equalsSameLength("default", 1)) {
                return DefaultKeyword;
            }
            break;
        case 8:
            if (data.equalsSameLength("debugger", 1)) {
                return DebuggerKeyword;
            }
            break;
        }
        break;

    case 'e':
        switch (len) {
        case 4:
            switch (data.charAt(1)) {
            case 'l':
                if (data.equalsSameLength("else", 2)) {
                    return ElseKeyword;
                }
                break;
            case 'n':
                if (data.equalsSameLength("enum", 2)) {
                    return EnumKeyword;
                }
                break;
            }
            break;
        case 6:
            if (data.equalsSameLength("export", 1)) {
                return ExportKeyword;
            }
            break;
        case 7:
            if (data.equalsSameLength("extends", 1)) {
                return ExtendsKeyword;
            }
            break;
        }
        break;

    case 'f':
        switch (len) {
        case 3:
            if (data.equalsSameLength("for", 1)) {
                return ForKeyword;
            }
            break;
        case 7:
            if (data.equalsSameLength("finally", 1)) {
                return FinallyKeyword;
            }
            break;
        case 8:
            if (data.equalsSameLength("function", 1)) {
                return FunctionKeyword;
            }
            break;
        }
        break;

    case 'i':
        switch (len) {
        case 2:
            // Two characters: the second one alone decides between if / in.
            switch (data.charAt(1)) {
            case 'f':
                return IfKeyword;
            case 'n':
                return InKeyword;
            }
            break;
        case 6:
            if (data.equalsSameLength("import", 1)) {
                return ImportKeyword;
            }
            break;
        case 10:
            if (data.equalsSameLength("instanceof", 1)) {
                return InstanceofKeyword;
            }
            break;
        }
        break;

    case 'l':
        if (len == 3 && data.equalsSameLength("let", 1)) {
            return LetKeyword;
        }
        break;

    case 'n':
        if (len == 3 && data.equalsSameLength("new", 1)) {
            return NewKeyword;
        }
        break;

    case 'r':
        if (len == 6 && data.equalsSameLength("return", 1)) {
            return ReturnKeyword;
        }
        break;

    case 's':
        switch (len) {
        case 5:
            if (data.equalsSameLength("super", 1)) {
                return SuperKeyword;
            }
            break;
        case 6:
            if (data.equalsSameLength("switch", 1)) {
                return SwitchKeyword;
            }
            break;
        }
        break;

    case 't':
        switch (len) {
        case 3:
            if (data.equalsSameLength("try", 1)) {
                return TryKeyword;
            }
            break;
        case 4:
            if (data.equalsSameLength("this", 1)) {
                return ThisKeyword;
            }
            break;
        case 5:
            if (data.equalsSameLength("throw", 1)) {
                return ThrowKeyword;
            }
            break;
        case 6:
            if (data.equalsSameLength("typeof", 1)) {
                return TypeofKeyword;
            }
            break;
        }
        break;

    case 'v':
        switch (len) {
        case 3:
            if (data.equalsSameLength("var", 1)) {
                return VarKeyword;
            }
            break;
        case 4:
            if (data.equalsSameLength("void", 1)) {
                return VoidKeyword;
            }
            break;
        }
        break;

    case 'w':
        switch (len) {
        case 4:
            if (data.equalsSameLength("with", 1)) {
                return WithKeyword;
            }
            break;
        case 5:
            if (data.equalsSameLength("while", 1)) {
                return WhileKeyword;
            }
            break;
        }
        break;

    case 'y':
        if (len == 5 && data.equalsSameLength("yield", 1)) {
            return YieldKeyword;
        }
        break;
    }

    return NotKeyword;
}

/* Scans an identifier-like token starting at the current index and
 * classifies it as a keyword, `null`, `true` / `false`, or a plain
 * identifier.
 *
 * `ch0` is the first character; a backslash (U+005C) selects the slower path
 * that decodes escapes. When that path produced a String, the token carries
 * it; otherwise the token refers to the source range [start, end). */
ALWAYS_INLINE void Scanner::scanIdentifier(Scanner::ScannerResult* token, char16_t ch0)
{
    ASSERT(token != nullptr);
    const size_t start = this->index;

    // Backslash (U+005C) starts an escaped character.
    ScanIDResult id = UNLIKELY(ch0 == 0x5C) ? this->getComplexIdentifier() : this->getIdentifier();
    const size_t end = this->index;
    const auto& data = std::get<0>(id);

    Token type = Token::IdentifierToken;

    // No keyword or literal is a single character long, so a one-character
    // name is always a plain identifier and skips the lookups below.
    if (data.length != 1) {
        const KeywordKind keywordKind = isKeyword(data);
        if (keywordKind) {
            token->setKeywordResult(this->lineNumber, this->lineStart, start, end, keywordKind);
            return;
        }

        if (data.length == 4) {
            if (data.equalsSameLength("null")) {
                type = Token::NullLiteralToken;
            } else if (data.equalsSameLength("true")) {
                type = Token::BooleanLiteralToken;
            }
        } else if (data.length == 5 && data.equalsSameLength("false")) {
            type = Token::BooleanLiteralToken;
        }
    }

    const auto decoded = std::get<1>(id);
    if (UNLIKELY(decoded != nullptr)) {
        token->setResult(type, decoded, this->lineNumber, this->lineStart, start, end);
        return;
    }
    token->setResult(type, start, end, this->lineNumber, this->lineStart, start, end);
}

/* Scans the next token at the current index into `token`.
 *
 * The first character selects the scanner: identifier / keyword, string
 * literal, numeric literal, template head, or punctuator. At the end of the
 * input an EOFToken is produced. */
void Scanner::lex(Scanner::ScannerResult* token)
{
    ASSERT(token != nullptr);
    if (UNLIKELY(this->eof())) {
        token->setResult(Token::EOFToken, this->lineNumber, this->lineStart, this->index, this->index);
        return;
    }

    const char16_t cp = this->peekCharWithoutEOF();

    if (!isIdentifierStart(cp)) {
        // String literal starts with single quote (U+0027) or double quote (U+0022).
        if (cp == 0x27 || cp == 0x22) {
            this->scanStringLiteral(token);
            return;
        }

        // A number starts with a digit, or with a dot (U+002E) that is
        // directly followed by a digit.
        if ((UNLIKELY(cp == 0x2E) && isDecimalDigit(this->sourceCharAt(this->index + 1))) || isDecimalDigit(cp)) {
            this->scanNumericLiteral(token);
            return;
        }

        if (UNLIKELY(cp == '`')) {
            // The opening backtick is consumed here, not by scanTemplate().
            ++this->index;
            this->scanTemplate(token, true);
            return;
        }

        // Anything else is a punctuator, unless it is a surrogate pair that
        // encodes an identifier-start code point.
        const bool startsIdentifierPair = UNLIKELY(cp >= 0xD800 && cp < 0xDFFF) && isIdentifierStart(this->codePointAt(this->index));
        if (!startsIdentifierPair) {
            this->scanPunctuator(token, cp);
            return;
        }
    }

    // Single call site for the force-inlined identifier scanner.
    this->scanIdentifier(token, cp);
}
}