Implement compression of CompressibleStrings on GC reclaim end event

* Compress CompressibleStrings on GC reclaim end event
  - If there is a reference to the data of a CompressibleString on the stack, we should give up compressing it.
    We don't need to search the heap because StringView has been redesigned
    (string buffer data must not be stored on the heap without its owner).
* Redesign StringView
  - Don't save the string buffer address as a member, because the buffer of a CompressibleString can be deleted.
  - If we don't save the string buffer address in StringView, parser performance may drop,
    because the parser accesses string data a lot.
    So I introduce ParserStringView, which saves the buffer address. ParserStringView should be used in the parser only.
    We can save the string buffer address while parsing because GC is disabled while parsing.

* Enable CompressibleString always
* Implement cache of RegExpOptionStrings
* Implement a system-locale lookup function on RuntimeICUBinder to avoid calling uloc_getDefault.

Signed-off-by: Seonghyun Kim <sh8281.kim@samsung.com>
This commit is contained in:
Seonghyun Kim 2019-12-26 10:20:10 +09:00 committed by Hyukwoo Park
commit caa0fbc3fe
32 changed files with 1020 additions and 394 deletions

View file

@ -98,6 +98,10 @@ IF (${ESCARGOT_HOST} STREQUAL "android")
SET (ESCARGOT_LIBICU_SUPPORT OFF)
ENDIF()
#######################################################
# FLAGS FOR ADDITIONAL FUNCTION
#######################################################
FIND_PACKAGE (PkgConfig REQUIRED)
IF (${ESCARGOT_LIBICU_SUPPORT} STREQUAL "ON")
IF (${ESCARGOT_LIBICU_SUPPORT_WITH_DLOPEN} STREQUAL "ON")
@ -127,6 +131,8 @@ IF (${ESCARGOT_HOST} STREQUAL "tizen_obs")
SET (ESCARGOT_CXXFLAGS ${ESCARGOT_CXXFLAGS} ${DLOG_CFLAGS_OTHER})
ENDIF()
SET (ESCARGOT_DEFINITIONS ${ESCARGOT_DEFINITIONS} -DENABLE_COMPRESSIBLE_STRING)
#######################################################
# flags for $(MODE) : debug/release
#######################################################

View file

@ -328,16 +328,101 @@ StringRef* StringRef::createExternalFromUTF16(const char16_t* s, size_t len)
return toRef(new UTF16String(s, len, String::FromExternalMemory));
}
#if defined(ENABLE_SOURCE_COMPRESSION)
StringRef* StringRef::createFromUTF8ToCompressibleString(const char* s, size_t len)
bool StringRef::isCompressibleStringEnabled()
{
return toRef(String::fromUTF8ToCompressibleString(s, len));
#if defined(ENABLE_COMPRESSIBLE_STRING)
return true;
#else
return false;
#endif
}
StringRef* StringRef::createCompressibleString(const unsigned char* s, size_t len)
#if defined(ENABLE_COMPRESSIBLE_STRING)
StringRef* StringRef::createFromUTF8ToCompressibleString(ContextRef* context, const char* s, size_t len)
{
return toRef(new CompressibleString(s, len));
return toRef(String::fromUTF8ToCompressibleString(toImpl(context), s, len));
}
// Creates a CompressibleString in the given context from `len` UTF-16 code
// units starting at `s`, and returns it as a public StringRef handle.
StringRef* StringRef::createFromUTF16ToCompressibleString(ContextRef* context, const char16_t* s, size_t len)
{
    auto* compressible = new CompressibleString(toImpl(context), s, len);
    return toRef(compressible);
}
// Creates a CompressibleString in the given context from `len` chars starting
// at `s`, and returns it as a public StringRef handle.
StringRef* StringRef::createFromASCIIToCompressibleString(ContextRef* context, const char* s, size_t len)
{
    auto* compressible = new CompressibleString(toImpl(context), s, len);
    return toRef(compressible);
}
// Creates a CompressibleString in the given context from `len` 8-bit
// characters starting at `s`, and returns it as a public StringRef handle.
StringRef* StringRef::createFromLatin1ToCompressibleString(ContextRef* context, const unsigned char* s, size_t len)
{
    auto* compressible = new CompressibleString(toImpl(context), s, len);
    return toRef(compressible);
}
// Public wrapper: forwards to CompressibleString::allocateStringDataBuffer and
// returns its result for a buffer of `byteLength` bytes.
void* StringRef::allocateStringDataBufferForCompressibleString(size_t byteLength)
{
return CompressibleString::allocateStringDataBuffer(byteLength);
}
// Public wrapper: forwards `ptr` to CompressibleString::deallocateStringDataBuffer.
void StringRef::deallocateStringDataBufferForCompressibleString(void* ptr)
{
CompressibleString::deallocateStringDataBuffer(ptr);
}
// Creates a CompressibleString around an existing `buffer` holding `stringLen`
// characters; `is8Bit` selects 8-bit versus 16-bit content.
// NOTE(review): presumably `buffer` must come from
// allocateStringDataBufferForCompressibleString, since the string is expected
// to release it later — confirm against CompressibleString.
StringRef* StringRef::createFromAlreadyAllocatedBufferToCompressibleString(ContextRef* context, void* buffer, size_t stringLen, bool is8Bit)
{
return toRef(new CompressibleString(toImpl(context), buffer, stringLen, is8Bit));
}
#else
// Stub compiled when ENABLE_COMPRESSIBLE_STRING is not defined: logs an error
// and hits RELEASE_ASSERT_NOT_REACHED(). The trailing return only satisfies
// the signature.
StringRef* StringRef::createFromUTF8ToCompressibleString(ContextRef* context, const char* s, size_t len)
{
ESCARGOT_LOG_ERROR("If you want to use this function, you should enable source compression");
RELEASE_ASSERT_NOT_REACHED();
return nullptr;
}
// Stub compiled when ENABLE_COMPRESSIBLE_STRING is not defined: logs an error
// and hits RELEASE_ASSERT_NOT_REACHED(). The trailing return only satisfies
// the signature.
// The name and parameter types match the public declaration
// createFromUTF16ToCompressibleString(ContextRef*, const char16_t*, size_t);
// the class no longer declares createCompressibleString, so a stub under that
// name would not compile and the declared function would be left undefined.
StringRef* StringRef::createFromUTF16ToCompressibleString(ContextRef* context, const char16_t* s, size_t len)
{
    ESCARGOT_LOG_ERROR("If you want to use this function, you should enable source compression");
    RELEASE_ASSERT_NOT_REACHED();
    return nullptr;
}
// Stub compiled when ENABLE_COMPRESSIBLE_STRING is not defined: logs an error
// and hits RELEASE_ASSERT_NOT_REACHED(). The trailing return only satisfies
// the signature.
StringRef* StringRef::createFromASCIIToCompressibleString(ContextRef* context, const char* s, size_t len)
{
ESCARGOT_LOG_ERROR("If you want to use this function, you should enable source compression");
RELEASE_ASSERT_NOT_REACHED();
return nullptr;
}
// Stub compiled when ENABLE_COMPRESSIBLE_STRING is not defined: logs an error
// and hits RELEASE_ASSERT_NOT_REACHED(). The trailing return only satisfies
// the signature.
StringRef* StringRef::createFromLatin1ToCompressibleString(ContextRef* context, const unsigned char* s, size_t len)
{
ESCARGOT_LOG_ERROR("If you want to use this function, you should enable source compression");
RELEASE_ASSERT_NOT_REACHED();
return nullptr;
}
// Stub compiled when ENABLE_COMPRESSIBLE_STRING is not defined: logs an error
// and hits RELEASE_ASSERT_NOT_REACHED(). No buffer is allocated; the trailing
// return only satisfies the signature.
void* StringRef::allocateStringDataBufferForCompressibleString(size_t byteLength)
{
ESCARGOT_LOG_ERROR("If you want to use this function, you should enable source compression");
RELEASE_ASSERT_NOT_REACHED();
return nullptr;
}
// Stub compiled when ENABLE_COMPRESSIBLE_STRING is not defined: logs an error
// and hits RELEASE_ASSERT_NOT_REACHED(). `ptr` is not touched.
// This function returns void, so it must not return a value.
void StringRef::deallocateStringDataBufferForCompressibleString(void* ptr)
{
    ESCARGOT_LOG_ERROR("If you want to use this function, you should enable source compression");
    RELEASE_ASSERT_NOT_REACHED();
}
// Stub compiled when ENABLE_COMPRESSIBLE_STRING is not defined: logs an error
// and hits RELEASE_ASSERT_NOT_REACHED(). `buffer` is neither adopted nor
// freed; the trailing return only satisfies the signature.
StringRef* StringRef::createFromAlreadyAllocatedBufferToCompressibleString(ContextRef* context, void* buffer, size_t stringLen, bool is8Bit)
{
ESCARGOT_LOG_ERROR("If you want to use this function, you should enable source compression");
RELEASE_ASSERT_NOT_REACHED();
return nullptr;
}
#endif
StringRef* StringRef::emptyString()

View file

@ -774,10 +774,16 @@ public:
static StringRef* createExternalFromLatin1(const unsigned char* s, size_t len);
static StringRef* createExternalFromUTF16(const char16_t* s, size_t len);
#if defined(ENABLE_SOURCE_COMPRESSION)
static StringRef* createFromUTF8ToCompressibleString(const char* s, size_t len);
static StringRef* createCompressibleString(const unsigned char* s, size_t len);
#endif
// you can use these functions only if you enabled source compression
// you don't need to use CompressibleString when string is small(~128KB)
static bool isCompressibleStringEnabled();
static StringRef* createFromUTF8ToCompressibleString(ContextRef* context, const char* s, size_t len);
static StringRef* createFromUTF16ToCompressibleString(ContextRef* context, const char16_t* s, size_t len);
static StringRef* createFromASCIIToCompressibleString(ContextRef* context, const char* s, size_t len);
static StringRef* createFromLatin1ToCompressibleString(ContextRef* context, const unsigned char* s, size_t len);
static void* allocateStringDataBufferForCompressibleString(size_t byteLength);
static void deallocateStringDataBufferForCompressibleString(void* ptr);
static StringRef* createFromAlreadyAllocatedBufferToCompressibleString(ContextRef* context, void* buffer, size_t stringLen, bool is8Bit /* is ASCII or Latin1 */);
static StringRef* emptyString();
@ -789,7 +795,7 @@ public:
std::string toStdUTF8String();
// don't store this sturct
// don't store this struct or string buffer
// this is only for temporary access
struct StringBufferAccessDataRef {
bool has8BitContent;

View file

@ -20,6 +20,8 @@
#ifndef __EscargotASTBuilder__
#define __EscargotASTBuilder__
#include "parser/ParserStringView.h"
namespace Escargot {
#define FOR_EACH_TARGET_NODE(F) \
@ -565,12 +567,12 @@ public:
return SyntaxNode(CallExpression);
}
void setValueStringLiteral(const StringView& string)
void setValueStringLiteral(const ParserStringView& string)
{
m_valueStringLiteral = string;
}
StringView& getValueStringLiteral()
const ParserStringView& valueStringLiteral()
{
return m_valueStringLiteral;
}
@ -581,14 +583,14 @@ public:
if (key.type() == Identifier) {
return key.name() == value;
} else if (key.type() == Literal) {
return getValueStringLiteral().equals(value);
return valueStringLiteral().equals(value);
}
return false;
}
private:
StringView m_valueStringLiteral; // for StringLiteralNode (valueStringLiteral method)
ParserStringView m_valueStringLiteral; // for StringLiteralNode (valueStringLiteral method)
};
class NodeGenerator {
@ -753,7 +755,7 @@ public:
return new (m_allocator) CallExpressionNode(taggedTemplateExpression->expr(), args);
}
void setValueStringLiteral(const StringView& string)
void setValueStringLiteral(const ParserStringView& string)
{
RELEASE_ASSERT_NOT_REACHED();
}

View file

@ -472,11 +472,21 @@ void ErrorHandler::throwError(size_t index, size_t line, size_t col, String* des
throw * error;
};
// Returns a ParserStringView over the part of `source` delimited by this
// token's start and end offsets.
ParserStringView Scanner::SmallScannerResult::relatedSource(const ParserStringView& source) const
{
    ParserStringView tokenText(source, this->start, this->end);
    return tokenText;
}
// Returns a StringView over the part of `source` delimited by this token's
// start and end offsets.
StringView Scanner::SmallScannerResult::relatedSource(const StringView& source) const
{
    StringView tokenText(source, this->start, this->end);
    return tokenText;
}
// Returns a ParserStringView over the part of `source` delimited by this
// token's start and end offsets.
ParserStringView Scanner::ScannerResult::relatedSource(const ParserStringView& source)
{
    ParserStringView tokenText(source, this->start, this->end);
    return tokenText;
}
StringView Scanner::ScannerResult::relatedSource(const StringView& source)
{
return StringView(source, this->start, this->end);
@ -495,22 +505,22 @@ Value Scanner::ScannerResult::valueStringLiteralToValue(Scanner* scannerInstance
return this->valueStringLiteralData.m_stringIfNewlyAllocated;
}
return new StringView(scannerInstance->source, this->valueStringLiteralData.m_start, this->valueStringLiteralData.m_end);
return new StringView(scannerInstance->sourceAsNormalView, this->valueStringLiteralData.m_start, this->valueStringLiteralData.m_end);
}
StringView Scanner::ScannerResult::valueStringLiteral(Scanner* scannerInstance)
ParserStringView Scanner::ScannerResult::valueStringLiteral(Scanner* scannerInstance)
{
if (this->type == Token::KeywordToken) {
AtomicString as = keywordToString(scannerInstance->escargotContext, this->valueKeywordKind);
return StringView(as.string(), 0, as.string()->length());
return ParserStringView(as.string(), 0, as.string()->length());
}
if (this->hasAllocatedString) {
if (!this->valueStringLiteralData.m_stringIfNewlyAllocated) {
constructStringLiteral(scannerInstance);
}
return StringView(this->valueStringLiteralData.m_stringIfNewlyAllocated);
return ParserStringView(this->valueStringLiteralData.m_stringIfNewlyAllocated);
}
return StringView(scannerInstance->source, this->valueStringLiteralData.m_start, this->valueStringLiteralData.m_end);
return ParserStringView(scannerInstance->source, this->valueStringLiteralData.m_start, this->valueStringLiteralData.m_end);
}
double Scanner::ScannerResult::valueNumberLiteral(Scanner* scannerInstance)
@ -656,8 +666,10 @@ void Scanner::ScannerResult::constructStringLiteral(Scanner* scannerInstance)
}
Scanner::Scanner(::Escargot::Context* escargotContext, StringView code, size_t startLine, size_t startColumn)
: source(code)
: source(code, 0, code.length())
, sourceAsNormalView(code)
, escargotContext(escargotContext)
, sourceCodeAccessData(code.bufferAccessData())
, length(code.length())
, index(0)
, lineNumber(((length > 0) ? 1 : 0) + startLine)
@ -773,15 +785,8 @@ Scanner::ScanIDResult Scanner::getIdentifier()
}
const auto& srcData = this->source.bufferAccessData();
StringBufferAccessData ad;
ad.has8BitContent = srcData.has8BitContent;
ad.length = this->index - start;
if (srcData.has8BitContent) {
ad.buffer = ((LChar*)srcData.buffer) + start;
} else {
ad.buffer = ((char16_t*)srcData.buffer) + start;
}
StringBufferAccessData ad(srcData.has8BitContent, this->index - start,
srcData.has8BitContent ? reinterpret_cast<void*>(((LChar*)srcData.buffer) + start) : reinterpret_cast<void*>(((char16_t*)srcData.buffer) + start));
return std::make_tuple(ad, nullptr);
}
@ -896,7 +901,7 @@ void Scanner::scanPunctuator(Scanner::ScannerResult* token, char16_t ch)
case '.':
kind = Period;
if (this->peekChar() == '.' && this->source.bufferedCharAt(this->index + 1) == '.') {
if (this->peekChar() == '.' && this->sourceCharAt(this->index + 1) == '.') {
// Spread operator "..."
this->index += 2;
kind = PeriodPeriodPeriod;
@ -1227,7 +1232,7 @@ bool Scanner::isImplicitOctalLiteral()
// Implicit octal, unless there is a non-octal digit.
// (Annex B.1.1 on Numeric Literals)
for (size_t i = this->index + 1; i < this->length; ++i) {
const char16_t ch = this->source.bufferedCharAt(i);
const char16_t ch = this->sourceCharAt(i);
if (ch == '8' || ch == '9') {
return false;
}
@ -1402,7 +1407,7 @@ void Scanner::scanStringLiteral(Scanner::ScannerResult* token)
}
}
bool Scanner::isFutureReservedWord(const StringView& id)
bool Scanner::isFutureReservedWord(const ParserStringView& id)
{
const StringBufferAccessData& data = id.bufferAccessData();
switch (data.length) {
@ -1508,7 +1513,7 @@ void Scanner::scanTemplate(Scanner::ScannerResult* token, bool head)
}
auto endIndex = this->index;
for (size_t i = currentIndex; i < endIndex; i++) {
raw += this->source.bufferedCharAt(i);
raw += this->sourceCharAt(i);
}
} else {
++this->index;
@ -1897,7 +1902,7 @@ void Scanner::lex(Scanner::ScannerResult* token)
// Dot (.) U+002E can also start a floating-point number, hence the need
// to check the next character.
if (UNLIKELY(cp == 0x2E) && isDecimalDigit(this->source.bufferedCharAt(this->index + 1))) {
if (UNLIKELY(cp == 0x2E) && isDecimalDigit(this->sourceCharAt(this->index + 1))) {
this->scanNumericLiteral(token);
return;
}

View file

@ -21,6 +21,7 @@
#define __EscargotLexer__
#include "parser/esprima_cpp/esprima.h"
#include "parser/ParserStringView.h"
namespace Escargot {
@ -280,8 +281,9 @@ public:
KeywordKind valueKeywordKind;
};
ParserStringView relatedSource(const ParserStringView& source);
StringView relatedSource(const StringView& source);
StringView valueStringLiteral(Scanner* scannerInstance);
ParserStringView valueStringLiteral(Scanner* scannerInstance);
Value valueStringLiteralToValue(Scanner* scannerInstance);
double valueNumberLiteral(Scanner* scannerInstance);
@ -453,15 +455,17 @@ public:
this->type = InvalidToken;
}
ParserStringView relatedSource(const ParserStringView& source) const;
StringView relatedSource(const StringView& source) const;
};
// ScannerResult should be allocated on the stack by ALLOCA
COMPILE_ASSERT(sizeof(ScannerResult) < 512, "");
StringView source;
ParserStringView source;
StringView sourceAsNormalView;
::Escargot::Context* escargotContext;
// trackComment: boolean;
StringBufferAccessData sourceCodeAccessData;
size_t length;
size_t index;
@ -487,6 +491,11 @@ public:
ErrorHandler::throwError(this->index, this->lineNumber, this->index - this->lineStart + 1, new ASCIIString(message), ErrorObject::SyntaxError);
}
// Returns the char16_t at index `idx` of the source code, read through the
// sourceCodeAccessData member rather than through the `source` view.
// No bounds check is made here; any checking is left to charAt().
ALWAYS_INLINE char16_t sourceCharAt(const size_t idx) const
{
return sourceCodeAccessData.charAt(idx);
}
// ECMA-262 11.4 Comments
void skipSingleLineComment(void);
@ -496,20 +505,20 @@ public:
{
bool start = (this->index == 0);
while (LIKELY(!this->eof())) {
char16_t ch = this->source.bufferedCharAt(this->index);
char16_t ch = this->sourceCharAt(this->index);
if (isWhiteSpace(ch)) {
++this->index;
} else if (isLineTerminator(ch)) {
++this->index;
if (ch == 0x0D && this->source.bufferedCharAt(this->index) == 0x0A) {
if (ch == 0x0D && this->sourceCharAt(this->index) == 0x0A) {
++this->index;
}
++this->lineNumber;
this->lineStart = this->index;
start = true;
} else if (ch == 0x2F) { // U+002F is '/'
ch = this->source.bufferedCharAt(this->index + 1);
ch = this->sourceCharAt(this->index + 1);
if (ch == 0x2F) {
this->index += 2;
this->skipSingleLineComment();
@ -522,7 +531,7 @@ public:
}
} else if (start && ch == 0x2D) { // U+002D is '-'
// U+003E is '>'
if ((this->source.bufferedCharAt(this->index + 1) == 0x2D) && (this->source.bufferedCharAt(this->index + 2) == 0x3E)) {
if ((this->sourceCharAt(this->index + 1) == 0x2D) && (this->sourceCharAt(this->index + 2) == 0x3E)) {
// '-->' is a single-line comment
this->index += 3;
this->skipSingleLineComment();
@ -531,9 +540,9 @@ public:
}
} else if (ch == 0x3C) { // U+003C is '<'
if (this->length > this->index + 4) {
if (this->source.bufferedCharAt(this->index + 1) == '!'
&& this->source.bufferedCharAt(this->index + 2) == '-'
&& this->source.bufferedCharAt(this->index + 3) == '-') {
if (this->sourceCharAt(this->index + 1) == '!'
&& this->sourceCharAt(this->index + 2) == '-'
&& this->sourceCharAt(this->index + 3) == '-') {
this->index += 4; // `<!--`
this->skipSingleLineComment();
} else {
@ -549,13 +558,13 @@ public:
}
}
bool isFutureReservedWord(const StringView& id);
bool isFutureReservedWord(const ParserStringView& id);
void convertToKeywordInStrictMode(ScannerResult* token)
{
ASSERT(token->type == Token::IdentifierToken);
const StringView& keyword = token->relatedSource(this->source);
const auto& keyword = token->relatedSource(this->source);
if (keyword.equals("let")) {
token->setKeywordResult(token->lineNumber, token->lineStart, token->start, token->end, KeywordKind::LetKeyword);
} else if (keyword.equals("yield")) {
@ -617,9 +626,9 @@ public:
char32_t codePointAt(size_t i)
{
char32_t cp, first, second;
cp = this->source.bufferedCharAt(i);
cp = this->sourceCharAt(i);
if (cp >= 0xD800 && cp <= 0xDBFF) {
second = this->source.bufferedCharAt(i + 1);
second = this->sourceCharAt(i + 1);
if (second >= 0xDC00 && second <= 0xDFFF) {
first = cp;
cp = (first - 0xD800) * 0x400 + second - 0xDC00 + 0x10000;
@ -640,12 +649,12 @@ public:
private:
ALWAYS_INLINE char16_t peekCharWithoutEOF()
{
return this->source.bufferedCharAt(this->index);
return this->sourceCharAt(this->index);
}
ALWAYS_INLINE char16_t peekChar()
{
return UNLIKELY(this->eof()) ? 0 : this->source.bufferedCharAt(this->index);
return UNLIKELY(this->eof()) ? 0 : this->sourceCharAt(this->index);
}
char32_t scanHexEscape(char prefix);

View file

@ -0,0 +1,168 @@
/*
* Copyright (c) 2019-present Samsung Electronics Co., Ltd
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
* USA
*/
#ifndef __EscargotParserStringView__
#define __EscargotParserStringView__
namespace Escargot {
class ParserStringView : public String {
public:
ALWAYS_INLINE ParserStringView(const ParserStringView& str, const size_t s, const size_t e)
: String()
{
initBufferAccessData(str.bufferAccessData(), s, e);
}
ALWAYS_INLINE ParserStringView(const StringView& str, const size_t s, const size_t e)
: String()
{
initBufferAccessData(str.bufferAccessData(), s, e);
}
ALWAYS_INLINE ParserStringView(String* str)
: String()
{
initBufferAccessData(str->bufferAccessData(), 0, str->length());
}
ALWAYS_INLINE ParserStringView(String* str, size_t s, size_t e)
: String()
{
initBufferAccessData(str->bufferAccessData(), s, e);
}
ALWAYS_INLINE ParserStringView()
: String()
{
initBufferAccessData(String::emptyString->bufferAccessData(), 0, 0);
}
template <const size_t srcLen>
bool operator==(const char (&src)[srcLen]) const
{
return equals(src, srcLen - 1);
}
template <const size_t srcLen>
bool equals(const char (&src)[srcLen]) const
{
return equals(src, srcLen - 1);
}
bool equals(const char* src, size_t srcLen) const
{
if (srcLen != length()) {
return false;
}
const auto& data = m_bufferData;
if (data.has8BitContent) {
for (size_t i = 0; i < srcLen; i++) {
if (src[i] != ((const LChar*)data.buffer)[i]) {
return false;
}
}
} else {
for (size_t i = 0; i < srcLen; i++) {
if (src[i] != ((const char16_t*)data.buffer)[i]) {
return false;
}
}
}
return true;
}
template <const size_t srcLen>
bool operator!=(const char (&src)[srcLen]) const
{
return !operator==(src);
}
virtual UTF16StringData toUTF16StringData() const override
{
UTF16StringData ret;
size_t len = length();
ret.resizeWithUninitializedValues(len);
for (size_t i = 0; i < len; i++) {
ret[i] = charAt(i);
}
return ret;
}
virtual UTF8StringData toUTF8StringData() const override
{
return bufferAccessData().toUTF8String<UTF8StringData, UTF8StringDataNonGCStd>();
}
virtual UTF8StringDataNonGCStd toNonGCUTF8StringData() const override
{
return bufferAccessData().toUTF8String<UTF8StringDataNonGCStd>();
}
virtual const LChar* characters8() const override
{
ASSERT(has8BitContent());
return (LChar*)m_bufferData.buffer;
}
virtual const char16_t* characters16() const override
{
ASSERT(!has8BitContent());
return (const char16_t*)m_bufferData.buffer;
}
virtual bool isStringView() override
{
return true;
}
char16_t bufferedCharAt(const size_t idx) const
{
if (m_bufferData.has8BitContent) {
return ((const LChar*)m_bufferData.buffer)[idx];
} else {
return ((const char16_t*)m_bufferData.buffer)[idx];
}
}
void* operator new(size_t size) = delete;
void* operator new[](size_t size) = delete;
protected:
ALWAYS_INLINE void initBufferAccessData(const StringBufferAccessData& srcData, size_t start, size_t end)
{
m_bufferData.has8BitContent = srcData.has8BitContent;
m_bufferData.length = end - start;
if (srcData.has8BitContent) {
m_bufferData.buffer = ((LChar*)srcData.buffer) + start;
} else {
m_bufferData.buffer = ((char16_t*)srcData.buffer) + start;
}
}
private:
};
} // Escargot
#endif

View file

@ -605,7 +605,7 @@ public:
} else if (token.type == Token::EOFToken) {
msg = Messages::UnexpectedEOS;
}
value = (String*)new StringView(token.relatedSource(this->scanner->source));
value = new StringView(this->scanner->sourceAsNormalView, token.start, token.end);
} else {
value = new ASCIIString("ILLEGAL");
}
@ -969,16 +969,16 @@ public:
{
ASSERT(token != nullptr);
ASTNode ret;
StringView sv = token->valueStringLiteral(this->scanner);
ParserStringView sv = token->valueStringLiteral(this->scanner);
const auto& a = sv.bufferAccessData();
char16_t firstCh = a.charAt(0);
if (a.length == 1 && firstCh < ESCARGOT_ASCII_TABLE_MAX) {
ret = builder.createIdentifierNode(this->escargotContext->staticStrings().asciiTable[firstCh]);
} else {
if (token->hasAllocatedString) {
ret = builder.createIdentifierNode(AtomicString(this->escargotContext, sv.string()));
ret = builder.createIdentifierNode(AtomicString(this->escargotContext, token->valueStringLiteralData.m_stringIfNewlyAllocated));
} else {
ret = builder.createIdentifierNode(AtomicString(this->escargotContext, sv));
ret = builder.createIdentifierNode(AtomicString(this->escargotContext, &sv));
}
}
@ -1370,7 +1370,7 @@ public:
param = this->parsePatternWithDefault(builder, params);
}
for (size_t i = 0; i < params.size(); i++) {
AtomicString as(this->escargotContext, params[i].relatedSource(this->scanner->source));
AtomicString as(this->escargotContext, params[i].relatedSource(this->scanner->sourceAsNormalView));
this->validateParam(options, params[i], as);
}
options.params.push_back(builder.convertToParameterSyntaxNode(param));
@ -1663,7 +1663,7 @@ public:
bool isSet = false;
bool needImplicitName = false;
if (token->type == Token::IdentifierToken && !isAsync && lookaheadPropertyKey) {
StringView sv = token->valueStringLiteral(this->scanner);
ParserStringView sv = token->valueStringLiteral(this->scanner);
const auto& d = sv.bufferAccessData();
if (d.length == 3) {
if (d.equalsSameLength("get")) {
@ -3544,7 +3544,7 @@ public:
if (!this->context->strict && this->matchKeyword(InKeyword)) {
this->nextToken();
left = this->finalize(this->createNode(), builder.createIdentifierNode(AtomicString(this->escargotContext, keyword.relatedSource(this->scanner->source))));
left = this->finalize(this->createNode(), builder.createIdentifierNode(AtomicString(this->escargotContext, keyword.relatedSource(this->scanner->sourceAsNormalView))));
init = nullptr;
type = statementTypeForIn;
} else {
@ -5016,7 +5016,7 @@ public:
this->context->strict = previousStrict;
closeBlock(classBlockContext);
return this->finalize(startNode, builder.template createClass<ClassType>(idNode, superClass, classBody, classBlockContext.childLexicalBlockIndex, StringView(this->scanner->source, startNode.index, endNode.index)));
return this->finalize(startNode, builder.template createClass<ClassType>(idNode, superClass, classBody, classBlockContext.childLexicalBlockIndex, StringView(this->scanner->sourceAsNormalView, startNode.index, endNode.index)));
}
template <class ASTBuilder>

View file

@ -404,13 +404,21 @@ bool ArrayObject::setArrayLength(ExecutionState& state, const uint32_t newLength
rd->m_arrayObjectFastModeBufferCapacity = 0;
}
} else {
const size_t minExpandCountForUsingLog2Function = 3;
auto rd = rareData();
size_t oldCapacity = rd ? (size_t)rd->m_arrayObjectFastModeBufferCapacity : oldLength;
if (newLength) {
rd = ensureObjectRareData();
if (newLength > oldCapacity) {
ComputeReservedCapacityFunctionWithPercent<133> f;
size_t newCapacity = f(newLength);
size_t newCapacity;
if (rd->m_arrayObjectFastModeBufferExpandCount >= minExpandCountForUsingLog2Function) {
ComputeReservedCapacityFunctionWithLog2<> f;
newCapacity = f(newLength);
} else {
ComputeReservedCapacityFunctionWithPercent<130> f;
newCapacity = f(newLength);
}
auto newFastModeData = (SmallValue*)GC_MALLOC(sizeof(SmallValue) * newCapacity);
memcpy(newFastModeData, m_fastModeData, sizeof(SmallValue) * oldLength);
GC_FREE(m_fastModeData);
@ -420,12 +428,15 @@ bool ArrayObject::setArrayLength(ExecutionState& state, const uint32_t newLength
m_fastModeData[i] = SmallValue(SmallValue::EmptyValue);
}
ensureObjectRareData()->m_arrayObjectFastModeBufferCapacity = newCapacity;
rd->m_arrayObjectFastModeBufferCapacity = newCapacity;
if (rd->m_arrayObjectFastModeBufferExpandCount < minExpandCountForUsingLog2Function) {
rd->m_arrayObjectFastModeBufferExpandCount++;
}
} else {
for (size_t i = oldLength; i < newLength; i++) {
m_fastModeData[i] = SmallValue(SmallValue::EmptyValue);
}
ensureObjectRareData()->m_arrayObjectFastModeBufferCapacity = oldCapacity;
rd->m_arrayObjectFastModeBufferCapacity = oldCapacity;
}
} else {
GC_FREE(m_fastModeData);

View file

@ -112,6 +112,15 @@ void AtomicString::init(AtomicStringMap* ec, String* name)
auto iter = ec->find(name);
if (ec->end() == iter) {
if (name->isStringView()) {
auto buffer = name->bufferAccessData();
if (buffer.has8BitContent) {
name = new Latin1String((const char*)buffer.buffer, buffer.length);
} else {
name = new UTF16String((const char16_t*)buffer.buffer, buffer.length);
}
}
ASSERT(!name->isStringView());
ec->insert(name);
ASSERT(ec->find(name) != ec->end());
m_string = name;

View file

@ -88,7 +88,7 @@ public:
return true;
}
const StringBufferAccessData& bufferAccessData() const
StringBufferAccessData bufferAccessData() const
{
return m_string->bufferAccessData();
}

View file

@ -17,10 +17,12 @@
* USA
*/
#if defined(ENABLE_SOURCE_COMPRESSION)
#if defined(ENABLE_COMPRESSIBLE_STRING)
#include "Escargot.h"
#include "CompressibleString.h"
#include "runtime/Context.h"
#include "runtime/VMInstance.h"
#include "lz4.h"
namespace Escargot {
@ -31,13 +33,81 @@ void* CompressibleString::operator new(size_t size)
static GC_descr descr;
if (!typeInited) {
GC_word obj_bitmap[GC_BITMAP_SIZE(CompressibleString)] = { 0 };
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(CompressibleString, m_bufferAccessData.buffer));
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(CompressibleString, m_context));
descr = GC_make_descriptor(obj_bitmap, GC_WORD_LEN(CompressibleString));
typeInited = true;
}
return GC_MALLOC_EXPLICITLY_TYPED(size, descr);
}
// Base constructor that the other CompressibleString constructors delegate to.
// It marks the buffer data as having a special implementation, adds this
// string to the VM instance's compressibleStrings() list, and registers a GC
// finalizer that releases the storage and removes the string from the list.
CompressibleString::CompressibleString(Context* context)
    : String()
    , m_isOwnerMayFreed(false)
    , m_isCompressed(false)
    , m_context(context)
    , m_lastUsedTickcount(fastTickCount())
{
    m_bufferData.hasSpecialImpl = true;

    auto& v = context->vmInstance()->compressibleStrings();
    v.push_back(this);

    GC_REGISTER_FINALIZER_NO_ORDER(this, [](void* obj, void*) {
        CompressibleString* self = (CompressibleString*)obj;
        const bool wasCompressed = self->isCompressed();

        // Release whichever representation is currently held.
        if (wasCompressed) {
            self->m_compressedData.~CompressedDataVector();
        } else {
            deallocateStringDataBuffer(const_cast<void*>(self->m_bufferData.buffer));
        }

        // The context / VM instance is only touched when the owner is known
        // to still be alive.
        if (!self->m_isOwnerMayFreed) {
            // compressWorker() already subtracts this string's size from the
            // uncompressed-size counter when it compresses. Subtract here only
            // for a string that still holds its uncompressed buffer, so the
            // size is not removed twice.
            if (!wasCompressed) {
                self->m_context->vmInstance()->compressibleStringsUncomressedBufferSize() -= self->decomressedBufferSize();
            }

            // Guard the erase: erasing end() is undefined behaviour.
            auto& v = self->m_context->vmInstance()->compressibleStrings();
            auto iter = std::find(v.begin(), v.end(), self);
            if (iter != v.end()) {
                v.erase(iter);
            }
        }
    },
                                   nullptr, nullptr, nullptr);
}
// Copies `len` chars from `str` into a newly allocated string data buffer and
// installs it as 8-bit content.
CompressibleString::CompressibleString(Context* context, const char* str, size_t len)
    : CompressibleString(context)
{
    const size_t byteLength = sizeof(char) * len;
    char* storage = (char*)allocateStringDataBuffer(byteLength);
    memcpy(storage, str, len);
    initBufferAccessData(storage, len, true);
}
// Copies `len` 8-bit characters from `str` into a newly allocated string data
// buffer and installs it as 8-bit content.
CompressibleString::CompressibleString(Context* context, const LChar* str, size_t len)
    : CompressibleString(context)
{
    const size_t byteLength = sizeof(char) * len;
    char* storage = (char*)allocateStringDataBuffer(byteLength);
    memcpy(storage, str, len);
    initBufferAccessData(storage, len, true);
}
// Copies `len` 16-bit characters (len * 2 bytes) from `str` into a newly
// allocated string data buffer and installs it as 16-bit content.
CompressibleString::CompressibleString(Context* context, const char16_t* str, size_t len)
    : CompressibleString(context)
{
    const size_t byteLength = sizeof(char) * len * 2;
    char* storage = (char*)allocateStringDataBuffer(byteLength);
    memcpy(storage, str, len * 2);
    initBufferAccessData(storage, len, false);
}
// Adopts an existing `buffer` of `stringLength` characters without copying it;
// `is8bit` selects 8-bit versus 16-bit content.
// The finalizer registered by the delegated constructor releases an
// uncompressed buffer with deallocateStringDataBuffer(), so `buffer` must be
// releasable that way.
CompressibleString::CompressibleString(Context* context, void* buffer, size_t stringLength, bool is8bit)
: CompressibleString(context)
{
initBufferAccessData(buffer, stringLength, is8bit);
}
// Stores the character width, the length in characters and the buffer pointer
// in m_bufferData, then adds this string's decomressedBufferSize() to the VM
// instance's uncompressed-buffer-size counter.
void CompressibleString::initBufferAccessData(void* data, size_t len, bool is8bit)
{
m_bufferData.has8BitContent = is8bit;
m_bufferData.length = len;
m_bufferData.buffer = data;
// Account for the newly installed uncompressed buffer.
m_context->vmInstance()->compressibleStringsUncomressedBufferSize() += decomressedBufferSize();
}
UTF8StringDataNonGCStd CompressibleString::toNonGCUTF8StringData() const
{
return bufferAccessData().toUTF8String<UTF8StringDataNonGCStd>();
@ -63,92 +133,130 @@ UTF16StringData CompressibleString::toUTF16StringData() const
}
}
// Allocates `byteLength` bytes with malloc() and returns the result unchanged,
// so the return value is whatever malloc() returns, including on failure.
void* CompressibleString::allocateStringDataBuffer(size_t byteLength)
{
return malloc(byteLength);
}
// Releases a buffer with free(); the counterpart of allocateStringDataBuffer().
void CompressibleString::deallocateStringDataBuffer(void* ptr)
{
free(ptr);
}
bool CompressibleString::compress()
{
ASSERT(!m_bufferAccessData.hasSpecialImpl);
if (UNLIKELY(!m_bufferAccessData.length)) {
ASSERT(!m_isCompressed);
if (UNLIKELY(!m_bufferData.length)) {
return false;
}
bool has8Bit = m_bufferAccessData.has8BitContent;
bool has8Bit = m_bufferData.has8BitContent;
if (has8Bit) {
return compressWorker<LChar>();
return compressWorker<LChar>(currentStackPointer());
} else {
return compressWorker<char16_t>();
return compressWorker<char16_t>(currentStackPointer());
}
}
bool CompressibleString::decompress()
void CompressibleString::decompress()
{
ASSERT(m_bufferAccessData.hasSpecialImpl);
ASSERT(m_bufferAccessData.length);
ASSERT(m_isCompressed);
ASSERT(m_bufferData.length);
bool has8Bit = m_bufferAccessData.has8BitContent;
bool has8Bit = m_bufferData.has8BitContent;
if (has8Bit) {
return decompressWorker<LChar>();
decompressWorker<LChar>();
} else {
return decompressWorker<char16_t>();
decompressWorker<char16_t>();
}
}
constexpr static const size_t g_compressChunkSize = 1044465;
static_assert(LZ4_COMPRESSBOUND(g_compressChunkSize) == 1024 * 1024, "");
template <typename StringType>
bool CompressibleString::compressWorker()
bool CompressibleString::compressWorker(void* callerSP)
{
ASSERT(!m_bufferAccessData.hasSpecialImpl);
ASSERT(m_bufferAccessData.length > 0);
ASSERT(!m_isCompressed);
ASSERT(m_bufferData.length > 0);
int originByteLength = m_bufferAccessData.length * sizeof(StringType);
#if defined(STACK_GROWS_DOWN)
size_t* start = (size_t*)((size_t)callerSP & ~(sizeof(size_t) - 1));
size_t* end = (size_t*)m_context->vmInstance()->stackStartAddress();
#else
size_t* start = (size_t*)m_context->vmInstance()->stackStartAddress();
size_t* end = (size_t*)((size_t)callerSP & ~(sizeof(size_t) - 1));
#endif
int boundLength = LZ4::LZ4_compressBound(originByteLength);
char* compBuffer = new char[boundLength];
int compressedLength = LZ4::LZ4_compress_default(m_bufferAccessData.bufferAs8Bit, compBuffer, originByteLength, boundLength);
if (!compressedLength) {
// compression fail
return false;
while (start != end) {
if (UNLIKELY(*start == (size_t)m_bufferData.buffer)) {
// if there is reference on stack, we cannot compress string.
return false;
}
start++;
}
ASSERT(compressedLength > 0);
// immediately free the original string after compression
GC_FREE(const_cast<void*>(m_bufferAccessData.buffer));
size_t originByteLength = m_bufferData.length * sizeof(StringType);
int lastBoundLength = 0;
std::unique_ptr<char[]> compBuffer;
for (size_t srcIndex = 0; srcIndex < originByteLength; srcIndex += g_compressChunkSize) {
int srcSize = (int)std::min(g_compressChunkSize, originByteLength - srcIndex);
int boundLength = LZ4::LZ4_compressBound(srcSize);
if (boundLength > lastBoundLength) {
compBuffer.reset(new char[boundLength]);
lastBoundLength = boundLength;
}
char* data = (char*)GC_MALLOC_ATOMIC(compressedLength);
memcpy(data, compBuffer, compressedLength);
m_bufferAccessData.bufferAs8Bit = const_cast<const char*>(data);
m_bufferAccessData.hasSpecialImpl = true;
m_compressedLength = compressedLength;
int compressedLength = LZ4::LZ4_compress_default(m_bufferData.bufferAs8Bit + srcIndex, (char*)compBuffer.get(), srcSize, boundLength);
if (!compressedLength) {
// compression fail
return false;
}
delete[] compBuffer;
ASSERT(compressedLength > 0);
m_compressedData.push_back(std::vector<char>(compBuffer.get(), compBuffer.get() + compressedLength));
}
m_context->vmInstance()->compressibleStringsUncomressedBufferSize() -= decomressedBufferSize();
// immediately free the original string after compression when there is no reference on stack
deallocateStringDataBuffer(const_cast<void*>(m_bufferData.buffer));
m_bufferData.bufferAs8Bit = nullptr;
m_isCompressed = true;
return true;
}
template <typename StringType>
bool CompressibleString::decompressWorker()
void CompressibleString::decompressWorker()
{
ASSERT(m_bufferAccessData.hasSpecialImpl);
ASSERT(m_isCompressed);
int originByteLength = m_bufferAccessData.length * sizeof(StringType);
int compressedLength = m_compressedLength;
size_t originByteLength = m_bufferData.length * sizeof(StringType);
char* data = (char*)GC_MALLOC_ATOMIC(originByteLength);
char* dstBuffer = (char*)allocateStringDataBuffer(originByteLength);
int dstIndex = 0;
int decompressedLength = LZ4::LZ4_decompress_safe(m_bufferAccessData.bufferAs8Bit, data, compressedLength, originByteLength);
if (!decompressedLength) {
// decompress fail
return false;
for (size_t srcIndex = 0, bufIndex = 0; srcIndex < originByteLength; srcIndex += g_compressChunkSize, bufIndex++) {
int srcSize = (int)std::min(g_compressChunkSize, originByteLength - srcIndex);
int decompressedLength = LZ4::LZ4_decompress_safe(m_compressedData[bufIndex].data(), dstBuffer + dstIndex, m_compressedData[bufIndex].size(), srcSize);
if (!decompressedLength) {
// decompress fail
RELEASE_ASSERT_NOT_REACHED();
}
dstIndex += srcSize;
}
ASSERT(decompressedLength == originByteLength);
// immediately free the decompressed string after decompression
GC_FREE(const_cast<void*>(m_bufferAccessData.buffer));
CompressedDataVector().swap(m_compressedData);
m_bufferAccessData.bufferAs8Bit = const_cast<const char*>(data);
m_bufferAccessData.hasSpecialImpl = false;
m_bufferData.bufferAs8Bit = const_cast<const char*>(dstBuffer);
m_isCompressed = false;
return true;
m_context->vmInstance()->compressibleStringsUncomressedBufferSize() += decomressedBufferSize();
}
}
#endif // ENABLE_SOURCE_COMPRESSION
#endif // ENABLE_COMPRESSIBLE_STRING

View file

@ -20,143 +20,99 @@
#ifndef __EscargotCompressibleString__
#define __EscargotCompressibleString__
#if defined(ENABLE_SOURCE_COMPRESSION)
#if defined(ENABLE_COMPRESSIBLE_STRING)
#include "runtime/String.h"
namespace Escargot {
class Context;
class CompressibleString : public String {
friend class VMInstance;
public:
CompressibleString()
: String()
, m_compressedLength(0)
{
}
CompressibleString(Context* context);
// 8bit string constructor
explicit CompressibleString(Latin1StringData&& src)
: String()
, m_compressedLength(0)
{
Latin1StringData data = std::move(src);
initBufferAccessData(data);
}
explicit CompressibleString(const char* str)
: String()
, m_compressedLength(0)
{
Latin1StringData data;
data.append((const LChar*)str, strlen(str));
initBufferAccessData(data);
}
CompressibleString(const char* str, size_t len)
: String()
, m_compressedLength(0)
{
Latin1StringData data;
data.append((const LChar*)str, len);
initBufferAccessData(data);
}
CompressibleString(const LChar* str, size_t len)
: String()
, m_compressedLength(0)
{
Latin1StringData data;
data.append(str, len);
initBufferAccessData(data);
}
CompressibleString(Context* context, const char* str, size_t len);
CompressibleString(Context* context, const LChar* str, size_t len);
// 16bit string constructor
CompressibleString(Context* context, const char16_t* str, size_t len);
explicit CompressibleString(UTF16StringData&& src)
: String()
, m_compressedLength(0)
{
UTF16StringData data = std::move(src);
initBufferAccessData(data);
}
// from already allocated buffer
CompressibleString(Context* context, void* buffer, size_t stringLength, bool is8bit);
CompressibleString(const char16_t* str, size_t len)
: String()
, m_compressedLength(0)
{
UTF16StringData data;
data.append(str, len);
initBufferAccessData(data);
}
virtual bool isCompressibleString()
virtual bool isCompressibleString() override
{
return true;
}
virtual char16_t charAt(const size_t idx) const
{
return bufferAccessData().charAt(idx);
}
virtual UTF16StringData toUTF16StringData() const;
virtual UTF8StringData toUTF8StringData() const;
virtual UTF8StringDataNonGCStd toNonGCUTF8StringData() const;
virtual UTF16StringData toUTF16StringData() const override;
virtual UTF8StringData toUTF8StringData() const override;
virtual UTF8StringDataNonGCStd toNonGCUTF8StringData() const override;
virtual const LChar* characters8() const
virtual const LChar* characters8() const override
{
return (const LChar*)bufferAccessData().buffer;
}
virtual const char16_t* characters16() const
virtual const char16_t* characters16() const override
{
return (const char16_t*)bufferAccessData().buffer;
}
virtual void bufferAccessDataSpecialImpl()
virtual StringBufferAccessData bufferAccessDataSpecialImpl() override
{
ASSERT(m_bufferAccessData.hasSpecialImpl);
decompress();
ASSERT(!m_bufferAccessData.hasSpecialImpl);
m_lastUsedTickcount = fastTickCount();
if (isCompressed()) {
decompress();
}
return StringBufferAccessData(m_bufferData.has8BitContent, m_bufferData.length, const_cast<void*>(m_bufferData.buffer));
}
bool isCompressed()
{
// m_bufferAccessData.hasSpecialImpl represents compression status
return m_bufferAccessData.hasSpecialImpl;
return m_isCompressed;
}
void* operator new(size_t);
void* operator new[](size_t) = delete;
void operator delete[](void*) = delete;
static void* allocateStringDataBuffer(size_t byteLength);
static void deallocateStringDataBuffer(void* ptr);
bool compress();
bool decompress();
void decompress();
private:
void initBufferAccessData(Latin1StringData& stringData)
{
m_bufferAccessData.has8BitContent = true;
m_bufferAccessData.length = stringData.length();
m_bufferAccessData.buffer = stringData.takeBuffer();
}
void initBufferAccessData(void* data, size_t len, bool is8bit);
void initBufferAccessData(UTF16StringData& stringData)
size_t decomressedBufferSize()
{
m_bufferAccessData.has8BitContent = false;
m_bufferAccessData.length = stringData.length();
m_bufferAccessData.buffer = stringData.takeBuffer();
if (isCompressed()) {
return 0;
} else {
return m_bufferData.length * (m_bufferData.has8BitContent ? 1 : 2);
}
}
template <typename StringType>
bool compressWorker();
NEVER_INLINE bool compressWorker(void* callerSP);
template <typename StringType>
bool decompressWorker();
NEVER_INLINE void decompressWorker();
int m_compressedLength; // compressed length in byte
bool m_isOwnerMayFreed;
bool m_isCompressed;
Context* m_context;
uint64_t m_lastUsedTickcount;
typedef std::vector<std::vector<char>> CompressedDataVector;
CompressedDataVector m_compressedData;
};
}
#endif // ENABLE_SOURCE_COMPRESSION
#endif // ENABLE_COMPRESSIBLE_STRING
#endif

View file

@ -95,6 +95,7 @@ ObjectRareData::ObjectRareData(Object* obj)
m_isSpreadArrayObject = false;
m_shouldUpdateEnumerateObject = false;
m_hasNonWritableLastIndexRegexpObject = false;
m_arrayObjectFastModeBufferExpandCount = 0;
m_extraData = nullptr;
m_internalSlot = nullptr;
}
@ -1628,29 +1629,43 @@ Value Object::speciesConstructor(ExecutionState& state, const Value& defaultCons
String* Object::optionString(ExecutionState& state)
{
char flags[6] = { 0 };
int flags_idx = 0;
size_t flagsIdx = 0;
size_t cacheIndex = 0;
if (this->get(state, ObjectPropertyName(state, state.context()->staticStrings().global)).value(state, this).toBoolean(state)) {
flags[flags_idx++] = 'g';
flags[flagsIdx++] = 'g';
cacheIndex |= 1 << 0;
}
if (this->get(state, ObjectPropertyName(state, state.context()->staticStrings().ignoreCase)).value(state, this).toBoolean(state)) {
flags[flags_idx++] = 'i';
flags[flagsIdx++] = 'i';
cacheIndex |= 1 << 1;
}
if (this->get(state, ObjectPropertyName(state, state.context()->staticStrings().multiline)).value(state, this).toBoolean(state)) {
flags[flags_idx++] = 'm';
flags[flagsIdx++] = 'm';
cacheIndex |= 1 << 2;
}
if (this->get(state, ObjectPropertyName(state, state.context()->staticStrings().unicode)).value(state, this).toBoolean(state)) {
flags[flags_idx++] = 'u';
flags[flagsIdx++] = 'u';
cacheIndex |= 1 << 3;
}
if (this->get(state, ObjectPropertyName(state, state.context()->staticStrings().sticky)).value(state, this).toBoolean(state)) {
flags[flags_idx++] = 'y';
flags[flagsIdx++] = 'y';
cacheIndex |= 1 << 4;
}
return new ASCIIString(flags);
ASCIIString* result;
auto cache = state.context()->vmInstance()->regexpOptionStringCache();
if (cache[cacheIndex]) {
result = cache[cacheIndex];
} else {
result = cache[cacheIndex] = new ASCIIString(flags, flagsIdx);
}
return result;
}
bool Object::isRegExp(ExecutionState& state)

View file

@ -52,6 +52,7 @@ struct ObjectRareData : public PointerValue {
bool m_isSpreadArrayObject : 1;
bool m_shouldUpdateEnumerateObject : 1; // used only for Array Object when ArrayObject::deleteOwnProperty called
bool m_hasNonWritableLastIndexRegexpObject : 1;
uint8_t m_arrayObjectFastModeBufferExpandCount : 8;
void* m_extraData;
Object* m_prototype;
union {

View file

@ -110,20 +110,23 @@ void* RegExpObject::operator new(size_t size)
static String* escapeSlashInPattern(String* patternStr)
{
if (patternStr->length() == 0)
if (patternStr->length() == 0) {
return patternStr;
}
size_t len = patternStr->length();
auto accessData = patternStr->bufferAccessData();
const size_t& len = accessData.length;
bool slashFlag = false;
size_t i, start = 0;
StringBuilder builder;
while (true) {
for (i = 0; start + i < len; i++) {
if (UNLIKELY(patternStr->charAt(start + i) == '/') && i > 0) {
if (UNLIKELY(accessData.charAt(start + i) == '/') && i > 0) {
size_t backSlashCount = 0;
size_t s = start + i - 1;
while (true) {
if (patternStr->charAt(s) == '\\') {
if (accessData.charAt(s) == '\\') {
backSlashCount++;
if (s == 0) {
break;
@ -153,10 +156,11 @@ static String* escapeSlashInPattern(String* patternStr)
break;
}
}
if (!slashFlag)
if (!slashFlag) {
return patternStr;
else
} else {
return builder.finalize();
}
}
void RegExpObject::internalInit(ExecutionState& state, String* source)
@ -224,8 +228,9 @@ void RegExpObject::parseOption(ExecutionState& state, const String* optionString
{
this->m_option = RegExpObject::Option::None;
for (size_t i = 0; i < optionString->length(); i++) {
switch (optionString->charAt(i)) {
auto bufferAccessData = optionString->bufferAccessData();
for (size_t i = 0; i < bufferAccessData.length; i++) {
switch (bufferAccessData.charAt(i)) {
case 'g':
if (this->m_option & Option::Global)
ErrorObject::throwBuiltinError(state, ErrorObject::SyntaxError, "RegExp has multiple 'g' flags");

View file

@ -31,7 +31,7 @@ void* RopeString::operator new(size_t size)
if (!typeInited) {
GC_word obj_bitmap[GC_BITMAP_SIZE(RopeString)] = { 0 };
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(RopeString, m_left));
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(RopeString, m_bufferAccessData.buffer));
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(RopeString, m_bufferData.buffer));
descr = GC_make_descriptor(obj_bitmap, GC_WORD_LEN(RopeString));
typeInited = true;
}
@ -83,45 +83,45 @@ String* RopeString::createRopeString(String* lstr, String* rstr, ExecutionState*
}
RopeString* rope = new RopeString();
rope->m_bufferAccessData.length = llen + rlen;
rope->m_bufferData.length = llen + rlen;
rope->m_left = lstr;
rope->m_bufferAccessData.buffer = rstr;
rope->m_bufferData.buffer = rstr;
bool l8bit;
if (lstr->isRopeString()) {
l8bit = ((RopeString*)lstr)->m_bufferAccessData.has8BitContent;
l8bit = ((RopeString*)lstr)->m_bufferData.has8BitContent;
} else {
l8bit = lstr->has8BitContent();
}
bool r8bit;
if (rstr->isRopeString()) {
r8bit = ((RopeString*)rstr)->m_bufferAccessData.has8BitContent;
r8bit = ((RopeString*)rstr)->m_bufferData.has8BitContent;
} else {
r8bit = rstr->has8BitContent();
}
rope->m_bufferAccessData.has8BitContent = l8bit & r8bit;
rope->m_bufferData.has8BitContent = l8bit & r8bit;
return rope;
}
template <typename ResultType>
void RopeString::flattenRopeStringWorker()
{
ResultType* result = (ResultType*)GC_MALLOC_ATOMIC(sizeof(ResultType) * m_bufferAccessData.length);
ResultType* result = (ResultType*)GC_MALLOC_ATOMIC(sizeof(ResultType) * m_bufferData.length);
std::vector<String*> queue;
queue.push_back(m_left);
queue.push_back((String*)m_bufferAccessData.buffer);
size_t pos = m_bufferAccessData.length;
queue.push_back((String*)m_bufferData.buffer);
size_t pos = m_bufferData.length;
size_t k = 0;
while (!queue.empty()) {
String* cur = queue.back();
queue.pop_back();
if (cur->isRopeString()) {
RopeString* cur2 = (RopeString*)cur;
if (cur2->m_bufferAccessData.hasSpecialImpl) {
if (cur2->m_bufferData.hasSpecialImpl) {
queue.push_back(cur2->m_left);
queue.push_back((String*)cur2->m_bufferAccessData.buffer);
queue.push_back((String*)cur2->m_bufferData.buffer);
continue;
}
}
@ -144,8 +144,8 @@ void RopeString::flattenRopeStringWorker()
}
}
m_bufferAccessData.hasSpecialImpl = false;
m_bufferAccessData.buffer = result;
m_bufferData.hasSpecialImpl = false;
m_bufferData.buffer = result;
m_left = nullptr;
}
@ -153,7 +153,7 @@ void RopeString::flattenRopeStringWorker()
void RopeString::flattenRopeString()
{
ASSERT(m_left);
if (m_bufferAccessData.has8BitContent) {
if (m_bufferData.has8BitContent) {
flattenRopeStringWorker<LChar>();
} else {
flattenRopeStringWorker<char16_t>();

View file

@ -32,10 +32,10 @@ public:
: String()
{
m_left = String::emptyString;
m_bufferAccessData.has8BitContent = true;
m_bufferAccessData.hasSpecialImpl = true;
m_bufferAccessData.length = 0;
m_bufferAccessData.buffer = nullptr;
m_bufferData.has8BitContent = true;
m_bufferData.hasSpecialImpl = true;
m_bufferData.length = 0;
m_bufferData.buffer = nullptr;
}
// this function not always create RopeString.
@ -44,40 +44,38 @@ public:
// provide ExecutionState if you need limit of string length(exception can be thrown only in ExecutionState area)
static String* createRopeString(String* lstr, String* rstr, ExecutionState* state = nullptr);
virtual char16_t charAt(const size_t idx) const
{
return bufferAccessData().charAt(idx);
}
virtual UTF16StringData toUTF16StringData() const;
virtual UTF8StringData toUTF8StringData() const;
virtual UTF8StringDataNonGCStd toNonGCUTF8StringData() const;
virtual UTF16StringData toUTF16StringData() const override;
virtual UTF8StringData toUTF8StringData() const override;
virtual UTF8StringDataNonGCStd toNonGCUTF8StringData() const override;
virtual bool isRopeString()
virtual bool isRopeString() override
{
return true;
}
virtual const LChar* characters8() const
virtual const LChar* characters8() const override
{
return (const LChar*)bufferAccessData().buffer;
}
virtual const char16_t* characters16() const
virtual const char16_t* characters16() const override
{
return (const char16_t*)bufferAccessData().buffer;
}
virtual void bufferAccessDataSpecialImpl()
{
ASSERT(m_bufferAccessData.hasSpecialImpl);
flattenRopeString();
ASSERT(!m_bufferAccessData.hasSpecialImpl);
}
void* operator new(size_t size);
void* operator new[](size_t size) = delete;
protected:
// Reached through String::bufferAccessData() while this rope still has
// hasSpecialImpl set, i.e. before it has been flattened.
// Flattens the rope and returns the access data of the resulting buffer.
virtual StringBufferAccessData bufferAccessDataSpecialImpl() override
{
ASSERT(m_bufferData.hasSpecialImpl);
flattenRopeString();
// flattening is expected to clear hasSpecialImpl; the conversion of
// m_bufferData to StringBufferAccessData below asserts the same thing
ASSERT(!m_bufferData.hasSpecialImpl);
return m_bufferData;
}
template <typename ResultType>
void flattenRopeStringWorker();
void flattenRopeString();

View file

@ -204,7 +204,7 @@ char32_t readUTF8Sequence(const char*& sequence, bool& valid, int& charlen)
return ch - offsetsFromUTF8[length - 1];
}
UTF16StringData utf8StringToUTF16String(const char* buf, const size_t len)
UTF16StringDataNonGCStd utf8StringToUTF16StringNonGC(const char* buf, const size_t len)
{
UTF16StringDataNonGCStd str;
const char* source = buf;
@ -230,6 +230,12 @@ UTF16StringData utf8StringToUTF16String(const char* buf, const size_t len)
}
}
return str;
}
// Decodes `len` bytes of UTF-8 starting at `buf` into UTF-16.
// The decoding itself is delegated to utf8StringToUTF16StringNonGC(); this
// wrapper only repackages that result as a UTF16StringData built from the
// decoded data pointer and length.
UTF16StringData utf8StringToUTF16String(const char* buf, const size_t len)
{
    auto decoded = utf8StringToUTF16StringNonGC(buf, len);
    const auto* decodedChars = decoded.data();
    const auto decodedLength = decoded.length();
    return UTF16StringData(decodedChars, decodedLength);
}
@ -381,7 +387,7 @@ UTF8StringData Latin1String::toUTF8StringData() const
UTF8StringData ret;
size_t len = length();
for (size_t i = 0; i < len; i++) {
uint8_t ch = m_bufferAccessData.uncheckedCharAtFor8Bit(i); /* assume that code points above 0xff are impossible since latin-1 is 8-bit */
uint8_t ch = m_bufferData.uncheckedCharAtFor8Bit(i); /* assume that code points above 0xff are impossible since latin-1 is 8-bit */
if (ch < 0x80) {
ret.append((char*)&ch, 1);
} else {
@ -398,7 +404,7 @@ UTF8StringDataNonGCStd Latin1String::toNonGCUTF8StringData() const
UTF8StringDataNonGCStd ret;
size_t len = length();
for (size_t i = 0; i < len; i++) {
uint8_t ch = m_bufferAccessData.uncheckedCharAtFor8Bit(i); /* assume that code points above 0xff are impossible since latin-1 is 8-bit */
uint8_t ch = m_bufferData.uncheckedCharAtFor8Bit(i); /* assume that code points above 0xff are impossible since latin-1 is 8-bit */
if (ch < 0x80) {
ret.append((char*)&ch, 1);
} else {
@ -606,11 +612,15 @@ String* String::fromUTF8(const char* src, size_t len)
}
}
#if defined(ENABLE_SOURCE_COMPRESSION)
String* String::fromUTF8ToCompressibleString(const char* src, size_t len)
#if defined(ENABLE_COMPRESSIBLE_STRING)
String* String::fromUTF8ToCompressibleString(Context* context, const char* src, size_t len)
{
auto s = utf8StringToUTF16String(src, len);
return new CompressibleString(std::move(s));
if (isAllASCII(src, len)) {
return new CompressibleString(context, src, len);
} else {
auto s = utf8StringToUTF16StringNonGC(src, len);
return new CompressibleString(context, s.data(), s.length());
}
}
#endif
@ -635,12 +645,13 @@ int String::stringCompare(size_t l1, size_t l2, const String* c1, const String*
bool String::equals(const String* src) const
{
const auto& myData = bufferAccessData();
const auto& srcData = src->bufferAccessData();
if (srcData.length != myData.length) {
if (length() != src->length()) {
return false;
}
const auto& myData = bufferAccessData();
const auto& srcData = src->bufferAccessData();
bool myIs8Bit = myData.has8BitContent;
bool srcIs8Bit = srcData.has8BitContent;
@ -658,13 +669,14 @@ bool String::equals(const String* src) const
uint64_t String::tryToUseAsArrayIndex() const
{
uint32_t number = 0;
const auto& data = bufferAccessData();
const size_t& len = data.length;
const size_t& len = length();
if (UNLIKELY(len == 0)) {
return Value::InvalidArrayIndexValue;
}
const auto& data = bufferAccessData();
char16_t first;
if (LIKELY(data.has8BitContent)) {
first = ((LChar*)data.buffer)[0];
@ -698,13 +710,14 @@ uint64_t String::tryToUseAsArrayIndex() const
uint64_t String::tryToUseAsIndex() const
{
uint32_t number = 0;
const auto& data = bufferAccessData();
const size_t& len = data.length;
const size_t& len = length();
if (UNLIKELY(len == 0)) {
return Value::InvalidIndexValue;
}
const auto& data = bufferAccessData();
char16_t first;
if (LIKELY(data.has8BitContent)) {
first = ((LChar*)data.buffer)[0];
@ -856,7 +869,7 @@ void* ASCIIString::operator new(size_t size)
static GC_descr descr;
if (!typeInited) {
GC_word obj_bitmap[GC_BITMAP_SIZE(ASCIIString)] = { 0 };
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(ASCIIString, m_bufferAccessData.buffer));
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(ASCIIString, m_bufferData.buffer));
descr = GC_make_descriptor(obj_bitmap, GC_WORD_LEN(ASCIIString));
typeInited = true;
}
@ -869,7 +882,7 @@ void* Latin1String::operator new(size_t size)
static GC_descr descr;
if (!typeInited) {
GC_word obj_bitmap[GC_BITMAP_SIZE(Latin1String)] = { 0 };
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(Latin1String, m_bufferAccessData.buffer));
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(Latin1String, m_bufferData.buffer));
descr = GC_make_descriptor(obj_bitmap, GC_WORD_LEN(Latin1String));
typeInited = true;
}
@ -882,7 +895,7 @@ void* UTF16String::operator new(size_t size)
static GC_descr descr;
if (!typeInited) {
GC_word obj_bitmap[GC_BITMAP_SIZE(UTF16String)] = { 0 };
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(UTF16String, m_bufferAccessData.buffer));
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(UTF16String, m_bufferData.buffer));
descr = GC_make_descriptor(obj_bitmap, GC_WORD_LEN(UTF16String));
typeInited = true;
}

View file

@ -75,22 +75,25 @@ class Latin1String;
class UTF16String;
class RopeString;
class StringView;
class Context;
struct StringBufferAccessData {
bool has8BitContent : 1;
bool hasSpecialImpl : 1;
#if defined(ESCARGOT_32)
size_t length : 30;
#else
size_t length : 62;
#endif
bool has8BitContent;
size_t length;
union {
const void* buffer;
const char* bufferAs8Bit;
const char16_t* bufferAs16Bit;
};
void* extraData;
COMPILE_ASSERT(STRING_MAXIMUM_LENGTH < (std::numeric_limits<size_t>::max() >> 2), "");
StringBufferAccessData(bool has8BitContent, size_t length, void* buffer, void* extraDataKeepInStack = nullptr)
: has8BitContent(has8BitContent)
, length(length)
, buffer(buffer)
, extraData(extraDataKeepInStack)
{
}
char16_t uncheckedCharAtFor8Bit(size_t idx) const
{
@ -176,9 +179,54 @@ protected:
String()
{
m_tag = POINTER_VALUE_STRING_TAG_IN_DATA;
m_bufferAccessData.hasSpecialImpl = false;
m_bufferData.hasSpecialImpl = false;
}
struct StringBufferData {
bool has8BitContent : 1;
bool hasSpecialImpl : 1;
#if defined(ESCARGOT_32)
size_t length : 30;
#else
size_t length : 62;
#endif
union {
const void* buffer;
const char* bufferAs8Bit;
const char16_t* bufferAs16Bit;
String* bufferAsString;
};
COMPILE_ASSERT(STRING_MAXIMUM_LENGTH < (std::numeric_limits<size_t>::max() >> 2), "");
operator StringBufferAccessData() const
{
ASSERT(!hasSpecialImpl);
return StringBufferAccessData(has8BitContent, length, const_cast<void*>(buffer));
}
char16_t uncheckedCharAtFor8Bit(size_t idx) const
{
ASSERT(has8BitContent);
return ((LChar*)buffer)[idx];
}
char16_t uncheckedCharAtFor16Bit(size_t idx) const
{
ASSERT(!has8BitContent);
return ((char16_t*)buffer)[idx];
}
char16_t charAt(size_t idx) const
{
if (has8BitContent) {
return ((LChar*)buffer)[idx];
} else {
return ((char16_t*)buffer)[idx];
}
}
};
public:
enum FromExternalMemoryTag {
FromExternalMemory
@ -189,6 +237,11 @@ public:
return true;
}
// Type query: the base String reports false; StringView overrides this to
// return true.
virtual bool isStringView()
{
return false;
}
virtual bool isCompressibleString()
{
return false;
@ -201,7 +254,7 @@ public:
bool has8BitContent() const
{
return bufferAccessData().has8BitContent;
return m_bufferData.has8BitContent;
}
static String* fromASCII(const char* s);
@ -213,30 +266,29 @@ public:
return fromDouble(v);
}
static String* fromUTF8(const char* src, size_t len);
#if defined(ENABLE_SOURCE_COMPRESSION)
static String* fromUTF8ToCompressibleString(const char* src, size_t len);
#if defined(ENABLE_COMPRESSIBLE_STRING)
static String* fromUTF8ToCompressibleString(Context* context, const char* src, size_t len);
#endif
size_t length() const
{
return m_bufferAccessData.length;
return m_bufferData.length;
}
virtual char16_t charAt(const size_t idx) const
{
return bufferAccessData().charAt(idx);
}
virtual char16_t charAt(const size_t idx) const = 0;
char16_t operator[](const size_t idx) const
{
return charAt(idx);
}
virtual void bufferAccessDataSpecialImpl()
ALWAYS_INLINE StringBufferAccessData bufferAccessData() const
{
ASSERT(m_bufferAccessData.hasSpecialImpl);
}
ALWAYS_INLINE const StringBufferAccessData& bufferAccessData() const
{
if (UNLIKELY(m_bufferAccessData.hasSpecialImpl)) {
const_cast<String*>(this)->bufferAccessDataSpecialImpl();
if (UNLIKELY(m_bufferData.hasSpecialImpl)) {
return const_cast<String*>(this)->bufferAccessDataSpecialImpl();
}
return m_bufferAccessData;
return m_bufferData;
}
bool equals(const String* src) const;
@ -410,7 +462,13 @@ private:
size_t m_tag;
protected:
StringBufferAccessData m_bufferAccessData;
StringBufferData m_bufferData;
// Hook called by bufferAccessData() when m_bufferData.hasSpecialImpl is set.
// String subclasses that set that flag override this to produce readable
// access data (e.g. RopeString flattens itself, StringView resolves its
// source string). The base version must never run: it aborts via
// RELEASE_ASSERT_NOT_REACHED(), and the return statement after it is not
// expected to execute.
virtual StringBufferAccessData bufferAccessDataSpecialImpl()
{
RELEASE_ASSERT_NOT_REACHED();
return m_bufferData;
}
static int stringCompare(size_t l1, size_t l2, const String* c1, const String* c2);
template <typename T>
@ -495,27 +553,27 @@ public:
ASCIIString(const char* str, size_t len, FromExternalMemoryTag)
: String()
{
m_bufferAccessData.bufferAs8Bit = str;
m_bufferAccessData.length = len;
m_bufferAccessData.hasSpecialImpl = false;
m_bufferAccessData.has8BitContent = true;
m_bufferData.bufferAs8Bit = str;
m_bufferData.length = len;
m_bufferData.hasSpecialImpl = false;
m_bufferData.has8BitContent = true;
}
virtual char16_t charAt(const size_t idx) const
{
return m_bufferAccessData.uncheckedCharAtFor8Bit(idx);
return m_bufferData.uncheckedCharAtFor8Bit(idx);
}
virtual const LChar* characters8() const
{
return (const LChar*)m_bufferAccessData.buffer;
return (const LChar*)m_bufferData.buffer;
}
void initBufferAccessData(ASCIIStringData& stringData)
{
m_bufferAccessData.has8BitContent = true;
m_bufferAccessData.length = stringData.length();
m_bufferAccessData.buffer = stringData.takeBuffer();
m_bufferData.has8BitContent = true;
m_bufferData.length = stringData.length();
m_bufferData.buffer = stringData.takeBuffer();
}
virtual UTF16StringData toUTF16StringData() const;
@ -562,10 +620,10 @@ public:
Latin1String(const LChar* str, size_t len, FromExternalMemoryTag)
: String()
{
m_bufferAccessData.buffer = str;
m_bufferAccessData.length = len;
m_bufferAccessData.hasSpecialImpl = false;
m_bufferAccessData.has8BitContent = true;
m_bufferData.buffer = str;
m_bufferData.length = len;
m_bufferData.hasSpecialImpl = false;
m_bufferData.has8BitContent = true;
}
Latin1String(const LChar* str, size_t len)
@ -591,19 +649,19 @@ public:
void initBufferAccessData(Latin1StringData& stringData)
{
m_bufferAccessData.has8BitContent = true;
m_bufferAccessData.length = stringData.length();
m_bufferAccessData.buffer = stringData.takeBuffer();
m_bufferData.has8BitContent = true;
m_bufferData.length = stringData.length();
m_bufferData.buffer = stringData.takeBuffer();
}
virtual char16_t charAt(const size_t idx) const
{
return m_bufferAccessData.uncheckedCharAtFor8Bit(idx);
return m_bufferData.uncheckedCharAtFor8Bit(idx);
}
virtual const LChar* characters8() const
{
return (const LChar*)m_bufferAccessData.buffer;
return (const LChar*)m_bufferData.buffer;
}
virtual UTF16StringData toUTF16StringData() const;
@ -634,27 +692,27 @@ public:
UTF16String(const char16_t* str, size_t len, FromExternalMemoryTag)
: String()
{
m_bufferAccessData.bufferAs16Bit = str;
m_bufferAccessData.length = len;
m_bufferAccessData.hasSpecialImpl = false;
m_bufferAccessData.has8BitContent = false;
m_bufferData.bufferAs16Bit = str;
m_bufferData.length = len;
m_bufferData.hasSpecialImpl = false;
m_bufferData.has8BitContent = false;
}
void initBufferAccessData(UTF16StringData& stringData)
{
m_bufferAccessData.has8BitContent = false;
m_bufferAccessData.length = stringData.length();
m_bufferAccessData.buffer = stringData.takeBuffer();
m_bufferData.has8BitContent = false;
m_bufferData.length = stringData.length();
m_bufferData.buffer = stringData.takeBuffer();
}
virtual char16_t charAt(const size_t idx) const
{
return m_bufferAccessData.uncheckedCharAtFor16Bit(idx);
return m_bufferData.uncheckedCharAtFor16Bit(idx);
}
virtual const char16_t* characters16() const
{
return (const char16_t*)m_bufferAccessData.buffer;
return (const char16_t*)m_bufferData.buffer;
}
virtual UTF16StringData toUTF16StringData() const;

View file

@ -28,7 +28,7 @@ void* StringView::operator new(size_t size)
static GC_descr descr;
if (!typeInited) {
GC_word obj_bitmap[GC_BITMAP_SIZE(StringView)] = { 0 };
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(StringView, m_string));
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(StringView, m_bufferData.bufferAsString));
descr = GC_make_descriptor(obj_bitmap, GC_WORD_LEN(StringView));
typeInited = true;
}

View file

@ -28,37 +28,26 @@ class StringView : public String {
public:
ALWAYS_INLINE StringView(String* str, const size_t s, const size_t e)
: String()
, m_string(str)
{
ASSERT(s <= e);
ASSERT(e <= str->length());
initBufferAccessData(str->bufferAccessData(), s, e);
initBufferAccessData(str, s, e);
}
ALWAYS_INLINE explicit StringView(String* str)
: String()
, m_string(str)
{
initBufferAccessData(str->bufferAccessData(), 0, str->length());
initBufferAccessData(str, 0, str->length());
}
ALWAYS_INLINE StringView(const StringView& str, const size_t s, const size_t e)
: String()
, m_string(str.string())
{
initBufferAccessData(str.bufferAccessData(), s, e);
initBufferAccessData(str.m_bufferData.bufferAsString, s + str.m_start, e + str.m_start);
}
ALWAYS_INLINE StringView()
: String()
, m_string(String::emptyString)
{
initBufferAccessData(String::emptyString->bufferAccessData(), 0, 0);
}
virtual char16_t charAt(const size_t idx) const
{
return bufferAccessData().charAt(idx);
initBufferAccessData(String::emptyString, 0, 0);
}
bool operator==(const char* src) const
@ -91,7 +80,7 @@ public:
return !operator==(src);
}
virtual UTF16StringData toUTF16StringData() const
virtual UTF16StringData toUTF16StringData() const override
{
UTF16StringData ret;
size_t len = length();
@ -104,35 +93,31 @@ public:
return ret;
}
virtual UTF8StringData toUTF8StringData() const
virtual UTF8StringData toUTF8StringData() const override
{
return bufferAccessData().toUTF8String<UTF8StringData, UTF8StringDataNonGCStd>();
}
virtual UTF8StringDataNonGCStd toNonGCUTF8StringData() const
virtual UTF8StringDataNonGCStd toNonGCUTF8StringData() const override
{
return bufferAccessData().toUTF8String<UTF8StringDataNonGCStd>();
}
virtual const LChar* characters8() const
virtual const LChar* characters8() const override
{
ASSERT(has8BitContent());
return (LChar*)m_bufferAccessData.buffer;
return (LChar*)bufferAccessData().buffer;
}
virtual const char16_t* characters16() const
virtual const char16_t* characters16() const override
{
ASSERT(!has8BitContent());
return (const char16_t*)m_bufferAccessData.buffer;
return (const char16_t*)bufferAccessData().buffer;
}
char16_t bufferedCharAt(const size_t idx) const
virtual bool isStringView() override
{
if (m_bufferAccessData.has8BitContent) {
return ((const LChar*)m_bufferAccessData.buffer)[idx];
} else {
return ((const char16_t*)m_bufferAccessData.buffer)[idx];
}
return true;
}
void* operator new(size_t size, void* ptr)
@ -142,43 +127,36 @@ public:
void* operator new(size_t size);
void* operator new[](size_t size) = delete;
String* string() const
{
return m_string;
}
size_t start() const
{
size_t src = (size_t)string()->bufferAccessData().buffer;
size_t my = (size_t)bufferAccessData().buffer;
size_t diff = my - src;
if (!bufferAccessData().has8BitContent) {
diff /= 2;
}
return diff;
}
size_t end() const
{
return start() + length();
}
protected:
ALWAYS_INLINE void initBufferAccessData(const StringBufferAccessData& srcData, size_t start, size_t end)
virtual StringBufferAccessData bufferAccessDataSpecialImpl() override
{
m_bufferAccessData.has8BitContent = srcData.has8BitContent;
m_bufferAccessData.length = end - start;
if (srcData.has8BitContent) {
m_bufferAccessData.buffer = ((LChar*)srcData.buffer) + start;
ASSERT(m_bufferData.hasSpecialImpl);
StringBufferAccessData r = m_bufferData.bufferAsString->bufferAccessData();
// keep original buffer pointer in stack
// without this, compressible string can free this pointer
r.extraData = const_cast<void*>(r.buffer);
r.length = m_bufferData.length;
if (r.has8BitContent) {
r.bufferAs8Bit += m_start;
} else {
m_bufferAccessData.buffer = ((char16_t*)srcData.buffer) + start;
r.bufferAs16Bit += m_start;
}
return r;
}
// Configures this view over `str`: remembers the source string itself (not its
// raw buffer), the offset `start` of the first character, and the view length
// `end - start`. The 8-bit/16-bit flag is copied from the source string.
// Marks the buffer data as "special impl" so the character buffer is produced
// on demand from the source string.
ALWAYS_INLINE void initBufferAccessData(String* str, size_t start, size_t end)
{
    const size_t viewLength = end - start;

    m_start = start;
    m_bufferData.length = viewLength;
    m_bufferData.has8BitContent = str->has8BitContent();
    m_bufferData.bufferAsString = str;
    m_bufferData.hasSpecialImpl = true;
}
private:
String* m_string;
size_t m_start;
};
}

View file

@ -24,9 +24,12 @@
#include "runtime/ArrayBufferObject.h"
#include "runtime/StringObject.h"
#include "runtime/JobQueue.h"
#include "runtime/CompressibleString.h"
#include "interpreter/ByteCode.h"
#include "parser/ASTAllocator.h"
#include <pthread.h>
namespace Escargot {
extern size_t g_doubleInSmallValueTag;
@ -92,6 +95,33 @@ bool VMInstance::regexpLastIndexNativeSetter(ExecutionState& state, Object* self
static ObjectPropertyNativeGetterSetterData regexpLastIndexGetterSetterData(
true, false, false, &VMInstance::regexpLastIndexNativeGetter, &VMInstance::regexpLastIndexNativeSetter);
#if defined(ENABLE_COMPRESSIBLE_STRING)
// Tuning knobs for compressing CompressibleStrings from the GC reclaim-end hook.
// The two intervals are compared against differences of fastTickCount() values
// (1000 ticks per second).
// Minimum tick distance between two compression passes.
#define COMPRESSIBLE_COMPRESS_CHECK_INTERVAL 1000
// A string is a candidate only if it has been unused for longer than this many ticks.
#define COMPRESSIBLE_COMPRESS_USED_BEFORE_INTERVAL 1000
// A string is a candidate only if its decompressed buffer is larger than this
// (presumably bytes — confirm against CompressibleString::decomressedBufferSize()).
// Parenthesized so the macro stays a single operand in any enclosing expression.
#define COMPRESSIBLE_COMPRESS_MIN_SIZE (1024 * 128)
void VMInstance::compressStringsIfNeeds(uint64_t currentTickCount)
{
auto& currentAllocatedCompressibleStrings = compressibleStrings();
const size_t& currentAllocatedCompressibleStringsCount = currentAllocatedCompressibleStrings.size();
size_t mostBigIndex = SIZE_MAX;
for (size_t i = 0; i < currentAllocatedCompressibleStringsCount; i++) {
if (!currentAllocatedCompressibleStrings[i]->isCompressed() && currentTickCount - currentAllocatedCompressibleStrings[i]->m_lastUsedTickcount > COMPRESSIBLE_COMPRESS_USED_BEFORE_INTERVAL && currentAllocatedCompressibleStrings[i]->decomressedBufferSize() > COMPRESSIBLE_COMPRESS_MIN_SIZE) {
if (mostBigIndex == SIZE_MAX) {
mostBigIndex = i;
} else if (currentAllocatedCompressibleStrings[i]->decomressedBufferSize() > currentAllocatedCompressibleStrings[mostBigIndex]->decomressedBufferSize()) {
mostBigIndex = i;
}
}
}
if (mostBigIndex != SIZE_MAX) {
currentAllocatedCompressibleStrings[mostBigIndex]->compress();
}
}
#endif
void VMInstance::gcEventCallback(GC_EventType t, void* data)
{
@ -111,6 +141,13 @@ void VMInstance::gcEventCallback(GC_EventType t, void* data)
}
}
} else if (t == GC_EventType::GC_EVENT_RECLAIM_END) {
#if defined(ENABLE_COMPRESSIBLE_STRING)
auto currentTick = fastTickCount();
if (currentTick - self->m_lastCompressibleStringsTestTime > COMPRESSIBLE_COMPRESS_CHECK_INTERVAL) {
self->compressStringsIfNeeds(currentTick);
self->m_lastCompressibleStringsTestTime = currentTick;
}
#endif
auto& currentCodeSizeTotal = self->compiledByteCodeSize();
if (currentCodeSizeTotal == std::numeric_limits<size_t>::max()) {
@ -172,6 +209,7 @@ void* VMInstance::operator new(size_t size)
GC_set_bit(desc, GC_WORD_OFFSET(VMInstance, m_toStringRecursionPreventer.m_registeredItems));
GC_set_bit(desc, GC_WORD_OFFSET(VMInstance, m_bumpPointerAllocator));
GC_set_bit(desc, GC_WORD_OFFSET(VMInstance, m_regexpCache));
GC_set_bit(desc, GC_WORD_OFFSET(VMInstance, m_regexpOptionStringCache));
GC_set_bit(desc, GC_WORD_OFFSET(VMInstance, m_cachedUTC));
GC_set_bit(desc, GC_WORD_OFFSET(VMInstance, m_platform));
GC_set_bit(desc, GC_WORD_OFFSET(VMInstance, m_jobQueue));
@ -184,11 +222,20 @@ void* VMInstance::operator new(size_t size)
VMInstance::~VMInstance()
{
auto& v = compiledByteCodeBlocks();
for (size_t i = 0; i < v.size(); i++) {
v[i]->m_isOwnerMayFreed = true;
{
auto& v = compiledByteCodeBlocks();
for (size_t i = 0; i < v.size(); i++) {
v[i]->m_isOwnerMayFreed = true;
}
}
#if defined(ENABLE_COMPRESSIBLE_STRING)
{
auto& v = compressibleStrings();
for (size_t i = 0; i < v.size(); i++) {
v[i]->m_isOwnerMayFreed = true;
}
}
#endif
m_isFinalized = true;
GC_remove_event_callback(gcEventCallback, this);
if (m_onVMInstanceDestroy) {
@ -207,6 +254,10 @@ VMInstance::VMInstance(Platform* platform, const char* locale, const char* timez
, m_isFinalized(false)
, m_didSomePrototypeObjectDefineIndexedProperty(false)
, m_compiledByteCodeSize(0)
#if defined(ENABLE_COMPRESSIBLE_STRING)
, m_lastCompressibleStringsTestTime(0)
, m_compressibleStringsUncomressedBufferSize(0)
#endif
, m_onVMInstanceDestroy(nullptr)
, m_onVMInstanceDestroyData(nullptr)
, m_cachedUTC(nullptr)
@ -219,6 +270,20 @@ VMInstance::VMInstance(Platform* platform, const char* locale, const char* timez
},
nullptr, nullptr, nullptr);
pthread_attr_t attr;
RELEASE_ASSERT(pthread_getattr_np(pthread_self(), &attr) == 0);
size_t size;
RELEASE_ASSERT(pthread_attr_getstack(&attr, &m_stackStartAddress, &size) == 0);
pthread_attr_destroy(&attr);
#ifdef STACK_GROWS_DOWN
m_stackStartAddress = (char*)m_stackStartAddress + size;
#endif
// test stack base property aligned
RELEASE_ASSERT(((size_t)m_stackStartAddress) % sizeof(size_t) == 0);
if (!String::emptyString) {
String::emptyString = new (NoGC) ASCIIString("");
}
@ -226,6 +291,9 @@ VMInstance::VMInstance(Platform* platform, const char* locale, const char* timez
m_bumpPointerAllocator = new (PointerFreeGC) WTF::BumpPointerAllocator();
m_regexpCache = new (GC) RegExpCacheMap();
m_regexpOptionStringCache = (ASCIIString**)GC_MALLOC(32 * sizeof(ASCIIString*));
memset(m_regexpOptionStringCache, 0, 32 * sizeof(ASCIIString*));
#ifdef ENABLE_ICU
m_timezone = nullptr;
if (timezone) {
@ -241,7 +309,17 @@ VMInstance::VMInstance(Platform* platform, const char* locale, const char* timez
} else if (getenv("LOCALE") && strlen(getenv("LOCALE"))) {
m_locale = getenv("LOCALE");
} else {
#if defined(ENABLE_RUNTIME_ICU_BINDER)
m_locale = RuntimeICUBinder::ICU::findSystemLocale();
#else
m_locale = uloc_getDefault();
#endif
}
#endif
#if defined(ESCARGOT_ENABLE_TEST)
if (getenv("RANDOM_SEED_ZERO")) {
m_randEngine = std::mt19937(0);
}
#endif

View file

@ -37,6 +37,7 @@ class CodeBlock;
class JobQueue;
class Job;
class ASTAllocator;
class CompressibleString;
#define DEFINE_GLOBAL_SYMBOLS(F) \
F(hasInstance) \
@ -179,6 +180,18 @@ public:
return m_compiledByteCodeSize;
}
#if defined(ENABLE_COMPRESSIBLE_STRING)
std::vector<CompressibleString*>& compressibleStrings()
{
return m_compressibleStrings;
}
size_t& compressibleStringsUncomressedBufferSize()
{
return m_compressibleStringsUncomressedBufferSize;
}
#endif
std::mt19937& randEngine()
{
return m_randEngine;
@ -194,6 +207,17 @@ public:
return m_currentSandBox;
}
void* stackStartAddress()
{
return m_stackStartAddress;
}
// Table of cached RegExp option strings; the constructor allocates it with
// 32 slots, all initially null.
ASCIIString** regexpOptionStringCache()
{
    return this->m_regexpOptionStringCache;
}
void setOnDestroyCallback(void (*onVMInstanceDestroy)(VMInstance* instance, void* data), void* data)
{
m_onVMInstanceDestroy = onVMInstanceDestroy;
@ -229,15 +253,26 @@ private:
std::vector<ByteCodeBlock*> m_compiledByteCodeBlocks;
size_t m_compiledByteCodeSize;
#if defined(ENABLE_COMPRESSIBLE_STRING)
uint64_t m_lastCompressibleStringsTestTime;
size_t m_compressibleStringsUncomressedBufferSize;
std::vector<CompressibleString*> m_compressibleStrings;
NEVER_INLINE void compressStringsIfNeeds(uint64_t currentTickCount = fastTickCount());
#endif
static void gcEventCallback(GC_EventType t, void* data);
void (*m_onVMInstanceDestroy)(VMInstance* instance, void* data);
void* m_onVMInstanceDestroyData;
ToStringRecursionPreventer m_toStringRecursionPreventer;
void* m_stackStartAddress;
// regexp object data
WTF::BumpPointerAllocator* m_bumpPointerAllocator;
RegExpCacheMap* m_regexpCache;
ASCIIString** m_regexpOptionStringCache;
// date object data
#ifdef ENABLE_ICU

View file

@ -170,12 +170,19 @@ static OptionalRef<StringRef> builtinHelperFileRead(OptionalRef<ExecutionStateRe
}
}
fclose(fp);
#if defined(ENABLE_SOURCE_COMPRESSION)
if (hasNonLatin1Content) {
src = StringRef::createFromUTF8ToCompressibleString(utf8Str.data(), utf8Str.length());
#if defined(ENABLE_COMPRESSIBLE_STRING)
if (state) {
if (hasNonLatin1Content) {
src = StringRef::createFromUTF8ToCompressibleString(state->context(), utf8Str.data(), utf8Str.length());
} else {
src = StringRef::createFromLatin1ToCompressibleString(state->context(), str.data(), str.length());
}
} else {
src = StringRef::createCompressibleString(str.data(), str.length());
if (hasNonLatin1Content) {
src = StringRef::createFromUTF8(utf8Str.data(), utf8Str.length());
} else {
src = StringRef::createFromLatin1(str.data(), str.length());
}
}
#else
if (hasNonLatin1Content) {
@ -543,6 +550,11 @@ int main(int argc, char* argv[])
});
PersistentRefHolder<ContextRef> context = createEscargotContext(instance.get());
if (getenv("GC_FREE_SPACE_DIVISOR") && strlen(getenv("GC_FREE_SPACE_DIVISOR"))) {
int d = atoi(getenv("GC_FREE_SPACE_DIVISOR"));
Memory::setGCFrequency(d);
}
bool runShell = true;
bool seenModule = false;
for (int i = 1; i < argc; i++) {
@ -579,7 +591,11 @@ int main(int argc, char* argv[])
fclose(fp);
runShell = false;
StringRef* src = builtinHelperFileRead(nullptr, argv[i], "read").get();
StringRef* src = Evaluator::execute(context, [](ExecutionStateRef* state, char* c) -> ValueRef* {
return builtinHelperFileRead(state, c, "read").get();
},
argv[i])
.result->asString();
if (!evalScript(context, src, StringRef::createFromUTF8(argv[i], strlen(argv[i])), false, seenModule)) {
return 3;
@ -592,11 +608,6 @@ int main(int argc, char* argv[])
}
}
if (getenv("GC_FREE_SPACE_DIVISOR") && strlen(getenv("GC_FREE_SPACE_DIVISOR"))) {
int d = atoi(getenv("GC_FREE_SPACE_DIVISOR"));
Memory::setGCFrequency(d);
}
while (runShell) {
static char buf[2048];
printf("escargot> ");

View file

@ -101,6 +101,17 @@ int gettimeofday(struct timeval *tv, struct timezone *tz)
namespace Escargot {
// Cheap millisecond tick counter (advances by 1000 per second, coarse resolution).
// Uses CLOCK_MONOTONIC_COARSE when the platform provides it and falls back to
// tickCount() otherwise.
uint64_t fastTickCount()
{
#if defined(CLOCK_MONOTONIC_COARSE)
    // Zero-initialized so a failing clock_gettime() yields 0 instead of garbage.
    timespec ts{};
    clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
    // Widen to 64 bits BEFORE scaling: with a 32-bit `unsigned long` the
    // product tv_sec * 1000 would wrap after roughly 49.7 days.
    return static_cast<uint64_t>(ts.tv_sec) * 1000 + static_cast<uint64_t>(ts.tv_nsec) / 1000000;
#else
    return tickCount();
#endif
}
uint64_t tickCount()
{
struct timeval gettick;

View file

@ -46,6 +46,21 @@ inline void clearStack()
#error
#endif
// currentStackPointer(): returns an address inside the caller-visible stack
// region, intended for address comparison only (never dereference it).
#if defined(COMPILER_GCC) || defined(COMPILER_CLANG)
inline void* currentStackPointer()
{
    // Frame address of the function this call ends up in.
    return __builtin_frame_address(0);
}
#elif defined(COMPILER_MSVC)
inline void* currentStackPointer()
{
    // A volatile local forces a real stack slot whose address approximates
    // the current stack position.
    volatile int marker = 0;
    // `&marker` is `volatile int*`, which does not convert implicitly to
    // `void*`; the qualifier must be removed explicitly.
    return const_cast<int*>(&marker);
}
#else
#error
#endif
class StorePositiveIntergerAsOdd {
public:
StorePositiveIntergerAsOdd(const size_t& src = 0)
@ -63,6 +78,7 @@ private:
size_t m_data;
};
uint64_t fastTickCount(); // increase 1000 by 1 second(fast version. not super accurate)
uint64_t tickCount(); // increase 1000 by 1 second
uint64_t longTickCount(); // increase 1000000 by 1 second
uint64_t timestamp(); // increase 1000 by 1 second

View file

@ -69,18 +69,37 @@ struct ComputeReservedCapacityFunctionWithPercent {
}
};
template <size_t glowFactor = 125, size_t maxGap = 256>
template <size_t glowFactor = 125, size_t maxGap = 512>
struct ComputeReservedCapacityFunctionWithPercentAndGap {
size_t operator()(size_t newSize)
{
size_t newCapacity = newSize * (glowFactor / 100.f);
if (newCapacity - glowFactor > maxGap) {
if ((newCapacity - newSize) > maxGap) {
newCapacity = newSize + maxGap;
}
return newCapacity;
}
};
template <size_t glowFactor = 125>
struct ComputeReservedCapacityFunctionWithLog2AndPercent {
size_t operator()(size_t newSize)
{
if (newSize == 0) {
return 0;
}
if (newSize <= 32) {
ComputeReservedCapacityFunctionWithLog2<> f1;
return f1(newSize);
}
ComputeReservedCapacityFunctionWithLog2<> f1;
ComputeReservedCapacityFunctionWithPercentAndGap<> f2;
return std::min(f1(newSize), f2(newSize));
}
};
using VectorDefaultComputeReservedCapacityFunction = ComputeReservedCapacityFunctionWithPercent<>;
template <typename T, typename Allocator, typename ComputeReservedCapacityFunction = VectorDefaultComputeReservedCapacityFunction>

View file

@ -32,7 +32,7 @@
- LZ4 source repository : https://github.com/lz4/lz4
*/
#if defined(ENABLE_SOURCE_COMPRESSION)
#if defined(ENABLE_COMPRESSIBLE_STRING)
/*-************************************
* Tuning parameters

View file

@ -33,7 +33,7 @@
- LZ4 source repository : https://github.com/lz4/lz4
*/
#if defined(ENABLE_SOURCE_COMPRESSION)
#if defined(ENABLE_COMPRESSIBLE_STRING)
#if defined(__cplusplus)
extern "C" {

View file

@ -192,6 +192,28 @@ static char *timeValueFromFile(const char *filename, const char *tag, char *valu
}
#endif
// Strips everything from the first '.' onward (e.g. the ".UTF-8" codeset
// suffix of "en_US.UTF-8"); input without a '.' is returned unchanged.
static std::string extractLocaleName(std::string input)
{
    const std::string::size_type dotPos = input.find('.');
    if (dotPos == std::string::npos) {
        return input;
    }
    return input.substr(0, dotPos);
}
// Determines the system locale name without asking ICU first.
// Lookup order:
//   1. the LANG environment variable, if set and non-empty;
//   2. the value returned by setlocale(LC_CTYPE, ""), if non-empty
//      (note: this call also switches the process LC_CTYPE locale);
//   3. ICU's uloc_getDefault().
// For (1) and (2) anything from the first '.' onward is stripped.
std::string ICU::findSystemLocale()
{
    const char* localeName = getenv("LANG");
    if (localeName == nullptr || localeName[0] == '\0') {
        localeName = setlocale(LC_CTYPE, "");
    }

    if (localeName != nullptr && localeName[0] != '\0') {
        return extractLocaleName(localeName);
    }

    return ICU::instance().uloc_getDefault();
}
std::string ICU::findSystemTimezoneName()
{
#if defined(OS_POSIX)

View file

@ -167,6 +167,7 @@ private:
public:
static ICU& instance();
static std::string findSystemLocale();
static std::string findSystemTimezoneName();
enum Soname {