escargot/src/runtime/CompressibleString.cpp
Seonghyun Kim caa0fbc3fe Implement compress CompressibleStrings on GC reclaim end event
* Compress CompressibleStrings on GC reclaim end event
  - If there is a reference to the data of a CompressibleString on the stack, we should give up compressing.
    We don't need to search the heap space because I redesigned StringView
    (we should not store string buffer data on the heap without its owner).
* Redesign StringView
  - Don't save the string buffer address as a member, because the buffer of a CompressibleString can be deleted.
  - If we don't save the string buffer address on StringView, parser performance may drop,
    because the parser accesses string data a lot.
    So I introduce ParserStringView, which saves the buffer address. We should use ParserStringView in the parser only.
    We can save the string buffer address while parsing because GC is disabled while parsing.

* Enable CompressibleString always
* Implement cache of RegExpOptionStrings
* Implement finding system locale function on RuntimeICUBinder avoiding call uloc_getDefault.

Signed-off-by: Seonghyun Kim <sh8281.kim@samsung.com>
2020-01-10 17:37:00 +09:00

262 lines
8.4 KiB
C++

/*
* Copyright (c) 2019-present Samsung Electronics Co., Ltd
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
* USA
*/
#if defined(ENABLE_COMPRESSIBLE_STRING)
#include "Escargot.h"
#include "CompressibleString.h"
#include "runtime/Context.h"
#include "runtime/VMInstance.h"
#include "lz4.h"
namespace Escargot {
void* CompressibleString::operator new(size_t size)
{
static bool typeInited = false;
static GC_descr descr;
if (!typeInited) {
GC_word obj_bitmap[GC_BITMAP_SIZE(CompressibleString)] = { 0 };
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(CompressibleString, m_context));
descr = GC_make_descriptor(obj_bitmap, GC_WORD_LEN(CompressibleString));
typeInited = true;
}
return GC_MALLOC_EXPLICITLY_TYPED(size, descr);
}
// Base constructor shared by every other constructor.
// - Marks the buffer data as having a special implementation.
// - Registers this string in the VM instance's list of compressible strings.
// - Installs a GC finalizer that releases whichever storage the string owns at
//   that moment (compressed chunks or the plain buffer) and, unless
//   m_isOwnerMayFreed is set, removes the string from the VM instance's
//   bookkeeping.
// The character buffer itself is not set here; the delegating constructors call
// initBufferAccessData() afterwards.
CompressibleString::CompressibleString(Context* context)
    : String()
    , m_isOwnerMayFreed(false)
    , m_isCompressed(false)
    , m_context(context)
    , m_lastUsedTickcount(fastTickCount())
{
    m_bufferData.hasSpecialImpl = true;

    auto& v = context->vmInstance()->compressibleStrings();
    v.push_back(this);

    // NOTE: the lambda is a macro argument, so its body must not contain commas
    // outside of parentheses.
    GC_REGISTER_FINALIZER_NO_ORDER(this, [](void* obj, void*) {
        CompressibleString* self = (CompressibleString*)obj;

        // Release the storage that is currently live.
        if (self->isCompressed()) {
            self->m_compressedData.~CompressedDataVector();
        } else {
            deallocateStringDataBuffer(const_cast<void*>(self->m_bufferData.buffer));
        }

        // Skip the VM instance bookkeeping when the owner may already be freed.
        if (!self->m_isOwnerMayFreed) {
            self->m_context->vmInstance()->compressibleStringsUncomressedBufferSize() -= self->decomressedBufferSize();
            auto& v = self->m_context->vmInstance()->compressibleStrings();
            // erase(end()) is undefined behaviour, so only erase when the
            // string is really present in the list.
            auto position = std::find(v.begin(), v.end(), self);
            if (position != v.end()) {
                v.erase(position);
            }
        }
    },
                                   nullptr, nullptr, nullptr);
}
// Builds an 8-bit string by copying `len` bytes from `str` into a freshly
// allocated string data buffer.
CompressibleString::CompressibleString(Context* context, const char* str, size_t len)
    : CompressibleString(context)
{
    const size_t byteLength = sizeof(char) * len;
    void* storage = allocateStringDataBuffer(byteLength);
    memcpy(storage, str, len);
    initBufferAccessData(storage, len, true);
}
// Builds an 8-bit string by copying `len` LChar units from `str` into a freshly
// allocated string data buffer.
CompressibleString::CompressibleString(Context* context, const LChar* str, size_t len)
    : CompressibleString(context)
{
    const size_t byteLength = sizeof(char) * len;
    void* storage = allocateStringDataBuffer(byteLength);
    memcpy(storage, str, len);
    initBufferAccessData(storage, len, true);
}
// Builds a 16-bit string by copying `len` UTF-16 code units (two bytes each)
// from `str` into a freshly allocated string data buffer.
CompressibleString::CompressibleString(Context* context, const char16_t* str, size_t len)
    : CompressibleString(context)
{
    // Two bytes per code unit.
    const size_t byteLength = sizeof(char) * len * 2;
    void* storage = allocateStringDataBuffer(byteLength);
    memcpy(storage, str, byteLength);
    initBufferAccessData(storage, len, false);
}
// Wraps an existing character buffer without copying it.
// `stringLength` is the length in characters and `is8bit` selects the 8-bit or
// 16-bit interpretation of `buffer`.
// NOTE(review): the finalizer and compressWorker() release the buffer through
// deallocateStringDataBuffer(), so `buffer` presumably has to come from
// allocateStringDataBuffer() - confirm with callers.
CompressibleString::CompressibleString(Context* context, void* buffer, size_t stringLength, bool is8bit)
    : CompressibleString(context)
{
    this->initBufferAccessData(buffer, stringLength, is8bit);
}
// Records the buffer pointer, the character count and the character width of
// this string, then adds the string's decompressed size to the VM instance's
// counter of uncompressed bytes.
void CompressibleString::initBufferAccessData(void* data, size_t len, bool is8bit)
{
    m_bufferData.buffer = data;
    m_bufferData.length = len;
    m_bufferData.has8BitContent = is8bit;

    // Must run after the fields above are set: the size is derived from them.
    auto& uncompressedTotal = m_context->vmInstance()->compressibleStringsUncomressedBufferSize();
    uncompressedTotal += decomressedBufferSize();
}
// Returns the string content converted to UTF-8 in a UTF8StringDataNonGCStd
// container.
UTF8StringDataNonGCStd CompressibleString::toNonGCUTF8StringData() const
{
    auto accessData = bufferAccessData();
    return accessData.toUTF8String<UTF8StringDataNonGCStd>();
}
// Returns the string content converted to UTF-8 in a UTF8StringData container.
UTF8StringData CompressibleString::toUTF8StringData() const
{
    auto accessData = bufferAccessData();
    return accessData.toUTF8String<UTF8StringData>();
}
// Returns the string content as UTF-16 data.
// 16-bit content is copied directly from the buffer; 8-bit content is widened
// one character at a time.
UTF16StringData CompressibleString::toUTF16StringData() const
{
    auto accessData = bufferAccessData();

    // Already 16-bit: construct the result straight from the buffer.
    if (!accessData.has8BitContent) {
        return UTF16StringData(accessData.bufferAs16Bit, accessData.length);
    }

    // 8-bit content: widen every character into a pre-sized result.
    const size_t charCount = accessData.length;
    UTF16StringData widened;
    widened.resizeWithUninitializedValues(charCount);
    for (size_t pos = 0; pos < charCount; ++pos) {
        widened[pos] = accessData.uncheckedCharAtFor8Bit(pos);
    }
    return widened;
}
// Allocates `byteLength` bytes of raw storage for string character data with
// malloc. The result is returned unchecked; release it with
// deallocateStringDataBuffer().
void* CompressibleString::allocateStringDataBuffer(size_t byteLength)
{
    void* rawStorage = malloc(byteLength);
    return rawStorage;
}
// Releases storage previously obtained from allocateStringDataBuffer().
// The pointer is handed to free().
void CompressibleString::deallocateStringDataBuffer(void* ptr)
{
    void* const rawStorage = ptr;
    free(rawStorage);
}
// Tries to compress the character buffer of this (currently uncompressed)
// string. Returns false for an empty string; otherwise returns whatever
// compressWorker() reports for the matching character width.
// The current stack pointer is captured here and passed to the worker.
bool CompressibleString::compress()
{
    ASSERT(!m_isCompressed);

    // An empty string has nothing to compress.
    if (UNLIKELY(m_bufferData.length == 0)) {
        return false;
    }

    if (!m_bufferData.has8BitContent) {
        return compressWorker<char16_t>(currentStackPointer());
    }
    return compressWorker<LChar>(currentStackPointer());
}
// Restores the plain character buffer of a compressed, non-empty string by
// dispatching to decompressWorker() for the matching character width.
void CompressibleString::decompress()
{
    ASSERT(m_isCompressed);
    ASSERT(m_bufferData.length);

    if (!m_bufferData.has8BitContent) {
        decompressWorker<char16_t>();
        return;
    }
    decompressWorker<LChar>();
}
// Number of source bytes handed to LZ4 per compression call. The value is
// chosen so that the LZ4 worst-case output bound for one chunk is exactly
// 1 MiB, which the static_assert below verifies at compile time.
static constexpr const size_t g_compressChunkSize = 1044465;
static_assert((1024 * 1024) == LZ4_COMPRESSBOUND(g_compressChunkSize), "");
template <typename StringType>
bool CompressibleString::compressWorker(void* callerSP)
{
ASSERT(!m_isCompressed);
ASSERT(m_bufferData.length > 0);
#if defined(STACK_GROWS_DOWN)
size_t* start = (size_t*)((size_t)callerSP & ~(sizeof(size_t) - 1));
size_t* end = (size_t*)m_context->vmInstance()->stackStartAddress();
#else
size_t* start = (size_t*)m_context->vmInstance()->stackStartAddress();
size_t* end = (size_t*)((size_t)callerSP & ~(sizeof(size_t) - 1));
#endif
while (start != end) {
if (UNLIKELY(*start == (size_t)m_bufferData.buffer)) {
// if there is reference on stack, we cannot compress string.
return false;
}
start++;
}
size_t originByteLength = m_bufferData.length * sizeof(StringType);
int lastBoundLength = 0;
std::unique_ptr<char[]> compBuffer;
for (size_t srcIndex = 0; srcIndex < originByteLength; srcIndex += g_compressChunkSize) {
int srcSize = (int)std::min(g_compressChunkSize, originByteLength - srcIndex);
int boundLength = LZ4::LZ4_compressBound(srcSize);
if (boundLength > lastBoundLength) {
compBuffer.reset(new char[boundLength]);
lastBoundLength = boundLength;
}
int compressedLength = LZ4::LZ4_compress_default(m_bufferData.bufferAs8Bit + srcIndex, (char*)compBuffer.get(), srcSize, boundLength);
if (!compressedLength) {
// compression fail
return false;
}
ASSERT(compressedLength > 0);
m_compressedData.push_back(std::vector<char>(compBuffer.get(), compBuffer.get() + compressedLength));
}
m_context->vmInstance()->compressibleStringsUncomressedBufferSize() -= decomressedBufferSize();
// immediately free the original string after compression when there is no reference on stack
deallocateStringDataBuffer(const_cast<void*>(m_bufferData.buffer));
m_bufferData.bufferAs8Bit = nullptr;
m_isCompressed = true;
return true;
}
template <typename StringType>
void CompressibleString::decompressWorker()
{
ASSERT(m_isCompressed);
size_t originByteLength = m_bufferData.length * sizeof(StringType);
char* dstBuffer = (char*)allocateStringDataBuffer(originByteLength);
int dstIndex = 0;
for (size_t srcIndex = 0, bufIndex = 0; srcIndex < originByteLength; srcIndex += g_compressChunkSize, bufIndex++) {
int srcSize = (int)std::min(g_compressChunkSize, originByteLength - srcIndex);
int decompressedLength = LZ4::LZ4_decompress_safe(m_compressedData[bufIndex].data(), dstBuffer + dstIndex, m_compressedData[bufIndex].size(), srcSize);
if (!decompressedLength) {
// decompress fail
RELEASE_ASSERT_NOT_REACHED();
}
dstIndex += srcSize;
}
CompressedDataVector().swap(m_compressedData);
m_bufferData.bufferAs8Bit = const_cast<const char*>(dstBuffer);
m_isCompressed = false;
m_context->vmInstance()->compressibleStringsUncomressedBufferSize() += decomressedBufferSize();
}
}
#endif // ENABLE_COMPRESSIBLE_STRING