mirror of
https://github.com/Samsung/escargot.git
synced 2026-06-22 10:01:50 +00:00
Use QuickJS RegExp engine
Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
This commit is contained in:
parent
8444ae515f
commit
48ddeb5833
13 changed files with 9996 additions and 28 deletions
|
|
@ -173,6 +173,10 @@ IF (ESCARGOT_WASM)
|
|||
SET (ESCARGOT_DEFINITIONS ${ESCARGOT_DEFINITIONS} -DENABLE_WASM)
|
||||
ENDIF()
|
||||
|
||||
IF (ESCARGOT_QUICKJS_REGEXP)
|
||||
SET (ESCARGOT_DEFINITIONS ${ESCARGOT_DEFINITIONS} -DENABLE_QUICKJS_REGEXP)
|
||||
ENDIF()
|
||||
|
||||
#######################################################
|
||||
# flags for $(MODE) : debug/release
|
||||
#######################################################
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ SET (ESCARGOT_INCDIRS
|
|||
${ESCARGOT_THIRD_PARTY_ROOT}/lz4/
|
||||
${ESCARGOT_THIRD_PARTY_ROOT}/rapidjson/include/
|
||||
${ESCARGOT_THIRD_PARTY_ROOT}/yarr/
|
||||
${ESCARGOT_THIRD_PARTY_ROOT}/quickjs_libregexp/
|
||||
${ESCARGOT_THIRD_PARTY_ROOT}/runtime_icu_binder/
|
||||
)
|
||||
|
||||
|
|
@ -50,6 +51,7 @@ ENDIF()
|
|||
# SOURCE FILES
|
||||
FILE (GLOB_RECURSE ESCARGOT_SRC ${ESCARGOT_ROOT}/src/*.cpp)
|
||||
FILE (GLOB YARR_SRC ${ESCARGOT_THIRD_PARTY_ROOT}/yarr/*.cpp)
|
||||
FILE (GLOB REGEXP_SRC ${ESCARGOT_THIRD_PARTY_ROOT}/quickjs_libregexp/*.cpp)
|
||||
FILE (GLOB DOUBLE_CONVERSION_SRC ${ESCARGOT_THIRD_PARTY_ROOT}/double_conversion/*.cc)
|
||||
FILE (GLOB LZ4_SRC ${ESCARGOT_THIRD_PARTY_ROOT}/lz4/*.cpp)
|
||||
|
||||
|
|
@ -65,6 +67,7 @@ ENDIF()
|
|||
SET (ESCARGOT_SRC_LIST
|
||||
${ESCARGOT_SRC}
|
||||
${YARR_SRC}
|
||||
${REGEXP_SRC}
|
||||
${DOUBLE_CONVERSION_SRC}
|
||||
${LZ4_SRC}
|
||||
${CCTEST_SRC}
|
||||
|
|
|
|||
|
|
@ -347,7 +347,7 @@ static Value builtinStringReplace(ExecutionState& state, Value thisValue, size_t
|
|||
|
||||
bool isSearchValueRegExp = searchValue.isPointerValue() && searchValue.asPointerValue()->isRegExpObject();
|
||||
// we should keep the fast path while the performance issue is unresolved
|
||||
bool canUseFastPath = searchValue.isString() || (isSearchValueRegExp && searchValue.asPointerValue()->asRegExpObject()->yarrPatern()->m_captureGroupNames.size() == 0);
|
||||
bool canUseFastPath = searchValue.isString() || (isSearchValueRegExp && !searchValue.asPointerValue()->asRegExpObject()->hasNamedGroups());
|
||||
if (!searchValue.isUndefinedOrNull()) {
|
||||
Value replacer = Object::getMethod(state, searchValue, ObjectPropertyName(state.context()->vmInstance()->globalSymbols().replace));
|
||||
if (canUseFastPath && isSearchValueRegExp && replacer.isPointerValue() && replacer.asPointerValue() == state.context()->globalObject()->regexpReplaceMethod()) {
|
||||
|
|
|
|||
|
|
@ -23,10 +23,14 @@
|
|||
#include "ArrayObject.h"
|
||||
#include "VMInstance.h"
|
||||
|
||||
#ifdef ENABLE_QUICKJS_REGEXP
|
||||
#include "libregexp.h"
|
||||
#else /* !ENABLE_QUICKJS_REGEXP */
|
||||
#include "WTFBridge.h"
|
||||
#include "Yarr.h"
|
||||
#include "YarrPattern.h"
|
||||
#include "YarrInterpreter.h"
|
||||
#endif /* ENABLE_QUICKJS_REGEXP */
|
||||
|
||||
namespace Escargot {
|
||||
|
||||
|
|
@ -57,8 +61,13 @@ RegExpObject::RegExpObject(ExecutionState& state, Object* proto, bool hasLastInd
|
|||
, m_source(NULL)
|
||||
, m_optionString(NULL)
|
||||
, m_option(None)
|
||||
#ifdef ENABLE_QUICKJS_REGEXP
|
||||
, m_bytecode(NULL)
|
||||
, m_groupNames(NULL)
|
||||
#else /* !ENABLE_QUICKJS_REGEXP */
|
||||
, m_yarrPattern(NULL)
|
||||
, m_bytecodePattern(NULL)
|
||||
#endif /* ENABLE_QUICKJS_REGEXP */
|
||||
, m_lastIndex(Value(0))
|
||||
, m_lastExecutedString(NULL)
|
||||
, m_legacyFeaturesEnabled(true)
|
||||
|
|
@ -90,8 +99,13 @@ void* RegExpObject::operator new(size_t size)
|
|||
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(RegExpObject, m_values));
|
||||
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(RegExpObject, m_source));
|
||||
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(RegExpObject, m_optionString));
|
||||
#ifdef ENABLE_QUICKJS_REGEXP
|
||||
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(RegExpObject, m_bytecode));
|
||||
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(RegExpObject, m_groupNames));
|
||||
#else /* !ENABLE_QUICKJS_REGEXP */
|
||||
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(RegExpObject, m_yarrPattern));
|
||||
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(RegExpObject, m_bytecodePattern));
|
||||
#endif /* ENABLE_QUICKJS_REGEXP */
|
||||
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(RegExpObject, m_lastIndex));
|
||||
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(RegExpObject, m_lastExecutedString));
|
||||
descr = GC_make_descriptor(obj_bitmap, GC_WORD_LEN(RegExpObject));
|
||||
|
|
@ -170,14 +184,19 @@ void RegExpObject::internalInit(ExecutionState& state, String* source, String* o
|
|||
m_source = escapeSlashInPattern(m_source);
|
||||
|
||||
auto entry = getCacheEntryAndCompileIfNeeded(state, m_source, m_option);
|
||||
if (entry.m_yarrError) {
|
||||
if (entry.m_error) {
|
||||
m_source = previousSource;
|
||||
m_option = previousOptions;
|
||||
ErrorObject::throwBuiltinError(state, ErrorObject::SyntaxError, entry.m_yarrError);
|
||||
ErrorObject::throwBuiltinError(state, ErrorObject::SyntaxError, entry.m_error);
|
||||
}
|
||||
setLastIndex(state, Value(0));
|
||||
#ifdef ENABLE_QUICKJS_REGEXP
|
||||
m_bytecode = entry.m_bytecode;
|
||||
m_groupNames = entry.m_groupNames;
|
||||
#else /* !ENABLE_QUICKJS_REGEXP */
|
||||
m_yarrPattern = entry.m_yarrPattern;
|
||||
m_bytecodePattern = entry.m_bytecodePattern;
|
||||
#endif /* ENABLE_QUICKJS_REGEXP */
|
||||
}
|
||||
|
||||
void RegExpObject::init(ExecutionState& state, String* source, String* option)
|
||||
|
|
@ -263,11 +282,7 @@ void RegExpObject::parseOption(ExecutionState& state, String* optionString)
|
|||
|
||||
void RegExpObject::setOption(const Option& option)
|
||||
{
|
||||
if (((m_option & Option::MultiLine) != (option & Option::MultiLine))
|
||||
|| ((m_option & Option::IgnoreCase) != (option & Option::IgnoreCase))) {
|
||||
ASSERT(!m_yarrPattern);
|
||||
m_bytecodePattern = NULL;
|
||||
}
|
||||
ASSERT((m_option | Option::Global) == (option | Option::Global));
|
||||
m_option = option;
|
||||
}
|
||||
|
||||
|
|
@ -278,16 +293,72 @@ RegExpObject::RegExpCacheEntry& RegExpObject::getCacheEntryAndCompileIfNeeded(Ex
|
|||
if (it != cache->end()) {
|
||||
return it->second;
|
||||
} else {
|
||||
const char* yarrError = nullptr;
|
||||
#ifdef ENABLE_QUICKJS_REGEXP
|
||||
char* error = nullptr;
|
||||
uint8_t* bytecode = nullptr;
|
||||
CaptureGroupNameVector* groupNames = nullptr;
|
||||
|
||||
UTF8StringDataNonGCStd str = source->toNonGCUTF8StringData();
|
||||
|
||||
int bytecodeLen;
|
||||
char errorMessage[128];
|
||||
int flags = 0;
|
||||
|
||||
if (option & Option::IgnoreCase) {
|
||||
flags |= LRE_FLAG_IGNORECASE;
|
||||
}
|
||||
if (option & Option::MultiLine) {
|
||||
flags |= LRE_FLAG_MULTILINE;
|
||||
}
|
||||
if (option & Option::Sticky) {
|
||||
flags |= LRE_FLAG_STICKY;
|
||||
}
|
||||
if (option & Option::Unicode) {
|
||||
flags |= LRE_FLAG_UTF16;
|
||||
}
|
||||
if (option & Option::DotAll) {
|
||||
flags |= LRE_FLAG_DOTALL;
|
||||
}
|
||||
|
||||
bytecode = lre_compile(&bytecodeLen, errorMessage, sizeof(errorMessage), str.data(), str.length(), flags, NULL);
|
||||
|
||||
if (bytecode == nullptr) {
|
||||
size_t size = strlen(errorMessage);
|
||||
error = reinterpret_cast<char*>(GC_MALLOC(size + 1));
|
||||
memcpy(error, errorMessage, size + 1);
|
||||
} else {
|
||||
const char* groupNamesUtf8 = lre_get_groupnames(bytecode);
|
||||
|
||||
if (groupNamesUtf8 != nullptr) {
|
||||
groupNames = new CaptureGroupNameVector();
|
||||
size_t subPatternNum = (size_t)lre_get_capture_count(bytecode);
|
||||
|
||||
for (size_t index = 1; index < subPatternNum; index++) {
|
||||
if (*groupNamesUtf8 == '\0') {
|
||||
groupNamesUtf8++;
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t length = strlen(groupNamesUtf8);
|
||||
|
||||
groupNames->pushBack(CaptureGroupName(String::fromUTF8(groupNamesUtf8, length), index));
|
||||
groupNamesUtf8 += length + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return cache->insert(std::make_pair(RegExpCacheKey(source, option), RegExpCacheEntry(error, bytecode, groupNames))).first->second;
|
||||
#else /* !ENABLE_QUICKJS_REGEXP */
|
||||
const char* error = nullptr;
|
||||
JSC::Yarr::YarrPattern* yarrPattern = nullptr;
|
||||
try {
|
||||
JSC::Yarr::ErrorCode errorCode = JSC::Yarr::ErrorCode::NoError;
|
||||
yarrPattern = JSC::Yarr::YarrPattern::createYarrPattern(source, (JSC::Yarr::RegExpFlags)option, errorCode);
|
||||
yarrError = JSC::Yarr::errorMessage(errorCode);
|
||||
error = JSC::Yarr::errorMessage(errorCode);
|
||||
} catch (const std::bad_alloc& e) {
|
||||
ErrorObject::throwBuiltinError(state, ErrorObject::TypeError, "got too complicated RegExp pattern to process");
|
||||
}
|
||||
return cache->insert(std::make_pair(RegExpCacheKey(source, option), RegExpCacheEntry(yarrError, yarrPattern))).first->second;
|
||||
return cache->insert(std::make_pair(RegExpCacheKey(source, option), RegExpCacheEntry(error, yarrPattern))).first->second;
|
||||
#endif /* ENABLE_QUICKJS_REGEXP */
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -307,9 +378,116 @@ bool RegExpObject::match(ExecutionState& state, String* str, RegexMatchResult& m
|
|||
|
||||
m_lastExecutedString = str;
|
||||
|
||||
#ifdef ENABLE_QUICKJS_REGEXP
|
||||
if (!m_bytecode) {
|
||||
RegExpCacheEntry& entry = getCacheEntryAndCompileIfNeeded(state, m_source, m_option);
|
||||
if (entry.m_error) {
|
||||
matchResult.m_subPatternNum = 0;
|
||||
return false;
|
||||
}
|
||||
m_bytecode = entry.m_bytecode;
|
||||
m_groupNames = entry.m_groupNames;
|
||||
}
|
||||
|
||||
int subPatternNum = lre_get_capture_count(m_bytecode);
|
||||
matchResult.m_subPatternNum = subPatternNum - 1;
|
||||
int length = (int)str->length();
|
||||
int start = (int)startIndex;
|
||||
int result;
|
||||
bool isGlobal = option() & RegExpObject::Option::Global;
|
||||
bool isSticky = option() & RegExpObject::Option::Sticky;
|
||||
bool gotResult = false;
|
||||
int buffer_type = str->has8BitContent() ? 0 : 1;
|
||||
const uint8_t* buffer = buffer_type == 0 ? str->characters8() : (const uint8_t*)str->characters16();
|
||||
uint8_t** outputBuf = ALLOCA(sizeof(uint8_t*) * 2 * subPatternNum, uint8_t*, state);
|
||||
|
||||
while (true) {
|
||||
if (start > length) {
|
||||
break;
|
||||
}
|
||||
|
||||
memset(outputBuf, 0, sizeof(uint8_t*) * 2 * subPatternNum);
|
||||
result = lre_exec(outputBuf, m_bytecode, buffer, start, length, buffer_type, NULL);
|
||||
|
||||
if (result != 1) {
|
||||
break;
|
||||
}
|
||||
|
||||
gotResult = true;
|
||||
unsigned maxMatchedIndex = subPatternNum - 1;
|
||||
|
||||
bool lastParenInvalid = false;
|
||||
for (; maxMatchedIndex > 0; maxMatchedIndex--) {
|
||||
if (outputBuf[maxMatchedIndex * 2] != NULL) {
|
||||
break;
|
||||
} else {
|
||||
lastParenInvalid = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Details:{3, 10, 3, 10, 3, 6, 7, 10, 1684872, 806200}
|
||||
legacyFeatures.dollarCount = maxMatchedIndex;
|
||||
unsigned dollarEnd = std::min(maxMatchedIndex, (unsigned)9);
|
||||
for (unsigned i = 1; i <= dollarEnd; i++) {
|
||||
if (outputBuf[i * 2] == NULL) {
|
||||
legacyFeatures.dollars[i - 1] = StringView();
|
||||
} else {
|
||||
legacyFeatures.dollars[i - 1] = StringView(str, (outputBuf[i * 2] - buffer) >> buffer_type, (outputBuf[i * 2 + 1] - buffer) >> buffer_type);
|
||||
}
|
||||
}
|
||||
|
||||
if (UNLIKELY(testOnly)) {
|
||||
// outputBuf[1] should be set to lastIndex
|
||||
if (isGlobal || isSticky) {
|
||||
setLastIndex(state, Value((outputBuf[1] - buffer) >> buffer_type));
|
||||
}
|
||||
if (!lastParenInvalid && subPatternNum > 1) {
|
||||
legacyFeatures.lastParen = StringView(str, (outputBuf[maxMatchedIndex * 2] - buffer) >> buffer_type, (outputBuf[maxMatchedIndex * 2 + 1] - buffer) >> buffer_type);
|
||||
} else {
|
||||
legacyFeatures.lastParen = StringView();
|
||||
}
|
||||
legacyFeatures.lastMatch = StringView(str, (outputBuf[0] - buffer) >> buffer_type, (outputBuf[1] - buffer) >> buffer_type);
|
||||
legacyFeatures.leftContext = StringView(str, 0, (outputBuf[0] - buffer) >> buffer_type);
|
||||
legacyFeatures.rightContext = StringView(str, (outputBuf[1] - buffer) >> buffer_type, length);
|
||||
return true;
|
||||
}
|
||||
std::vector<RegexMatchResult::RegexMatchResultPiece> piece;
|
||||
piece.resize(subPatternNum);
|
||||
|
||||
for (int i = 0; i < subPatternNum; i++) {
|
||||
RegexMatchResult::RegexMatchResultPiece p;
|
||||
p.m_start = (outputBuf[i * 2] - buffer) >> buffer_type;
|
||||
p.m_end = (outputBuf[i * 2 + 1] - buffer) >> buffer_type;
|
||||
piece[i] = p;
|
||||
}
|
||||
|
||||
if (!lastParenInvalid && subPatternNum > 1) {
|
||||
legacyFeatures.lastParen = StringView(str, piece[maxMatchedIndex].m_start, piece[maxMatchedIndex].m_end);
|
||||
} else {
|
||||
legacyFeatures.lastParen = StringView();
|
||||
}
|
||||
|
||||
legacyFeatures.leftContext = StringView(str, 0, piece[0].m_start);
|
||||
legacyFeatures.rightContext = StringView(str, piece[maxMatchedIndex].m_end, length);
|
||||
legacyFeatures.lastMatch = StringView(str, piece[0].m_start, piece[0].m_end);
|
||||
matchResult.m_matchResults.push_back(std::vector<RegexMatchResult::RegexMatchResultPiece>(std::move(piece)));
|
||||
if (!isGlobal)
|
||||
break;
|
||||
|
||||
int new_start = (outputBuf[1] - buffer) >> buffer_type;
|
||||
if (start == new_start) {
|
||||
start++;
|
||||
if (start > length) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
start = new_start;
|
||||
}
|
||||
}
|
||||
#else /* !ENABLE_QUICKJS_REGEXP */
|
||||
if (!m_bytecodePattern) {
|
||||
RegExpCacheEntry& entry = getCacheEntryAndCompileIfNeeded(state, m_source, m_option);
|
||||
if (entry.m_yarrError) {
|
||||
if (entry.m_error) {
|
||||
matchResult.m_subPatternNum = 0;
|
||||
return false;
|
||||
}
|
||||
|
|
@ -417,6 +595,7 @@ bool RegExpObject::match(ExecutionState& state, String* str, RegexMatchResult& m
|
|||
break;
|
||||
}
|
||||
} while (result != JSC::Yarr::offsetNoMatch);
|
||||
#endif /* ENABLE_QUICKJS_REGEXP */
|
||||
|
||||
if (!gotResult && ((option() & (RegExpObject::Option::Global | RegExpObject::Option::Sticky)))) {
|
||||
setLastIndex(state, Value(0));
|
||||
|
|
@ -483,6 +662,19 @@ ArrayObject* RegExpObject::createRegExpMatchedArray(ExecutionState& state, const
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef ENABLE_QUICKJS_REGEXP
|
||||
if (m_groupNames == NULL) {
|
||||
arr->defineOwnProperty(state, ObjectPropertyName(state.context()->staticStrings().groups), ObjectPropertyDescriptor(Value(), ObjectPropertyDescriptor::AllPresent));
|
||||
} else {
|
||||
Object* groups = new Object(state);
|
||||
groups->setPrototype(state, Value(Value::Null));
|
||||
for (auto it = m_groupNames->begin(); it != m_groupNames->end(); ++it) {
|
||||
groups->defineOwnProperty(state, ObjectPropertyName(state, it->name),
|
||||
ObjectPropertyDescriptor(arr->getOwnProperty(state, ObjectPropertyName(state, it->index)).value(state, this), ObjectPropertyDescriptor::AllPresent));
|
||||
}
|
||||
arr->defineOwnProperty(state, ObjectPropertyName(state.context()->staticStrings().groups), ObjectPropertyDescriptor(Value(groups), ObjectPropertyDescriptor::AllPresent));
|
||||
}
|
||||
#else /* !ENABLE_QUICKJS_REGEXP */
|
||||
if (m_yarrPattern->m_namedGroupToParenIndex.empty()) {
|
||||
arr->defineOwnProperty(state, ObjectPropertyName(state.context()->staticStrings().groups), ObjectPropertyDescriptor(Value(), ObjectPropertyDescriptor::AllPresent));
|
||||
} else {
|
||||
|
|
@ -497,6 +689,7 @@ ArrayObject* RegExpObject::createRegExpMatchedArray(ExecutionState& state, const
|
|||
}
|
||||
arr->defineOwnProperty(state, ObjectPropertyName(state.context()->staticStrings().groups), ObjectPropertyDescriptor(Value(groups), ObjectPropertyDescriptor::AllPresent));
|
||||
}
|
||||
#endif /* ENABLE_QUICKJS_REGEXP */
|
||||
|
||||
// FIXME RegExp should have own Realm internal slot when allocated
|
||||
if (state.context() == this->getFunctionRealm(state)) {
|
||||
|
|
@ -525,6 +718,15 @@ void RegExpObject::pushBackToRegExpMatchedArray(ExecutionState& state, ArrayObje
|
|||
}
|
||||
}
|
||||
|
||||
bool RegExpObject::hasNamedGroups()
|
||||
{
|
||||
#ifdef ENABLE_QUICKJS_REGEXP
|
||||
return lre_get_groupnames(m_bytecode) != NULL;
|
||||
#else /* !ENABLE_QUICKJS_REGEXP */
|
||||
return m_yarrPattern->m_captureGroupNames.size() != 0;
|
||||
#endif /* ENABLE_QUICKJS_REGEXP */
|
||||
}
|
||||
|
||||
RegExpStringIteratorObject::RegExpStringIteratorObject(ExecutionState& state, bool global, bool unicode, RegExpObject* regexp, String* string)
|
||||
: IteratorObject(state, state.context()->globalObject()->regexpStringIteratorPrototype())
|
||||
, m_isGlobal(global)
|
||||
|
|
|
|||
|
|
@ -24,16 +24,32 @@
|
|||
#include "runtime/ErrorObject.h"
|
||||
#include "runtime/IteratorObject.h"
|
||||
|
||||
|
||||
#ifndef ENABLE_QUICKJS_REGEXP
|
||||
namespace JSC {
|
||||
namespace Yarr {
|
||||
struct YarrPattern;
|
||||
struct BytecodePattern;
|
||||
} // namespace Yarr
|
||||
} // namespace JSC
|
||||
#endif /* !ENABLE_QUICKJS_REGEXP */
|
||||
|
||||
namespace Escargot {
|
||||
|
||||
#ifdef ENABLE_QUICKJS_REGEXP
|
||||
struct CaptureGroupName {
|
||||
CaptureGroupName(String* name, size_t index)
|
||||
: name(name)
|
||||
, index(index)
|
||||
{
|
||||
}
|
||||
|
||||
String* name;
|
||||
size_t index;
|
||||
};
|
||||
|
||||
typedef Vector<CaptureGroupName, GCUtil::gc_malloc_allocator<CaptureGroupName>> CaptureGroupNameVector;
|
||||
#endif /* ENABLE_QUICKJS_REGEXP */
|
||||
|
||||
struct RegexMatchResult {
|
||||
struct RegexMatchResultPiece {
|
||||
unsigned m_start, m_end;
|
||||
|
|
@ -75,16 +91,28 @@ public:
|
|||
};
|
||||
|
||||
struct RegExpCacheEntry {
|
||||
RegExpCacheEntry(const char* yarrError = nullptr, JSC::Yarr::YarrPattern* yarrPattern = nullptr, JSC::Yarr::BytecodePattern* bytecodePattern = nullptr)
|
||||
: m_yarrError(yarrError)
|
||||
, m_yarrPattern(yarrPattern)
|
||||
, m_bytecodePattern(bytecodePattern)
|
||||
#ifdef ENABLE_QUICKJS_REGEXP
|
||||
RegExpCacheEntry(const char* error = nullptr, uint8_t* bytecode = nullptr, CaptureGroupNameVector* groupNames = nullptr)
|
||||
: m_bytecode(bytecode)
|
||||
, m_groupNames(groupNames)
|
||||
, m_error(error)
|
||||
{
|
||||
}
|
||||
|
||||
uint8_t* m_bytecode;
|
||||
CaptureGroupNameVector* m_groupNames;
|
||||
#else /* !ENABLE_QUICKJS_REGEXP */
|
||||
RegExpCacheEntry(const char* error = nullptr, JSC::Yarr::YarrPattern* yarrPattern = nullptr, JSC::Yarr::BytecodePattern* bytecodePattern = nullptr)
|
||||
: m_yarrPattern(yarrPattern)
|
||||
, m_bytecodePattern(bytecodePattern)
|
||||
, m_error(error)
|
||||
{
|
||||
}
|
||||
|
||||
const char* m_yarrError;
|
||||
JSC::Yarr::YarrPattern* m_yarrPattern;
|
||||
JSC::Yarr::BytecodePattern* m_bytecodePattern;
|
||||
#endif /* ENABLE_QUICKJS_REGEXP */
|
||||
const char* m_error;
|
||||
};
|
||||
|
||||
RegExpObject(ExecutionState& state, String* source, String* option);
|
||||
|
|
@ -124,23 +152,14 @@ public:
|
|||
return m_option;
|
||||
}
|
||||
|
||||
JSC::Yarr::YarrPattern* yarrPatern()
|
||||
{
|
||||
return m_yarrPattern;
|
||||
}
|
||||
|
||||
JSC::Yarr::BytecodePattern* bytecodePattern()
|
||||
{
|
||||
return m_bytecodePattern;
|
||||
}
|
||||
|
||||
Value lastIndex()
|
||||
{
|
||||
return m_lastIndex;
|
||||
}
|
||||
|
||||
void setLastIndex(ExecutionState& state, const Value& v);
|
||||
bool hasNamedGroups();
|
||||
|
||||
void setLastIndex(ExecutionState& state, const Value& v);
|
||||
|
||||
bool legacyFeaturesEnabled()
|
||||
{
|
||||
|
|
@ -180,8 +199,13 @@ private:
|
|||
String* m_source;
|
||||
String* m_optionString;
|
||||
Option m_option;
|
||||
#ifdef ENABLE_QUICKJS_REGEXP
|
||||
uint8_t* m_bytecode;
|
||||
CaptureGroupNameVector* m_groupNames;
|
||||
#else /* !ENABLE_QUICKJS_REGEXP */
|
||||
JSC::Yarr::YarrPattern* m_yarrPattern;
|
||||
JSC::Yarr::BytecodePattern* m_bytecodePattern;
|
||||
#endif /* ENABLE_QUICKJS_REGEXP */
|
||||
EncodedValue m_lastIndex;
|
||||
const String* m_lastExecutedString;
|
||||
bool m_legacyFeaturesEnabled;
|
||||
|
|
|
|||
631
third_party/quickjs_libregexp/cutils.cpp
vendored
Normal file
631
third_party/quickjs_libregexp/cutils.cpp
vendored
Normal file
|
|
@ -0,0 +1,631 @@
|
|||
/*
|
||||
* C utilities
|
||||
*
|
||||
* Copyright (c) 2017 Fabrice Bellard
|
||||
* Copyright (c) 2018 Charlie Gordon
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cutils.h"
|
||||
|
||||
/* Copy 'str' into 'buf', storing at most buf_size - 1 characters followed by
   a terminating NUL. Nothing is written when buf_size is not positive. */
void pstrcpy(char *buf, int buf_size, const char *str)
{
    char *dst, *last;

    if (buf_size <= 0)
        return;

    dst = buf;
    last = buf + buf_size - 1; /* slot reserved for the terminator */
    while (*str != '\0' && dst < last)
        *dst++ = *str++;
    *dst = '\0';
}
|
||||
|
||||
/* strcat and truncate. */
|
||||
char *pstrcat(char *buf, int buf_size, const char *s)
|
||||
{
|
||||
int len;
|
||||
len = strlen(buf);
|
||||
if (len < buf_size)
|
||||
pstrcpy(buf + len, buf_size - len, s);
|
||||
return buf;
|
||||
}
|
||||
|
||||
/* Return 1 when 'str' begins with 'val', 0 otherwise. On success, and when
   'ptr' is not null, *ptr receives the position in 'str' just past the
   matched prefix; it is left untouched on failure. */
int strstart(const char *str, const char *val, const char **ptr)
{
    const char *cursor = str;

    for (; *val != '\0'; val++, cursor++) {
        if (*cursor != *val)
            return 0;
    }
    if (ptr)
        *ptr = cursor;
    return 1;
}
|
||||
|
||||
/* Return non-zero when 'str' ends with 'suffix' (an empty suffix always matches). */
int has_suffix(const char *str, const char *suffix)
{
    size_t str_len = strlen(str);
    size_t suffix_len = strlen(suffix);

    if (suffix_len > str_len)
        return 0;
    return memcmp(str + (str_len - suffix_len), suffix, suffix_len) == 0;
}
|
||||
|
||||
/* Dynamic buffer package */
|
||||
|
||||
/* Default allocator used by DynBuf when the caller supplies none.
   'opaque' is unused. A request for size 0 releases 'ptr' and returns NULL:
   dbuf_free() relies on this to free the buffer, and the result of
   realloc(ptr, 0) is implementation-defined, so the release is done
   explicitly with free(). */
static void *dbuf_default_realloc(void *opaque, void *ptr, size_t size)
{
    (void)opaque;
    if (size == 0) {
        free(ptr);
        return NULL;
    }
    return realloc(ptr, size);
}
|
||||
|
||||
/* Reset 's' to an empty buffer that allocates through 'realloc_func'
   (called with 'opaque'); dbuf_default_realloc is used when none is given. */
void dbuf_init2(DynBuf *s, void *opaque, DynBufReallocFunc *realloc_func)
{
    memset(s, 0, sizeof(*s));
    s->opaque = opaque;
    s->realloc_func = realloc_func ? realloc_func : dbuf_default_realloc;
}
|
||||
|
||||
/* Reset 's' to an empty buffer using the default allocator and no opaque value. */
void dbuf_init(DynBuf *s)
{
    void *no_opaque = NULL;

    dbuf_init2(s, no_opaque, NULL);
}
|
||||
|
||||
/* return < 0 if error */
|
||||
int dbuf_realloc(DynBuf *s, size_t new_size)
|
||||
{
|
||||
size_t size;
|
||||
uint8_t *new_buf;
|
||||
if (new_size > s->allocated_size) {
|
||||
if (s->error)
|
||||
return -1;
|
||||
size = s->allocated_size * 3 / 2;
|
||||
if (size > new_size)
|
||||
new_size = size;
|
||||
new_buf = (uint8_t*)s->realloc_func(s->opaque, s->buf, new_size);
|
||||
if (!new_buf) {
|
||||
s->error = TRUE;
|
||||
return -1;
|
||||
}
|
||||
s->buf = new_buf;
|
||||
s->allocated_size = new_size;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Copy 'len' bytes from 'data' to position 'offset' of the buffer, growing
   it as needed and extending its size when the write ends past the current
   end. Returns 0 on success, -1 if the buffer could not be grown. */
int dbuf_write(DynBuf *s, size_t offset, const uint8_t *data, size_t len)
{
    const size_t end = offset + len;

    if (dbuf_realloc(s, end) != 0)
        return -1;
    memcpy(s->buf + offset, data, len);
    if (s->size < end)
        s->size = end;
    return 0;
}
|
||||
|
||||
/* Append 'len' bytes from 'data' to the end of the buffer.
   Returns 0 on success, -1 if the buffer could not be grown. */
int dbuf_put(DynBuf *s, const uint8_t *data, size_t len)
{
    const size_t needed = s->size + len;

    if (unlikely(needed > s->allocated_size) && dbuf_realloc(s, needed))
        return -1;
    memcpy(s->buf + s->size, data, len);
    s->size = needed;
    return 0;
}
|
||||
|
||||
/* Append a copy of the 'len' bytes that start at 'offset' inside the buffer
   itself. Returns 0 on success, -1 if the buffer could not be grown. */
int dbuf_put_self(DynBuf *s, size_t offset, size_t len)
{
    const size_t needed = s->size + len;

    if (unlikely(needed > s->allocated_size) && dbuf_realloc(s, needed))
        return -1;
    /* the source address is taken only now: growing may have moved s->buf */
    memcpy(s->buf + s->size, s->buf + offset, len);
    s->size = needed;
    return 0;
}
|
||||
|
||||
/* Append the single byte 'c'. Returns the result of dbuf_put(). */
int dbuf_putc(DynBuf *s, uint8_t c)
{
    const uint8_t byte = c;

    return dbuf_put(s, &byte, 1);
}
|
||||
|
||||
/* Append the characters of the NUL-terminated string 'str' (the terminator
   itself is not copied). Returns the result of dbuf_put(). */
int dbuf_putstr(DynBuf *s, const char *str)
{
    const size_t len = strlen(str);

    return dbuf_put(s, (const uint8_t *)str, len);
}
|
||||
|
||||
/* Append printf-style formatted text to the buffer.
   Short results are formatted into a stack buffer and appended; longer ones
   are formatted a second time directly into the (grown) buffer.
   Returns 0 on success, -1 if formatting fails or the buffer cannot grow. */
int __attribute__((format(printf, 2, 3))) dbuf_printf(DynBuf *s,
                                                      const char *fmt, ...)
{
    va_list ap;
    char buf[128];
    int len;

    va_start(ap, fmt);
    len = vsnprintf(buf, sizeof(buf), fmt, ap);
    va_end(ap);
    /* vsnprintf reports an output error with a negative value; it must not
       be used as a length (it would turn into a huge size_t in dbuf_put) */
    if (len < 0)
        return -1;
    if (len < (int)sizeof(buf)) {
        /* fast case */
        return dbuf_put(s, (uint8_t *)buf, len);
    }

    /* + 1: vsnprintf also stores a terminating NUL, which is not counted
       in the buffer size */
    if (dbuf_realloc(s, s->size + len + 1))
        return -1;
    va_start(ap, fmt);
    vsnprintf((char *)(s->buf + s->size), s->allocated_size - s->size,
              fmt, ap);
    va_end(ap);
    s->size += len;
    return 0;
}
|
||||
|
||||
/* Release the storage of 's' and clear the whole structure. */
void dbuf_free(DynBuf *s)
{
    uint8_t *owned = s->buf;

    /* the structure is zeroed below, so a second call finds no buffer and
       does not invoke the allocator again */
    if (owned)
        s->realloc_func(s->opaque, owned, 0);
    memset(s, 0, sizeof(*s));
}
|
||||
|
||||
/* Encode the code point 'c' into 'buf' and return the number of bytes
   written. Note: at most 31 bits are encoded, producing at most
   UTF8_CHAR_LEN_MAX bytes; values of 0x80000000 and above write nothing
   and return 0. */
int unicode_to_utf8(uint8_t *buf, unsigned int c)
{
    uint8_t *q = buf;
    uint8_t lead;
    int extra, shift;

    if (c < 0x80) {
        *q++ = c;
        return q - buf;
    }

    /* pick the lead-byte prefix and the number of continuation bytes */
    if (c < 0x800) {
        lead = 0xc0;
        extra = 1;
    } else if (c < 0x10000) {
        lead = 0xe0;
        extra = 2;
    } else if (c < 0x00200000) {
        lead = 0xf0;
        extra = 3;
    } else if (c < 0x04000000) {
        lead = 0xf8;
        extra = 4;
    } else if (c < 0x80000000) {
        lead = 0xfc;
        extra = 5;
    } else {
        return 0;
    }

    *q++ = (c >> (6 * extra)) | lead;
    /* continuation bytes carry 6 bits each, most significant group first */
    for (shift = 6 * (extra - 1); shift >= 0; shift -= 6)
        *q++ = ((c >> shift) & 0x3f) | 0x80;
    return q - buf;
}
|
||||
|
||||
/* Smallest code point that legitimately needs 1..5 continuation bytes;
   anything lower is an overlong encoding. */
static const unsigned int utf8_min_code[5] = {
    0x80, 0x800, 0x10000, 0x00200000, 0x04000000,
};

/* Mask selecting the payload bits of the lead byte, indexed by the number
   of continuation bytes minus one. */
static const unsigned char utf8_first_code_mask[5] = {
    0x1f, 0xf, 0x7, 0x3, 0x1,
};

/* Decode one UTF-8 sequence starting at 'p' and return its code point, or
   -1 on error (*pp is not updated in that case). On success *pp points just
   past the sequence. max_len must be >= 1. The maximum length for a UTF8
   byte sequence is 6 bytes. */
int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp)
{
    int lead, code, extra, k, next;

    lead = *p++;
    if (lead < 0x80) {
        *pp = p;
        return lead;
    }

    /* number of continuation bytes announced by the lead byte */
    if (lead < 0xc0)
        return -1; /* continuation byte in lead position */
    else if (lead < 0xe0)
        extra = 1;
    else if (lead < 0xf0)
        extra = 2;
    else if (lead < 0xf8)
        extra = 3;
    else if (lead < 0xfc)
        extra = 4;
    else if (lead < 0xfe)
        extra = 5;
    else
        return -1; /* 0xfe and 0xff never start a sequence */

    /* check that we have enough characters */
    if (extra > (max_len - 1))
        return -1;

    code = lead & utf8_first_code_mask[extra - 1];
    for (k = 0; k < extra; k++) {
        next = *p++;
        if (next < 0x80 || next >= 0xc0)
            return -1;
        code = (code << 6) | (next & 0x3f);
    }
    /* reject overlong encodings */
    if (code < (int)utf8_min_code[extra - 1])
        return -1;
    *pp = p;
    return code;
}
|
||||
|
||||
#if 0
|
||||
|
||||
#if defined(EMSCRIPTEN) || defined(__ANDROID__)
|
||||
|
||||
/* Comparator and context of the rqsort() call in progress. */
static void *rqsort_arg;
static int (*rqsort_cmp)(const void *, const void *, void *);

/* Adapter with the two-argument signature expected by qsort(): forwards to
   the stored three-argument comparator together with the stored context. */
static int rqsort_cmp2(const void *lhs, const void *rhs)
{
    return rqsort_cmp(lhs, rhs, rqsort_arg);
}

/* Sort 'nmemb' elements of 'size' bytes at 'base' with qsort(), passing
   'arg' to every 'cmp' call.
   not reentrant, but not needed with emscripten */
void rqsort(void *base, size_t nmemb, size_t size,
            int (*cmp)(const void *, const void *, void *),
            void *arg)
{
    rqsort_cmp = cmp;
    rqsort_arg = arg;
    qsort(base, nmemb, size, rqsort_cmp2);
}
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
typedef void (*exchange_f)(void *a, void *b, size_t size);
|
||||
typedef int (*cmp_f)(const void *, const void *, void *opaque);
|
||||
|
||||
/* Swap two 'size'-byte regions one byte at a time. */
static void exchange_bytes(void *a, void *b, size_t size) {
    uint8_t *lhs = (uint8_t *)a;
    uint8_t *rhs = (uint8_t *)b;
    size_t k;

    for (k = 0; k < size; k++) {
        uint8_t saved = lhs[k];
        lhs[k] = rhs[k];
        rhs[k] = saved;
    }
}
|
||||
|
||||
/* Swap exactly one byte; 'size' is ignored. */
static void exchange_one_byte(void *a, void *b, size_t size) {
    uint8_t *lhs = (uint8_t *)a;
    uint8_t *rhs = (uint8_t *)b;
    const uint8_t saved = *lhs;

    *lhs = *rhs;
    *rhs = saved;
}
|
||||
|
||||
/* Swap two 'size'-byte regions as a sequence of 16-bit words. */
static void exchange_int16s(void *a, void *b, size_t size) {
    uint16_t *lhs = (uint16_t *)a;
    uint16_t *rhs = (uint16_t *)b;
    const size_t count = size / sizeof(uint16_t);
    size_t k;

    for (k = 0; k < count; k++) {
        uint16_t saved = lhs[k];
        lhs[k] = rhs[k];
        rhs[k] = saved;
    }
}
|
||||
|
||||
/* Swap exactly one 16-bit word; 'size' is ignored. */
static void exchange_one_int16(void *a, void *b, size_t size) {
    uint16_t *lhs = (uint16_t *)a;
    uint16_t *rhs = (uint16_t *)b;
    const uint16_t saved = *lhs;

    *lhs = *rhs;
    *rhs = saved;
}
|
||||
|
||||
/* Swap two 'size'-byte regions as a sequence of 32-bit words. */
static void exchange_int32s(void *a, void *b, size_t size) {
    uint32_t *lhs = (uint32_t *)a;
    uint32_t *rhs = (uint32_t *)b;
    const size_t count = size / sizeof(uint32_t);
    size_t k;

    for (k = 0; k < count; k++) {
        uint32_t saved = lhs[k];
        lhs[k] = rhs[k];
        rhs[k] = saved;
    }
}
|
||||
|
||||
/* Swap exactly one 32-bit word; 'size' is ignored. */
static void exchange_one_int32(void *a, void *b, size_t size) {
    uint32_t *lhs = (uint32_t *)a;
    uint32_t *rhs = (uint32_t *)b;
    const uint32_t saved = *lhs;

    *lhs = *rhs;
    *rhs = saved;
}
|
||||
|
||||
/* Swap two 'size'-byte regions as a sequence of 64-bit words. */
static void exchange_int64s(void *a, void *b, size_t size) {
    uint64_t *lhs = (uint64_t *)a;
    uint64_t *rhs = (uint64_t *)b;
    const size_t count = size / sizeof(uint64_t);
    size_t k;

    for (k = 0; k < count; k++) {
        uint64_t saved = lhs[k];
        lhs[k] = rhs[k];
        rhs[k] = saved;
    }
}
|
||||
|
||||
/* Swap exactly one 64-bit word; 'size' is ignored. */
static void exchange_one_int64(void *a, void *b, size_t size) {
    uint64_t *lhs = (uint64_t *)a;
    uint64_t *rhs = (uint64_t *)b;
    const uint64_t saved = *lhs;

    *lhs = *rhs;
    *rhs = saved;
}
|
||||
|
||||
/* Swap two 'size'-byte regions in 128-bit units, each handled as a pair of
   64-bit words. */
static void exchange_int128s(void *a, void *b, size_t size) {
    uint64_t *lhs = (uint64_t *)a;
    uint64_t *rhs = (uint64_t *)b;
    size_t remaining = size / (sizeof(uint64_t) * 2);

    while (remaining-- != 0) {
        uint64_t low = lhs[0];
        uint64_t high = lhs[1];
        lhs[0] = rhs[0];
        lhs[1] = rhs[1];
        rhs[0] = low;
        rhs[1] = high;
        lhs += 2;
        rhs += 2;
    }
}
|
||||
|
||||
/* Swap exactly one 128-bit unit (two uint64_t words) between `a` and `b`.
 * `size` is part of the common swap signature and is not read here. */
static void exchange_one_int128(void *a, void *b, size_t size) {
    uint64_t *const lhs = (uint64_t *)a;
    uint64_t *const rhs = (uint64_t *)b;
    const uint64_t saved_lo = lhs[0];
    const uint64_t saved_hi = lhs[1];

    lhs[0] = rhs[0];
    lhs[1] = rhs[1];
    rhs[0] = saved_lo;
    rhs[1] = saved_hi;
}
|
||||
|
||||
/* Pick the swap routine with the widest access unit compatible with both the
 * address `base` and the element size `size`.
 *
 * The low four bits of (base | size) decide the unit: 16, 8, 4 or 2 bytes, or
 * single bytes otherwise. When `size` equals the unit exactly, the
 * single-element variant is returned; otherwise the looping variant is. */
static inline exchange_f exchange_func(const void *base, size_t size) {
    const uintptr_t low_bits = ((uintptr_t)base | (uintptr_t)size) & 15;

    if (low_bits == 0) {
        /* both multiples of 16 */
        return (size == sizeof(uint64_t) * 2) ? exchange_one_int128 : exchange_int128s;
    }
    if (low_bits == 8) {
        return (size == sizeof(uint64_t)) ? exchange_one_int64 : exchange_int64s;
    }
    if ((low_bits & 3) == 0) {
        /* remaining multiples of 4: low_bits is 4 or 12 */
        return (size == sizeof(uint32_t)) ? exchange_one_int32 : exchange_int32s;
    }
    if ((low_bits & 1) == 0) {
        /* remaining even values: low_bits is 2, 6, 10 or 14 */
        return (size == sizeof(uint16_t)) ? exchange_one_int16 : exchange_int16s;
    }
    /* odd address or odd size: byte granularity only */
    return (size == 1) ? exchange_one_byte : exchange_bytes;
}
|
||||
|
||||
static void heapsortx(void *base, size_t nmemb, size_t size, cmp_f cmp, void *opaque)
|
||||
{
|
||||
uint8_t *basep = (uint8_t *)base;
|
||||
size_t i, n, c, r;
|
||||
exchange_f swap = exchange_func(base, size);
|
||||
|
||||
if (nmemb > 1) {
|
||||
i = (nmemb / 2) * size;
|
||||
n = nmemb * size;
|
||||
|
||||
while (i > 0) {
|
||||
i -= size;
|
||||
for (r = i; (c = r * 2 + size) < n; r = c) {
|
||||
if (c < n - size && cmp(basep + c, basep + c + size, opaque) <= 0)
|
||||
c += size;
|
||||
if (cmp(basep + r, basep + c, opaque) > 0)
|
||||
break;
|
||||
swap(basep + r, basep + c, size);
|
||||
}
|
||||
}
|
||||
for (i = n - size; i > 0; i -= size) {
|
||||
swap(basep, basep + i, size);
|
||||
|
||||
for (r = 0; (c = r * 2 + size) < i; r = c) {
|
||||
if (c < i - size && cmp(basep + c, basep + c + size, opaque) <= 0)
|
||||
c += size;
|
||||
if (cmp(basep + r, basep + c, opaque) > 0)
|
||||
break;
|
||||
swap(basep + r, basep + c, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void *med3(void *a, void *b, void *c, cmp_f cmp, void *opaque)
|
||||
{
|
||||
return cmp(a, b, opaque) < 0 ?
|
||||
(cmp(b, c, opaque) < 0 ? b : (cmp(a, c, opaque) < 0 ? c : a )) :
|
||||
(cmp(b, c, opaque) > 0 ? b : (cmp(a, c, opaque) < 0 ? a : c ));
|
||||
}
|
||||
|
||||
/* pointer based version with local stack and insertion sort threshold */
|
||||
void rqsort(void *base, size_t nmemb, size_t size, cmp_f cmp, void *opaque)
|
||||
{
|
||||
struct { uint8_t *base; size_t count; int depth; } stack[50], *sp = stack;
|
||||
uint8_t *ptr, *pi, *pj, *plt, *pgt, *top, *m;
|
||||
size_t m4, i, lt, gt, span, span2;
|
||||
int c, depth;
|
||||
exchange_f swap = exchange_func(base, size);
|
||||
exchange_f swap_block = exchange_func(base, size | 128);
|
||||
|
||||
if (nmemb < 2 || size <= 0)
|
||||
return;
|
||||
|
||||
sp->base = (uint8_t *)base;
|
||||
sp->count = nmemb;
|
||||
sp->depth = 0;
|
||||
sp++;
|
||||
|
||||
while (sp > stack) {
|
||||
sp--;
|
||||
ptr = sp->base;
|
||||
nmemb = sp->count;
|
||||
depth = sp->depth;
|
||||
|
||||
while (nmemb > 6) {
|
||||
if (++depth > 50) {
|
||||
/* depth check to ensure worst case logarithmic time */
|
||||
heapsortx(ptr, nmemb, size, cmp, opaque);
|
||||
nmemb = 0;
|
||||
break;
|
||||
}
|
||||
/* select median of 3 from 1/4, 1/2, 3/4 positions */
|
||||
/* should use median of 5 or 9? */
|
||||
m4 = (nmemb >> 2) * size;
|
||||
m = (uint8_t*)med3(ptr + m4, ptr + 2 * m4, ptr + 3 * m4, cmp, opaque);
|
||||
swap(ptr, m, size); /* move the pivot to the start or the array */
|
||||
i = lt = 1;
|
||||
pi = plt = ptr + size;
|
||||
gt = nmemb;
|
||||
pj = pgt = top = ptr + nmemb * size;
|
||||
for (;;) {
|
||||
while (pi < pj && (c = cmp(ptr, pi, opaque)) >= 0) {
|
||||
if (c == 0) {
|
||||
swap(plt, pi, size);
|
||||
lt++;
|
||||
plt += size;
|
||||
}
|
||||
i++;
|
||||
pi += size;
|
||||
}
|
||||
while (pi < (pj -= size) && (c = cmp(ptr, pj, opaque)) <= 0) {
|
||||
if (c == 0) {
|
||||
gt--;
|
||||
pgt -= size;
|
||||
swap(pgt, pj, size);
|
||||
}
|
||||
}
|
||||
if (pi >= pj)
|
||||
break;
|
||||
swap(pi, pj, size);
|
||||
i++;
|
||||
pi += size;
|
||||
}
|
||||
/* array has 4 parts:
|
||||
* from 0 to lt excluded: elements identical to pivot
|
||||
* from lt to pi excluded: elements smaller than pivot
|
||||
* from pi to gt excluded: elements greater than pivot
|
||||
* from gt to n excluded: elements identical to pivot
|
||||
*/
|
||||
/* move elements identical to pivot in the middle of the array: */
|
||||
/* swap values in ranges [0..lt[ and [i-lt..i[
|
||||
swapping the smallest span between lt and i-lt is sufficient
|
||||
*/
|
||||
span = plt - ptr;
|
||||
span2 = pi - plt;
|
||||
lt = i - lt;
|
||||
if (span > span2)
|
||||
span = span2;
|
||||
swap_block(ptr, pi - span, span);
|
||||
/* swap values in ranges [gt..top[ and [i..top-(top-gt)[
|
||||
swapping the smallest span between top-gt and gt-i is sufficient
|
||||
*/
|
||||
span = top - pgt;
|
||||
span2 = pgt - pi;
|
||||
pgt = top - span2;
|
||||
gt = nmemb - (gt - i);
|
||||
if (span > span2)
|
||||
span = span2;
|
||||
swap_block(pi, top - span, span);
|
||||
|
||||
/* now array has 3 parts:
|
||||
* from 0 to lt excluded: elements smaller than pivot
|
||||
* from lt to gt excluded: elements identical to pivot
|
||||
* from gt to n excluded: elements greater than pivot
|
||||
*/
|
||||
/* stack the larger segment and keep processing the smaller one
|
||||
to minimize stack use for pathological distributions */
|
||||
if (lt > nmemb - gt) {
|
||||
sp->base = ptr;
|
||||
sp->count = lt;
|
||||
sp->depth = depth;
|
||||
sp++;
|
||||
ptr = pgt;
|
||||
nmemb -= gt;
|
||||
} else {
|
||||
sp->base = pgt;
|
||||
sp->count = nmemb - gt;
|
||||
sp->depth = depth;
|
||||
sp++;
|
||||
nmemb = lt;
|
||||
}
|
||||
}
|
||||
/* Use insertion sort for small fragments */
|
||||
for (pi = ptr + size, top = ptr + nmemb * size; pi < top; pi += size) {
|
||||
for (pj = pi; pj > ptr && cmp(pj - size, pj, opaque) > 0; pj -= size)
|
||||
swap(pj, pj - size, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
297
third_party/quickjs_libregexp/cutils.h
vendored
Normal file
297
third_party/quickjs_libregexp/cutils.h
vendored
Normal file
|
|
@ -0,0 +1,297 @@
|
|||
/*
|
||||
* C utilities
|
||||
*
|
||||
* Copyright (c) 2017 Fabrice Bellard
|
||||
* Copyright (c) 2018 Charlie Gordon
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
#ifndef CUTILS_H
|
||||
#define CUTILS_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
/* set if CPU is big endian */
|
||||
#undef WORDS_BIGENDIAN
|
||||
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#define force_inline inline __attribute__((always_inline))
|
||||
#define no_inline __attribute__((noinline))
|
||||
#define __maybe_unused __attribute__((unused))
|
||||
|
||||
#define xglue(x, y) x ## y
|
||||
#define glue(x, y) xglue(x, y)
|
||||
#define stringify(s) tostring(s)
|
||||
#define tostring(s) #s
|
||||
|
||||
#ifndef offsetof
|
||||
#define offsetof(type, field) ((size_t) &((type *)0)->field)
|
||||
#endif
|
||||
#ifndef countof
|
||||
#define countof(x) (sizeof(x) / sizeof((x)[0]))
|
||||
#endif
|
||||
|
||||
typedef int BOOL;
|
||||
|
||||
#ifndef FALSE
|
||||
enum {
|
||||
FALSE = 0,
|
||||
TRUE = 1,
|
||||
};
|
||||
#endif
|
||||
|
||||
void pstrcpy(char *buf, int buf_size, const char *str);
|
||||
char *pstrcat(char *buf, int buf_size, const char *s);
|
||||
int strstart(const char *str, const char *val, const char **ptr);
|
||||
int has_suffix(const char *str, const char *suffix);
|
||||
|
||||
/* Return the larger of two ints. */
static inline int max_int(int a, int b)
{
    return (a > b) ? a : b;
}
|
||||
|
||||
/* Return the smaller of two ints. */
static inline int min_int(int a, int b)
{
    return (a < b) ? a : b;
}
|
||||
|
||||
/* Return the larger of two unsigned 32-bit values. */
static inline uint32_t max_uint32(uint32_t a, uint32_t b)
{
    return (a > b) ? a : b;
}
|
||||
|
||||
/* Return the smaller of two unsigned 32-bit values. */
static inline uint32_t min_uint32(uint32_t a, uint32_t b)
{
    return (a < b) ? a : b;
}
|
||||
|
||||
/* Return the larger of two signed 64-bit values. */
static inline int64_t max_int64(int64_t a, int64_t b)
{
    return (a > b) ? a : b;
}
|
||||
|
||||
/* Return the smaller of two signed 64-bit values. */
static inline int64_t min_int64(int64_t a, int64_t b)
{
    return (a < b) ? a : b;
}
|
||||
|
||||
/* WARNING: undefined if a = 0 */
|
||||
static inline int clz32(unsigned int a)
{
    /* number of leading zero bits; the builtin's result is undefined for a == 0 */
    const int leading_zeros = __builtin_clz(a);

    return leading_zeros;
}
|
||||
|
||||
/* WARNING: undefined if a = 0 */
|
||||
static inline int clz64(uint64_t a)
{
    /* number of leading zero bits; the builtin's result is undefined for a == 0 */
    const int leading_zeros = __builtin_clzll(a);

    return leading_zeros;
}
|
||||
|
||||
/* WARNING: undefined if a = 0 */
|
||||
static inline int ctz32(unsigned int a)
{
    /* number of trailing zero bits; the builtin's result is undefined for a == 0 */
    const int trailing_zeros = __builtin_ctz(a);

    return trailing_zeros;
}
|
||||
|
||||
/* WARNING: undefined if a = 0 */
|
||||
static inline int ctz64(uint64_t a)
{
    /* number of trailing zero bits; the builtin's result is undefined for a == 0 */
    const int trailing_zeros = __builtin_ctzll(a);

    return trailing_zeros;
}
|
||||
|
||||
/* Packed wrapper around one uint64_t, used to read or write a 64-bit value
 * through a pointer cast. */
struct packed_u64 {
    uint64_t v;
} __attribute__((packed));
|
||||
|
||||
/* Packed wrapper around one uint32_t, used to read or write a 32-bit value
 * through a pointer cast. */
struct packed_u32 {
    uint32_t v;
} __attribute__((packed));
|
||||
|
||||
/* Packed wrapper around one uint16_t, used to read or write a 16-bit value
 * through a pointer cast. */
struct packed_u16 {
    uint16_t v;
} __attribute__((packed));
|
||||
|
||||
static inline uint64_t get_u64(const uint8_t *tab)
|
||||
{
|
||||
return ((const struct packed_u64 *)tab)->v;
|
||||
}
|
||||
|
||||
static inline int64_t get_i64(const uint8_t *tab)
|
||||
{
|
||||
return (int64_t)((const struct packed_u64 *)tab)->v;
|
||||
}
|
||||
|
||||
static inline void put_u64(uint8_t *tab, uint64_t val)
|
||||
{
|
||||
((struct packed_u64 *)tab)->v = val;
|
||||
}
|
||||
|
||||
static inline uint32_t get_u32(const uint8_t *tab)
|
||||
{
|
||||
return ((const struct packed_u32 *)tab)->v;
|
||||
}
|
||||
|
||||
static inline int32_t get_i32(const uint8_t *tab)
|
||||
{
|
||||
return (int32_t)((const struct packed_u32 *)tab)->v;
|
||||
}
|
||||
|
||||
static inline void put_u32(uint8_t *tab, uint32_t val)
|
||||
{
|
||||
((struct packed_u32 *)tab)->v = val;
|
||||
}
|
||||
|
||||
static inline uint32_t get_u16(const uint8_t *tab)
|
||||
{
|
||||
return ((const struct packed_u16 *)tab)->v;
|
||||
}
|
||||
|
||||
static inline int32_t get_i16(const uint8_t *tab)
|
||||
{
|
||||
return (int16_t)((const struct packed_u16 *)tab)->v;
|
||||
}
|
||||
|
||||
static inline void put_u16(uint8_t *tab, uint16_t val)
|
||||
{
|
||||
((struct packed_u16 *)tab)->v = val;
|
||||
}
|
||||
|
||||
/* Read the byte at `tab`, zero-extended to uint32_t. */
static inline uint32_t get_u8(const uint8_t *tab)
{
    return tab[0];
}
|
||||
|
||||
/* Read the byte at `tab` as int8_t, sign-extended to int32_t. */
static inline int32_t get_i8(const uint8_t *tab)
{
    const int8_t as_signed = (int8_t)tab[0];

    return as_signed;
}
|
||||
|
||||
/* Store the byte `val` at `tab`. */
static inline void put_u8(uint8_t *tab, uint8_t val)
{
    tab[0] = val;
}
|
||||
|
||||
/* Exchange the two bytes of a 16-bit value. */
static inline uint16_t bswap16(uint16_t x)
{
    const unsigned int high = (unsigned int)x >> 8;
    const unsigned int low = (unsigned int)x & 0xffu;

    return (uint16_t)((low << 8) | high);
}
|
||||
|
||||
/* Reverse the byte order of a 32-bit value. */
static inline uint32_t bswap32(uint32_t v)
{
    const uint32_t byte3_to_0 = v >> 24;
    const uint32_t byte2_to_1 = (v >> 8) & 0x0000ff00;
    const uint32_t byte1_to_2 = (v << 8) & 0x00ff0000;
    const uint32_t byte0_to_3 = v << 24;

    return byte3_to_0 | byte2_to_1 | byte1_to_2 | byte0_to_3;
}
|
||||
|
||||
/* Reverse the byte order of a 64-bit value. */
static inline uint64_t bswap64(uint64_t v)
{
    uint64_t reversed = 0;

    /* peel bytes off the low end of v and push them onto the low end of the
       result, so the first byte taken ends up most significant */
    for (int k = 0; k < 8; k++) {
        reversed = (reversed << 8) | (v & 0xff);
        v >>= 8;
    }
    return reversed;
}
|
||||
|
||||
/* XXX: should take an extra argument to pass slack information to the caller */
|
||||
typedef void *DynBufReallocFunc(void *opaque, void *ptr, size_t size);
|
||||
|
||||
typedef struct DynBuf {
|
||||
uint8_t *buf;
|
||||
size_t size;
|
||||
size_t allocated_size;
|
||||
BOOL error; /* true if a memory allocation error occurred */
|
||||
DynBufReallocFunc *realloc_func;
|
||||
void *opaque; /* for realloc_func */
|
||||
} DynBuf;
|
||||
|
||||
void dbuf_init(DynBuf *s);
|
||||
void dbuf_init2(DynBuf *s, void *opaque, DynBufReallocFunc *realloc_func);
|
||||
int dbuf_realloc(DynBuf *s, size_t new_size);
|
||||
int dbuf_write(DynBuf *s, size_t offset, const uint8_t *data, size_t len);
|
||||
int dbuf_put(DynBuf *s, const uint8_t *data, size_t len);
|
||||
int dbuf_put_self(DynBuf *s, size_t offset, size_t len);
|
||||
int dbuf_putc(DynBuf *s, uint8_t c);
|
||||
int dbuf_putstr(DynBuf *s, const char *str);
|
||||
/* Pass the 2 bytes of `val`, as laid out in memory, to dbuf_put() and return
 * its result. */
static inline int dbuf_put_u16(DynBuf *s, uint16_t val)
{
    uint8_t *raw = (uint8_t *)&val;

    return dbuf_put(s, raw, sizeof(val));
}
|
||||
/* Pass the 4 bytes of `val`, as laid out in memory, to dbuf_put() and return
 * its result. */
static inline int dbuf_put_u32(DynBuf *s, uint32_t val)
{
    uint8_t *raw = (uint8_t *)&val;

    return dbuf_put(s, raw, sizeof(val));
}
|
||||
/* Pass the 8 bytes of `val`, as laid out in memory, to dbuf_put() and return
 * its result. */
static inline int dbuf_put_u64(DynBuf *s, uint64_t val)
{
    uint8_t *raw = (uint8_t *)&val;

    return dbuf_put(s, raw, sizeof(val));
}
|
||||
int __attribute__((format(printf, 2, 3))) dbuf_printf(DynBuf *s,
|
||||
const char *fmt, ...);
|
||||
void dbuf_free(DynBuf *s);
|
||||
/* Return the buffer's error flag. */
static inline BOOL dbuf_error(DynBuf *s)
{
    const BOOL failed = s->error;

    return failed;
}
|
||||
/* Set the buffer's error flag. */
static inline void dbuf_set_error(DynBuf *s)
{
    (*s).error = TRUE;
}
|
||||
|
||||
#define UTF8_CHAR_LEN_MAX 6
|
||||
|
||||
int unicode_to_utf8(uint8_t *buf, unsigned int c);
|
||||
int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp);
|
||||
|
||||
/* Convert one hexadecimal digit character to its value.
 * Accepts '0'-'9', 'A'-'F' and 'a'-'f'; returns 0..15, or -1 for any other
 * input. */
static inline int from_hex(int c)
{
    if ('0' <= c && c <= '9')
        return c - '0';
    if ('A' <= c && c <= 'F')
        return 10 + (c - 'A');
    if ('a' <= c && c <= 'f')
        return 10 + (c - 'a');
    return -1;
}
|
||||
|
||||
void rqsort(void *base, size_t nmemb, size_t size,
|
||||
int (*cmp)(const void *, const void *, void *),
|
||||
void *arg);
|
||||
|
||||
#endif /* CUTILS_H */
|
||||
58
third_party/quickjs_libregexp/libregexp-opcode.h
vendored
Normal file
58
third_party/quickjs_libregexp/libregexp-opcode.h
vendored
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Regular Expression Engine
|
||||
*
|
||||
* Copyright (c) 2017-2018 Fabrice Bellard
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifdef DEF
|
||||
|
||||
DEF(invalid, 1) /* never used */
|
||||
DEF(char, 3)
|
||||
DEF(char32, 5)
|
||||
DEF(dot, 1)
|
||||
DEF(any, 1) /* same as dot but match any character including line terminator */
|
||||
DEF(line_start, 1)
|
||||
DEF(line_end, 1)
|
||||
DEF(goto, 5)
|
||||
DEF(split_goto_first, 5)
|
||||
DEF(split_next_first, 5)
|
||||
DEF(match, 1)
|
||||
DEF(save_start, 2) /* save start position */
|
||||
DEF(save_end, 2) /* save end position, must come after saved_start */
|
||||
DEF(save_reset, 3) /* reset save positions */
|
||||
DEF(loop, 5) /* decrement the top of the stack and goto if != 0 */
|
||||
DEF(push_i32, 5) /* push integer on the stack */
|
||||
DEF(drop, 1)
|
||||
DEF(word_boundary, 1)
|
||||
DEF(not_word_boundary, 1)
|
||||
DEF(back_reference, 2)
|
||||
DEF(backward_back_reference, 2) /* must come after back_reference */
|
||||
DEF(range, 3) /* variable length */
|
||||
DEF(range32, 3) /* variable length */
|
||||
DEF(lookahead, 5)
|
||||
DEF(negative_lookahead, 5)
|
||||
DEF(push_char_pos, 1) /* push the character position on the stack */
|
||||
DEF(bne_char_pos, 5) /* pop one stack element and jump if equal to the character
                        position.
                        NOTE(review): the "bne" mnemonic suggests the jump is taken when
                        the values differ - confirm against the interpreter and correct
                        this text if needed */
|
||||
DEF(prev, 1) /* go to the previous char */
|
||||
DEF(simple_greedy_quant, 17)
|
||||
|
||||
#endif /* DEF */
|
||||
2609
third_party/quickjs_libregexp/libregexp.cpp
vendored
Normal file
2609
third_party/quickjs_libregexp/libregexp.cpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
92
third_party/quickjs_libregexp/libregexp.h
vendored
Normal file
92
third_party/quickjs_libregexp/libregexp.h
vendored
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
* Regular Expression Engine
|
||||
*
|
||||
* Copyright (c) 2017-2018 Fabrice Bellard
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
#ifndef LIBREGEXP_H
|
||||
#define LIBREGEXP_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "libunicode.h"
|
||||
|
||||
#define LRE_BOOL int /* for documentation purposes */
|
||||
|
||||
#define LRE_FLAG_GLOBAL (1 << 0)
|
||||
#define LRE_FLAG_IGNORECASE (1 << 1)
|
||||
#define LRE_FLAG_MULTILINE (1 << 2)
|
||||
#define LRE_FLAG_DOTALL (1 << 3)
|
||||
#define LRE_FLAG_UTF16 (1 << 4)
|
||||
#define LRE_FLAG_STICKY (1 << 5)
|
||||
|
||||
#define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */
|
||||
|
||||
uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
|
||||
const char *buf, size_t buf_len, int re_flags,
|
||||
void *opaque);
|
||||
int lre_get_capture_count(const uint8_t *bc_buf);
|
||||
int lre_get_flags(const uint8_t *bc_buf);
|
||||
const char *lre_get_groupnames(const uint8_t *bc_buf);
|
||||
int lre_exec(uint8_t **capture,
|
||||
const uint8_t *bc_buf, const uint8_t *cbuf, int cindex, int clen,
|
||||
int cbuf_type, void *opaque);
|
||||
|
||||
int lre_parse_escape(const uint8_t **pp, int allow_utf16);
|
||||
LRE_BOOL lre_is_space(int c);
|
||||
|
||||
/* must be provided by the user */
|
||||
LRE_BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size);
|
||||
void *lre_realloc(void *opaque, void *ptr, size_t size);
|
||||
|
||||
/* JS identifier test */
|
||||
extern uint32_t const lre_id_start_table_ascii[4];
|
||||
extern uint32_t const lre_id_continue_table_ascii[4];
|
||||
|
||||
static inline int lre_js_is_ident_first(int c)
|
||||
{
|
||||
if ((uint32_t)c < 128) {
|
||||
return (lre_id_start_table_ascii[c >> 5] >> (c & 31)) & 1;
|
||||
} else {
|
||||
#ifdef CONFIG_ALL_UNICODE
|
||||
return lre_is_id_start(c);
|
||||
#else
|
||||
return !lre_is_space(c);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static inline int lre_js_is_ident_next(int c)
|
||||
{
|
||||
if ((uint32_t)c < 128) {
|
||||
return (lre_id_continue_table_ascii[c >> 5] >> (c & 31)) & 1;
|
||||
} else {
|
||||
/* ZWNJ and ZWJ are accepted in identifiers */
|
||||
#ifdef CONFIG_ALL_UNICODE
|
||||
return lre_is_id_continue(c) || c == 0x200C || c == 0x200D;
|
||||
#else
|
||||
return !lre_is_space(c) || c == 0x200C || c == 0x200D;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#undef LRE_BOOL
|
||||
|
||||
#endif /* LIBREGEXP_H */
|
||||
4368
third_party/quickjs_libregexp/libunicode-table.h
vendored
Normal file
4368
third_party/quickjs_libregexp/libunicode-table.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
1556
third_party/quickjs_libregexp/libunicode.cpp
vendored
Normal file
1556
third_party/quickjs_libregexp/libunicode.cpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
124
third_party/quickjs_libregexp/libunicode.h
vendored
Normal file
124
third_party/quickjs_libregexp/libunicode.h
vendored
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
/*
|
||||
* Unicode utilities
|
||||
*
|
||||
* Copyright (c) 2017-2018 Fabrice Bellard
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
#ifndef LIBUNICODE_H
|
||||
#define LIBUNICODE_H
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
#define LRE_BOOL int /* for documentation purposes */
|
||||
|
||||
/* define it to include all the unicode tables (40KB larger) */
|
||||
#define CONFIG_ALL_UNICODE
|
||||
|
||||
#define LRE_CC_RES_LEN_MAX 3
|
||||
|
||||
/* Normalization form selector passed to unicode_normalize(). */
typedef enum {
    UNICODE_NFC = 0,
    UNICODE_NFD = 1,
    UNICODE_NFKC = 2,
    UNICODE_NFKD = 3,
} UnicodeNormalizationEnum;
|
||||
|
||||
int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
|
||||
LRE_BOOL lre_is_cased(uint32_t c);
|
||||
LRE_BOOL lre_is_case_ignorable(uint32_t c);
|
||||
|
||||
/* char ranges */
|
||||
|
||||
/* A set of characters stored as a sorted array of interval boundary points,
 * with the allocator used to grow that array. */
struct CharRange {
    int len;          /* in points, always even */
    int size;         /* presumably the allocated capacity of `points`, in points - confirm in cr_realloc */
    uint32_t *points; /* points sorted by increasing value */
    void *mem_opaque; /* presumably handed to realloc_func as `opaque` - confirm in cr_realloc */
    void *(*realloc_func)(void *opaque, void *ptr, size_t size);
};
typedef struct CharRange CharRange;
|
||||
|
||||
/* Set operation selector passed as `op` to cr_op(). */
typedef enum {
    CR_OP_UNION = 0,
    CR_OP_INTER = 1,
    CR_OP_XOR = 2,
} CharRangeOpEnum;
|
||||
|
||||
void cr_init(CharRange *cr, void *mem_opaque, void *(*realloc_func)(void *opaque, void *ptr, size_t size));
|
||||
void cr_free(CharRange *cr);
|
||||
int cr_realloc(CharRange *cr, int size);
|
||||
int cr_copy(CharRange *cr, const CharRange *cr1);
|
||||
|
||||
/* Append one boundary point `v` to `cr`, calling cr_realloc() first when the
 * array is full. Returns 0 on success, or -1 when cr_realloc() reports
 * failure (nothing is appended in that case). */
static inline int cr_add_point(CharRange *cr, uint32_t v)
{
    const int is_full = cr->len >= cr->size;

    if (is_full && cr_realloc(cr, cr->len + 1) != 0)
        return -1;

    cr->points[cr->len] = v;
    cr->len += 1;
    return 0;
}
|
||||
|
||||
/* Append the two boundary points `c1` and `c2` to `cr`, calling cr_realloc()
 * first when fewer than two free slots remain. Returns 0 on success, or -1
 * when cr_realloc() reports failure (nothing is appended in that case). */
static inline int cr_add_interval(CharRange *cr, uint32_t c1, uint32_t c2)
{
    const int wanted = cr->len + 2;
    uint32_t *dst;

    if (wanted > cr->size && cr_realloc(cr, wanted) != 0)
        return -1;

    /* read `points` only after the possible reallocation */
    dst = cr->points + cr->len;
    dst[0] = c1;
    dst[1] = c2;
    cr->len += 2;
    return 0;
}
|
||||
|
||||
int cr_union1(CharRange *cr, const uint32_t *b_pt, int b_len);
|
||||
|
||||
/* Merge one interval into `cr` by calling cr_union1() with the point pair
 * { c1, c2 + 1 }, and return its result.
 * Presumably `c2` is an inclusive upper bound and cr_union1() expects an
 * exclusive one - confirm against cr_union1(). */
static inline int cr_union_interval(CharRange *cr, uint32_t c1, uint32_t c2)
{
    const uint32_t bounds[2] = { c1, c2 + 1 };

    return cr_union1(cr, bounds, 2);
}
|
||||
|
||||
int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len,
|
||||
const uint32_t *b_pt, int b_len, int op);
|
||||
|
||||
int cr_invert(CharRange *cr);
|
||||
|
||||
#ifdef CONFIG_ALL_UNICODE
|
||||
|
||||
LRE_BOOL lre_is_id_start(uint32_t c);
|
||||
LRE_BOOL lre_is_id_continue(uint32_t c);
|
||||
|
||||
int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
|
||||
UnicodeNormalizationEnum n_type,
|
||||
void *opaque, void *(*realloc_func)(void *opaque, void *ptr, size_t size));
|
||||
|
||||
/* Unicode character range functions */
|
||||
|
||||
int unicode_script(CharRange *cr,
|
||||
const char *script_name, LRE_BOOL is_ext);
|
||||
int unicode_general_category(CharRange *cr, const char *gc_name);
|
||||
int unicode_prop(CharRange *cr, const char *prop_name);
|
||||
|
||||
#endif /* CONFIG_ALL_UNICODE */
|
||||
|
||||
#undef LRE_BOOL
|
||||
|
||||
#endif /* LIBUNICODE_H */
|
||||
Loading…
Add table
Add a link
Reference in a new issue