Update yarr source to webkitgtk-2.44.2

Signed-off-by: Seonghyun Kim <sh8281.kim@samsung.com>
This commit is contained in:
Seonghyun Kim 2024-07-09 18:08:15 +09:00 committed by Hyukwoo Park
commit 4b8024efb7
37 changed files with 19011 additions and 8952 deletions

View file

@ -22,7 +22,7 @@ ENDIF()
# Default options per compiler
IF (${CMAKE_CXX_COMPILER_ID} STREQUAL "MSVC" OR ${COMPILER_CLANG_CL})
SET (ESCARGOT_CXXFLAGS /std:c++17 /fp:strict /Zc:__cplusplus /EHs /source-charset:utf-8 /MP /D_CRT_SECURE_NO_WARNINGS /DGC_NOT_DLL /D_SILENCE_CXX17_ITERATOR_BASE_CLASS_DEPRECATION_WARNING /wd4244 /wd4267 /wd4805 /wd4018 /wd4172)
SET (ESCARGOT_CXXFLAGS /std:c++17 /fp:strict /Zc:__cplusplus /EHs /source-charset:utf-8 /MP /D_CRT_SECURE_NO_WARNINGS /DGC_NOT_DLL /D_SILENCE_CXX17_ITERATOR_BASE_CLASS_DEPRECATION_WARNING /wd4244 /wd4267 /wd4805 /wd4018 /wd4172 /wd4146)
SET (ESCARGOT_CXXFLAGS_RELEASE /O2 /Oy-)
SET (ESCARGOT_THIRDPARTY_CFLAGS /D_CRT_SECURE_NO_WARNINGS /DGC_NOT_DLL /Oy- /wd4146 /EHs)
IF (${COMPILER_CLANG_CL})

View file

@ -263,7 +263,7 @@ RegExpObject::RegExpCacheEntry& RegExpObject::getCacheEntryAndCompileIfNeeded(Ex
JSC::Yarr::YarrPattern* yarrPattern = nullptr;
try {
JSC::Yarr::ErrorCode errorCode = JSC::Yarr::ErrorCode::NoError;
yarrPattern = JSC::Yarr::YarrPattern::createYarrPattern(source, (JSC::Yarr::RegExpFlags)option, errorCode);
yarrPattern = new JSC::Yarr::YarrPattern(source, WTF::OptionSet<JSC::Yarr::Flags>((JSC::Yarr::Flags)option), errorCode);
yarrError = JSC::Yarr::errorMessage(errorCode);
} catch (const std::bad_alloc& e) {
ErrorObject::throwBuiltinError(state, ErrorCode::TypeError, "got too complicated RegExp pattern to process");
@ -301,7 +301,8 @@ bool RegExpObject::match(ExecutionState& state, String* str, RegexMatchResult& m
m_bytecodePattern = entry.m_bytecodePattern;
} else {
WTF::BumpPointerAllocator* bumpAlloc = ThreadLocal::bumpPointerAllocator();
std::unique_ptr<JSC::Yarr::BytecodePattern> ownedBytecode = JSC::Yarr::byteCompile(*m_yarrPattern, bumpAlloc);
JSC::Yarr::ErrorCode errorCode = JSC::Yarr::ErrorCode::NoError;
std::unique_ptr<JSC::Yarr::BytecodePattern> ownedBytecode = JSC::Yarr::byteCompile(*m_yarrPattern, bumpAlloc, errorCode);
m_bytecodePattern = ownedBytecode.release();
entry.m_bytecodePattern = m_bytecodePattern;
}
@ -464,16 +465,15 @@ ArrayObject* RegExpObject::createRegExpMatchedArray(ExecutionState& state, const
}
}
if (m_yarrPattern->m_namedGroupToParenIndex.empty()) {
if (m_yarrPattern->m_namedGroupToParenIndices.empty()) {
arr->defineOwnProperty(state, ObjectPropertyName(state.context()->staticStrings().groups), ObjectPropertyDescriptor(Value(), ObjectPropertyDescriptor::AllPresent));
} else {
Object* groups = new Object(state);
groups->setPrototype(state, Value(Value::Null));
Object* groups = new Object(state, Object::PrototypeIsNull);
for (auto it = m_yarrPattern->m_captureGroupNames.begin(); it != m_yarrPattern->m_captureGroupNames.end(); ++it) {
auto foundMapElement = m_yarrPattern->m_namedGroupToParenIndex.find(*it);
if (foundMapElement != m_yarrPattern->m_namedGroupToParenIndex.end()) {
auto foundMapElement = m_yarrPattern->m_namedGroupToParenIndices.find(*it);
if (foundMapElement != m_yarrPattern->m_namedGroupToParenIndices.end()) {
groups->directDefineOwnProperty(state, ObjectPropertyName(state, it->impl()),
ObjectPropertyDescriptor(arr->getOwnProperty(state, ObjectPropertyName(state, foundMapElement->second)).value(state, this), ObjectPropertyDescriptor::AllPresent));
ObjectPropertyDescriptor(arr->getOwnProperty(state, ObjectPropertyName(state, foundMapElement->second[0])).value(state, this), ObjectPropertyDescriptor::AllPresent));
}
}
arr->directDefineOwnProperty(state, ObjectPropertyName(state.context()->staticStrings().groups), ObjectPropertyDescriptor(Value(groups), ObjectPropertyDescriptor::AllPresent));
@ -535,70 +535,90 @@ Value RegExpObject::regexpFlagsValue(ExecutionState& state, Object* obj)
String* RegExpObject::computeRegExpOptionString(ExecutionState& state, Object* obj)
{
char flags[7] = { 0 };
char flags[8] = { 0 };
size_t flagsIdx = 0;
size_t cacheIndex = 0;
if (!hasOwnRegExpProperty(state, obj)) {
auto opt = obj->asRegExpObject()->option();
if (opt & RegExpObject::Option::HasIndices) {
flags[flagsIdx++] = 'd';
cacheIndex |= 1 << 0;
}
if (opt & RegExpObject::Option::Global) {
flags[flagsIdx++] = 'g';
cacheIndex |= 1 << 0;
cacheIndex |= 1 << 1;
}
if (opt & RegExpObject::Option::IgnoreCase) {
flags[flagsIdx++] = 'i';
cacheIndex |= 1 << 1;
cacheIndex |= 1 << 2;
}
if (opt & RegExpObject::Option::MultiLine) {
flags[flagsIdx++] = 'm';
cacheIndex |= 1 << 2;
cacheIndex |= 1 << 3;
}
if (opt & RegExpObject::Option::DotAll) {
flags[flagsIdx++] = 's';
cacheIndex |= 1 << 3;
cacheIndex |= 1 << 4;
}
if (opt & RegExpObject::Option::Unicode) {
flags[flagsIdx++] = 'u';
cacheIndex |= 1 << 4;
cacheIndex |= 1 << 5;
}
if (opt & RegExpObject::Option::UnicodeSets) {
flags[flagsIdx++] = 'v';
cacheIndex |= 1 << 6;
}
if (opt & RegExpObject::Option::Sticky) {
flags[flagsIdx++] = 'y';
cacheIndex |= 1 << 5;
cacheIndex |= 1 << 7;
}
} else {
if (obj->get(state, ObjectPropertyName(state, state.context()->staticStrings().hasIndices)).value(state, obj).toBoolean()) {
flags[flagsIdx++] = 'd';
cacheIndex |= 1 << 0;
}
if (obj->get(state, ObjectPropertyName(state, state.context()->staticStrings().global)).value(state, obj).toBoolean()) {
flags[flagsIdx++] = 'g';
cacheIndex |= 1 << 0;
cacheIndex |= 1 << 1;
}
if (obj->get(state, ObjectPropertyName(state, state.context()->staticStrings().ignoreCase)).value(state, obj).toBoolean()) {
flags[flagsIdx++] = 'i';
cacheIndex |= 1 << 1;
cacheIndex |= 1 << 2;
}
if (obj->get(state, ObjectPropertyName(state, state.context()->staticStrings().multiline)).value(state, obj).toBoolean()) {
flags[flagsIdx++] = 'm';
cacheIndex |= 1 << 2;
cacheIndex |= 1 << 3;
}
if (obj->get(state, ObjectPropertyName(state, state.context()->staticStrings().dotAll)).value(state, obj).toBoolean()) {
flags[flagsIdx++] = 's';
cacheIndex |= 1 << 3;
cacheIndex |= 1 << 4;
}
if (obj->get(state, ObjectPropertyName(state, state.context()->staticStrings().unicode)).value(state, obj).toBoolean()) {
flags[flagsIdx++] = 'u';
cacheIndex |= 1 << 4;
cacheIndex |= 1 << 5;
}
if (obj->get(state, ObjectPropertyName(state, state.context()->staticStrings().unicodeSets)).value(state, obj).toBoolean()) {
flags[flagsIdx++] = 'v';
cacheIndex |= 1 << 6;
}
if (obj->get(state, ObjectPropertyName(state, state.context()->staticStrings().sticky)).value(state, obj).toBoolean()) {
flags[flagsIdx++] = 'y';
cacheIndex |= 1 << 5;
cacheIndex |= 1 << 7;
}
}

View file

@ -50,12 +50,14 @@ class RegExpObject : public DerivedObject {
public:
enum Option ENSURE_ENUM_UNSIGNED {
None = 0 << 0,
Global = 1 << 0,
IgnoreCase = 1 << 1,
MultiLine = 1 << 2,
Sticky = 1 << 3,
Unicode = 1 << 4,
DotAll = 1 << 5,
HasIndices = 1 << 0,
Global = 1 << 1,
IgnoreCase = 1 << 2,
MultiLine = 1 << 3,
DotAll = 1 << 4,
Unicode = 1 << 5,
UnicodeSets = 1 << 6,
Sticky = 1 << 7,
};
struct RegExpCacheKey {

View file

@ -278,6 +278,7 @@ namespace Escargot {
F(grow) \
F(growable) \
F(has) \
F(hasIndices) \
F(hasInstance) \
F(hasOwn) \
F(hasOwnProperty) \
@ -480,6 +481,7 @@ namespace Escargot {
F(undefined) \
F(unescape) \
F(unicode) \
F(unicodeSets) \
F(unregister) \
F(unscopables) \
F(unshift) \

View file

@ -378,8 +378,8 @@ VMInstance::VMInstance(const char* locale, const char* timezone, const char* bas
m_toStringRecursionPreventer = new ToStringRecursionPreventer();
m_regexpCache = new (GC) RegExpCacheMap();
m_regexpOptionStringCache = (ASCIIString**)GC_MALLOC(64 * sizeof(ASCIIString*));
memset(m_regexpOptionStringCache, 0, 64 * sizeof(ASCIIString*));
m_regexpOptionStringCache = (ASCIIString**)GC_MALLOC(256 * sizeof(ASCIIString*));
memset(m_regexpOptionStringCache, 0, 256 * sizeof(ASCIIString*));
#if defined(ENABLE_ICU)
m_calendar = nullptr;

View file

@ -102,6 +102,19 @@ public:
return !operator==(other);
}
// Member access to the stored value; asserts that a value is present.
T* operator->()
{
    ASSERT(hasValue());
    T* held = &m_value;
    return held;
}
// Read-only member access to the stored value; asserts that a value is present.
const T* operator->() const
{
    ASSERT(hasValue());
    const T* held = &m_value;
    return held;
}
protected:
bool m_hasValue;
T m_value;

View file

@ -242,6 +242,11 @@ public:
pushBack(val);
}
void append(const T& val)
{
pushBack(val);
}
void insert(size_t pos, const T& val)
{
ASSERT(pos <= m_size);

View file

@ -679,6 +679,44 @@ public:
return !(*this == rhs);
}
// Implicit conversion to the underlying type. Calls CRASH() when the value
// has overflowed.
operator T() const
{
    const bool overflowed = this->hasOverflowed();
    if (UNLIKELY(overflowed))
        CRASH();
    return m_value;
}
// Returns the stored value converted to U (T by default). Calls CRASH() when
// the value has overflowed.
template <typename U = T>
U value() const
{
    const bool overflowed = this->hasOverflowed();
    if (UNLIKELY(overflowed))
        CRASH();
    U converted = static_cast<U>(m_value);
    return converted;
}
// Other comparisons
// Less-than on the unwrapped values; value() crashes if either side has overflowed.
template <typename V> bool operator<(Checked<T, V> rhs) const
{
    const T lhsValue = value();
    const T rhsValue = rhs.value();
    return lhsValue < rhsValue;
}
// Less-or-equal on the unwrapped values; value() crashes if either side has overflowed.
template <typename V> bool operator<=(Checked<T, V> rhs) const
{
    const T lhsValue = value();
    const T rhsValue = rhs.value();
    return lhsValue <= rhsValue;
}
// Greater-than on the unwrapped values; value() crashes if either side has overflowed.
template <typename V> bool operator>(Checked<T, V> rhs) const
{
    const T lhsValue = value();
    const T rhsValue = rhs.value();
    return lhsValue > rhsValue;
}
// Greater-or-equal on the unwrapped values; value() crashes if either side has overflowed.
template <typename V> bool operator>=(Checked<T, V> rhs) const
{
    const T lhsValue = value();
    const T rhsValue = rhs.value();
    return lhsValue >= rhsValue;
}
private:
// Disallow implicit conversion of floating point to integer types
Checked(float);

425
third_party/yarr/BitSet.h vendored Normal file
View file

@ -0,0 +1,425 @@
/*
* Copyright (C) 2010-2023 Apple Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#pragma once
#include <array>
#include <string.h>
#include <type_traits>
namespace WTF {
// Storage word type for a BitSet of `size` bits: uint32_t when the set has at
// most 32 bits and UCPURegister is wider than uint32_t, UCPURegister otherwise.
template<size_t size>
using BitSetWordType = std::conditional_t<(size <= 32 && sizeof(UCPURegister) > sizeof(uint32_t)), uint32_t, UCPURegister>;
// Fixed-capacity set of bitSetSize bits packed into a std::array of WordType
// words. The single-bit accessors index the backing array directly; they do
// not range-check the bit index.
template<size_t bitSetSize, typename PassedWordType = BitSetWordType<bitSetSize>>
class BitSet final {
    WTF_MAKE_FAST_ALLOCATED;

public:
    using WordType = PassedWordType;

    static_assert(sizeof(WordType) <= sizeof(UCPURegister), "WordType must not be bigger than the CPU atomic word size");

    BitSet() = default;

    // Number of bits in the set (the bitSetSize template argument).
    static size_t size()
    {
        return bitSetSize;
    }

    // Single-bit accessors.
    bool get(size_t) const;
    void set(size_t);
    void set(size_t, bool);
    bool testAndSet(size_t); // Returns the previous bit value.
    bool testAndClear(size_t); // Returns the previous bit value.
    size_t nextPossiblyUnset(size_t) const;
    void clear(size_t);

    // Whole-set operations.
    void clearAll();
    void setAll();
    void invert();

    // Index of the first run of runLength clear bits, or -1 when there is none.
    int64_t findRunOfZeros(size_t runLength) const;
    size_t count(size_t start = 0) const;
    bool isEmpty() const;
    bool isFull() const;

    void merge(const BitSet&); // this |= other
    void filter(const BitSet&); // this &= other
    void exclude(const BitSet&); // this &= ~other
    bool subsumes(const BitSet&) const;

    // Index of the first bit at or after startIndex equal to value, or
    // bitSetSize when there is none.
    size_t findBit(size_t startIndex, bool value) const;

    // Forward iterator over the indices of set bits.
    class iterator {
        WTF_MAKE_FAST_ALLOCATED;

    public:
        iterator()
            : m_bitSet(nullptr)
            , m_index(0)
        {
        }

        iterator(const BitSet& bitSet, size_t index)
            : m_bitSet(&bitSet)
            , m_index(index)
        {
        }

        size_t operator*() const { return m_index; }

        // Advances to the next set bit; lands on bitSetSize when none remain.
        iterator& operator++()
        {
            m_index = m_bitSet->findBit(m_index + 1, true);
            return *this;
        }

        // Only the index is compared, not the BitSet the iterator refers to.
        bool operator==(const iterator& other) const
        {
            return m_index == other.m_index;
        }

        bool operator!=(const iterator& other) const
        {
            return !operator==(other);
        }

    private:
        const BitSet* m_bitSet;
        size_t m_index;
    };

    // Use this to iterate over set bits.
    iterator begin() const { return iterator(*this, findBit(0, true)); }
    iterator end() const { return iterator(*this, bitSetSize); }

    void mergeAndClear(BitSet&);
    void setAndClear(BitSet&);
    void setEachNthBit(size_t n, size_t start = 0, size_t end = bitSetSize);

    bool operator==(const BitSet&) const;
    void operator|=(const BitSet&);
    void operator&=(const BitSet&);
    void operator^=(const BitSet&);

    WordType* storage() { return bits.data(); }
    const WordType* storage() const { return bits.data(); }
    // Size in bytes of the backing array. Const-qualified so it is callable on
    // const sets, like the const storage() overload.
    size_t storageLengthInBytes() const { return sizeof(bits); }

private:
    // Clears the unused high bits of the last word when bitSetSize is not a
    // multiple of wordSize.
    void cleanseLastWord();

    static constexpr unsigned wordSize = sizeof(WordType) * 8;
    static constexpr unsigned words = (bitSetSize + wordSize - 1) / wordSize;

    // the literal '1' is of type signed int. We want to use an unsigned
    // version of the correct size when doing the calculations because if
    // WordType is larger than int, '1 << 31' will first be sign extended
    // and then casted to unsigned, meaning that set(31) when WordType is
    // a 64 bit unsigned int would give 0xffff8000
    static constexpr WordType one = 1;

    std::array<WordType, words> bits { };
};
// Returns whether bit n is set. n is not range-checked.
template<size_t bitSetSize, typename WordType>
inline bool BitSet<bitSetSize, WordType>::get(size_t n) const
{
    const size_t wordIndex = n / wordSize;
    const WordType mask = one << (n % wordSize);
    return (bits[wordIndex] & mask) != 0;
}
// Sets bit n. n is not range-checked.
template<size_t bitSetSize, typename WordType>
ALWAYS_INLINE void BitSet<bitSetSize, WordType>::set(size_t n)
{
    const WordType mask = one << (n % wordSize);
    bits[n / wordSize] |= mask;
}
// Sets bit n when value is true, clears it otherwise.
template<size_t bitSetSize, typename WordType>
ALWAYS_INLINE void BitSet<bitSetSize, WordType>::set(size_t n, bool value)
{
    if (!value) {
        clear(n);
        return;
    }
    set(n);
}
// Sets bit n and returns the value it had before the call.
template<size_t bitSetSize, typename WordType>
inline bool BitSet<bitSetSize, WordType>::testAndSet(size_t n)
{
    const WordType bitMask = one << (n % wordSize);
    WordType& word = bits[n / wordSize];
    const bool wasSet = (word & bitMask) != 0;
    word |= bitMask;
    return wasSet;
}
// Clears bit n and returns the value it had before the call.
template<size_t bitSetSize, typename WordType>
inline bool BitSet<bitSetSize, WordType>::testAndClear(size_t n)
{
    const WordType bitMask = one << (n % wordSize);
    WordType& word = bits[n / wordSize];
    const bool wasSet = (word & bitMask) != 0;
    word &= ~bitMask;
    return wasSet;
}
// Clears bit n. n is not range-checked.
template<size_t bitSetSize, typename WordType>
inline void BitSet<bitSetSize, WordType>::clear(size_t n)
{
    const WordType mask = one << (n % wordSize);
    bits[n / wordSize] &= ~mask;
}
// Resets every storage word to zero.
template<size_t bitSetSize, typename WordType>
inline void BitSet<bitSetSize, WordType>::clearAll()
{
    bits.fill(static_cast<WordType>(0));
}
// When bitSetSize is not a multiple of wordSize, zeroes the bits of the last
// word that lie beyond bitSetSize.
template<size_t bitSetSize, typename WordType>
inline void BitSet<bitSetSize, WordType>::cleanseLastWord()
{
    const size_t remainingBits = bitSetSize % wordSize;
    if (remainingBits) {
        const WordType keepMask = (one << remainingBits) - 1;
        bits[words - 1] &= keepMask;
    }
}
// Sets every bit of the set; the unused tail of the last word is cleared again.
template<size_t bitSetSize, typename WordType>
inline void BitSet<bitSetSize, WordType>::setAll()
{
    const WordType allOnes = static_cast<WordType>(~static_cast<WordType>(0));
    bits.fill(allOnes);
    cleanseLastWord();
}
// Flips every bit of the set; the unused tail of the last word is cleared again.
template<size_t bitSetSize, typename WordType>
inline void BitSet<bitSetSize, WordType>::invert()
{
    for (WordType& word : bits)
        word = ~word;
    cleanseLastWord();
}
// Returns the next index worth probing for a clear bit after `start`: the
// first bit of the following word when the `!~word` test on start's word
// succeeds, start + 1 otherwise.
template<size_t bitSetSize, typename WordType>
inline size_t BitSet<bitSetSize, WordType>::nextPossiblyUnset(size_t start) const
{
    const size_t wordIndex = start / wordSize;
    const bool wordHasNoClearBit = !~bits[wordIndex];
    return wordHasNoClearBit ? (wordIndex + 1) * wordSize : start + 1;
}
// Returns the index of the first run of runLength consecutive clear bits, or
// -1 when no such run exists. A runLength of 0 is treated as 1.
template<size_t bitSetSize, typename WordType>
inline int64_t BitSet<bitSetSize, WordType>::findRunOfZeros(size_t runLength) const
{
    if (!runLength)
        runLength = 1;

    if (runLength > bitSetSize)
        return -1;

    const size_t lastStart = bitSetSize - runLength;
    for (size_t runStart = 0; runStart <= lastStart; ++runStart) {
        // Count how many clear bits follow runStart, stopping at the first set bit.
        size_t clearBits = 0;
        while (clearBits < runLength && !get(runStart + clearBits))
            ++clearBits;
        if (clearBits == runLength)
            return runStart;
    }
    return -1;
}
// Counts the set bits from index `start` to the end of the set.
template<size_t bitSetSize, typename WordType>
inline size_t BitSet<bitSetSize, WordType>::count(size_t start) const
{
    size_t total = 0;

    // Walk bit by bit until start reaches a word boundary.
    while (start % wordSize) {
        if (get(start))
            ++total;
        ++start;
    }

    // Then count the remaining whole words.
    for (size_t wordIndex = start / wordSize; wordIndex < words; ++wordIndex)
        total += WTF::bitCount(bits[wordIndex]);

    return total;
}
// True when no storage word has any bit set.
template<size_t bitSetSize, typename WordType>
inline bool BitSet<bitSetSize, WordType>::isEmpty() const
{
    for (const WordType& word : bits) {
        if (word)
            return false;
    }
    return true;
}
// True when every bit of the set is set. A partially used last word only
// needs its low bitSetSize % wordSize bits set.
template<size_t bitSetSize, typename WordType>
inline bool BitSet<bitSetSize, WordType>::isFull() const
{
    const size_t remainingBits = bitSetSize % wordSize;
    for (size_t i = 0; i < words; ++i) {
        if (!~bits[i])
            continue;

        // The first word that fails the all-ones test decides the result.
        const bool isPartialLastWord = remainingBits && i == words - 1;
        if (!isPartialLastWord)
            return false;

        const WordType mask = (static_cast<WordType>(1) << remainingBits) - 1;
        return (bits[i] & mask) == mask;
    }
    return true;
}
// Unions `other` into this set, word by word.
template<size_t bitSetSize, typename WordType>
inline void BitSet<bitSetSize, WordType>::merge(const BitSet& other)
{
    for (size_t wordIndex = 0; wordIndex < words; ++wordIndex) {
        const WordType incoming = other.bits[wordIndex];
        bits[wordIndex] |= incoming;
    }
}
// Intersects this set with `other`, word by word.
template<size_t bitSetSize, typename WordType>
inline void BitSet<bitSetSize, WordType>::filter(const BitSet& other)
{
    for (size_t wordIndex = 0; wordIndex < words; ++wordIndex) {
        const WordType keep = other.bits[wordIndex];
        bits[wordIndex] &= keep;
    }
}
// Clears in this set every bit that is set in `other`.
template<size_t bitSetSize, typename WordType>
inline void BitSet<bitSetSize, WordType>::exclude(const BitSet& other)
{
    for (size_t wordIndex = 0; wordIndex < words; ++wordIndex) {
        const WordType drop = other.bits[wordIndex];
        bits[wordIndex] &= ~drop;
    }
}
// True when every bit set in `other` is also set in this set.
template<size_t bitSetSize, typename WordType>
inline bool BitSet<bitSetSize, WordType>::subsumes(const BitSet& other) const
{
    for (size_t wordIndex = 0; wordIndex < words; ++wordIndex) {
        const WordType mine = bits[wordIndex];
        const WordType combined = mine | other.bits[wordIndex];
        // Anything `other` adds to the union is a bit this set lacks.
        if (combined != mine)
            return false;
    }
    return true;
}
// Returns the index of the first bit at or after startIndex whose value equals
// `value`, or bitSetSize when there is none.
template<size_t bitSetSize, typename WordType>
inline size_t BitSet<bitSetSize, WordType>::findBit(size_t startIndex, bool value) const
{
    // Words that cannot contain the wanted value are skipped outright:
    // all-zero words when looking for a set bit, all-one words otherwise.
    const WordType skipValue = value ? static_cast<WordType>(0) : static_cast<WordType>(~static_cast<WordType>(0));

    size_t wordIndex = startIndex / wordSize;
    size_t firstBitInWord = startIndex % wordSize;
    for (; wordIndex < words; ++wordIndex, firstBitInWord = 0) {
        const WordType word = bits[wordIndex];
        if (word == skipValue)
            continue;

        size_t index = firstBitInWord;
        if (findBitInWord(word, index, wordSize, value))
            return wordIndex * wordSize + index;
    }

    return bitSetSize;
}
// Unions `other` into this set and zeroes `other`.
template<size_t bitSetSize, typename WordType>
inline void BitSet<bitSetSize, WordType>::mergeAndClear(BitSet& other)
{
    for (size_t wordIndex = 0; wordIndex < words; ++wordIndex) {
        WordType& theirs = other.bits[wordIndex];
        bits[wordIndex] |= theirs;
        theirs = 0;
    }
}
// Copies `other` into this set and zeroes `other`.
template<size_t bitSetSize, typename WordType>
inline void BitSet<bitSetSize, WordType>::setAndClear(BitSet& other)
{
    for (size_t wordIndex = 0; wordIndex < words; ++wordIndex) {
        WordType& theirs = other.bits[wordIndex];
        bits[wordIndex] = theirs;
        theirs = 0;
    }
}
// Sets bits start, start + n, start + 2n, ... for every such index below `end`.
template<size_t bitSetSize, typename WordType>
inline void BitSet<bitSetSize, WordType>::setEachNthBit(size_t n, size_t start, size_t end)
{
    ASSERT(start <= end);
    ASSERT(end <= bitSetSize);

    const size_t endWordIndex = end / wordSize;
    const size_t endIndex = end % wordSize;

    size_t wordIndex = start / wordSize;
    size_t index = start % wordSize;

    // Whole words before the one containing `end`.
    for (; wordIndex < endWordIndex; ++wordIndex) {
        for (; index < wordSize; index += n)
            bits[wordIndex] |= (one << index);
        // Carry the overshoot into the next word.
        index -= wordSize;
    }

    // Partial word holding the bits just below `end`.
    for (; index < endIndex; index += n)
        bits[wordIndex] |= (one << index);

    cleanseLastWord();
}
// Two sets are equal when all of their storage words are equal.
template<size_t bitSetSize, typename WordType>
inline bool BitSet<bitSetSize, WordType>::operator==(const BitSet& other) const
{
    return bits == other.bits;
}
// Word-wise OR with `other`.
template<size_t bitSetSize, typename WordType>
inline void BitSet<bitSetSize, WordType>::operator|=(const BitSet& other)
{
    for (size_t wordIndex = 0; wordIndex < words; ++wordIndex) {
        const WordType rhsWord = other.bits[wordIndex];
        bits[wordIndex] |= rhsWord;
    }
}
// Word-wise AND with `other`.
template<size_t bitSetSize, typename WordType>
inline void BitSet<bitSetSize, WordType>::operator&=(const BitSet& other)
{
    for (size_t wordIndex = 0; wordIndex < words; ++wordIndex) {
        const WordType rhsWord = other.bits[wordIndex];
        bits[wordIndex] &= rhsWord;
    }
}
// Word-wise XOR with `other`.
template<size_t bitSetSize, typename WordType>
inline void BitSet<bitSetSize, WordType>::operator^=(const BitSet& other)
{
    for (size_t wordIndex = 0; wordIndex < words; ++wordIndex) {
        const WordType rhsWord = other.bits[wordIndex];
        bits[wordIndex] ^= rhsWord;
    }
}
} // namespace WTF
// We can't do "using WTF::BitSet;" here because there is a function in the macOS SDK named BitSet() already.

268
third_party/yarr/BitVector.cpp vendored Normal file
View file

@ -0,0 +1,268 @@
/*
* Copyright (C) 2011-2022 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "WTFBridge.h"
#include <algorithm>
#include <new>
#include <stdlib.h>
#include <string.h>
namespace WTF {
void BitVector::setSlow(const BitVector& other)
{
uintptr_t newBitsOrPointer;
if (other.isInline() || other.isEmptyOrDeletedValue())
newBitsOrPointer = other.m_bitsOrPointer;
else {
OutOfLineBits* newOutOfLineBits = OutOfLineBits::create(other.size());
memcpy(newOutOfLineBits->bits(), other.bits(), byteCount(other.size()));
newBitsOrPointer = bitwise_cast<uintptr_t>(newOutOfLineBits) >> 1;
}
if (!isInline() && !isEmptyOrDeletedValue())
OutOfLineBits::destroy(outOfLineBits());
m_bitsOrPointer = newBitsOrPointer;
}
void BitVector::resize(size_t numBits)
{
if (numBits <= maxInlineBits()) {
if (isInline())
return;
OutOfLineBits* myOutOfLineBits = outOfLineBits();
m_bitsOrPointer = makeInlineBits(*myOutOfLineBits->bits());
OutOfLineBits::destroy(myOutOfLineBits);
return;
}
resizeOutOfLine(numBits);
}
// Clears every bit while keeping the current storage mode and size.
void BitVector::clearAll()
{
    if (!isInline()) {
        memset(outOfLineBits()->bits(), 0, byteCount(size()));
        return;
    }
    m_bitsOrPointer = makeInlineBits(0);
}
// Allocates out-of-line storage able to hold at least numBits bits.
//
// The allocation is one malloc() block of `size` bytes: the OutOfLineBits
// header plus one uintptr_t per word of bit storage. It is released with
// free() by OutOfLineBits::destroy(). The bit words are not initialized here;
// callers fill them in.
BitVector::OutOfLineBits* BitVector::OutOfLineBits::create(size_t numBits)
{
    // Round the capacity up to a whole number of pointer-sized words.
    numBits = (numBits + bitsInPointer() - 1) & ~(static_cast<size_t>(bitsInPointer()) - 1);
    size_t size = sizeof(OutOfLineBits) + sizeof(uintptr_t) * (numBits / bitsInPointer());

    void* storage = malloc(size);
    // Out of memory is treated as fatal rather than returning a null pointer.
    RELEASE_ASSERT(storage);

    // Construct the header in place at the start of the block.
    OutOfLineBits* result = new (storage) OutOfLineBits(numBits);
    return result;
}
// Releases out-of-line storage by handing the block to free().
void BitVector::OutOfLineBits::destroy(OutOfLineBits* outOfLineBits)
{
    void* storage = outOfLineBits;
    free(storage);
}
// Grows the vector by shiftInBits bits and moves the existing contents up by
// that many bits. shiftInBits must be a multiple of 64.
void BitVector::shiftRightByMultipleOf64(size_t shiftInBits)
{
    RELEASE_ASSERT(!(shiftInBits % 64));
    static_assert(!(8 % sizeof(void*)), "BitVector::shiftRightByMultipleOf64 assumes that word size is a divisor of 64");

    const size_t bitsPerWord = 8 * sizeof(void*);
    const size_t shiftInWords = shiftInBits / bitsPerWord;
    const size_t grownNumBits = size() + shiftInBits;
    resizeOutOfLine(grownNumBits, shiftInWords);
}
// Moves the vector into freshly allocated out-of-line storage of at least
// numBits bits. When growing (or coming from inline storage), the old
// contents are placed shiftInWords words into the new storage and every other
// word is zeroed. When not growing, the leading words of the old storage are
// copied and shiftInWords is not used.
//
// The bounds checks run before any write into the new storage, so a violated
// invariant aborts before memory is overwritten.
void BitVector::resizeOutOfLine(size_t numBits, size_t shiftInWords)
{
    ASSERT(numBits > maxInlineBits());
    OutOfLineBits* newOutOfLineBits = OutOfLineBits::create(numBits);
    size_t newNumWords = newOutOfLineBits->numWords();
    if (isInline()) {
        // The shifted inline word must fit inside the new storage.
        RELEASE_ASSERT(shiftInWords + 1 <= newNumWords);
        memset(newOutOfLineBits->bits(), 0, shiftInWords * sizeof(void*));
        // Make sure that all of the bits are zero in case we do a no-op resize.
        *(newOutOfLineBits->bits() + shiftInWords) = m_bitsOrPointer & ~(static_cast<uintptr_t>(1) << maxInlineBits());
        memset(newOutOfLineBits->bits() + shiftInWords + 1, 0, (newNumWords - 1 - shiftInWords) * sizeof(void*));
    } else {
        if (numBits > size()) {
            size_t oldNumWords = outOfLineBits()->numWords();
            // The shifted old contents must fit inside the new storage.
            RELEASE_ASSERT(shiftInWords + oldNumWords <= newNumWords);
            memset(newOutOfLineBits->bits(), 0, shiftInWords * sizeof(void*));
            memcpy(newOutOfLineBits->bits() + shiftInWords, outOfLineBits()->bits(), oldNumWords * sizeof(void*));
            memset(newOutOfLineBits->bits() + shiftInWords + oldNumWords, 0, (newNumWords - oldNumWords - shiftInWords) * sizeof(void*));
        } else
            memcpy(newOutOfLineBits->bits(), outOfLineBits()->bits(), newOutOfLineBits->numWords() * sizeof(void*));
        OutOfLineBits::destroy(outOfLineBits());
    }
    m_bitsOrPointer = bitwise_cast<uintptr_t>(newOutOfLineBits) >> 1;
}
// Union path used when this vector and `other` are not both inline.
void BitVector::mergeSlow(const BitVector& other)
{
    if (other.isInline()) {
        ASSERT(!isInline());
        // An inline vector only contributes bits to the first word.
        *bits() |= cleanseInlineBits(other.m_bitsOrPointer);
        return;
    }

    ensureSize(other.size());
    ASSERT(!isInline());
    ASSERT(!other.isInline());

    OutOfLineBits* a = outOfLineBits();
    const OutOfLineBits* b = other.outOfLineBits();
    // Only the words both vectors have can contribute. This vector may hold
    // more words than `other`, and indexing b past its own word count would
    // read outside its allocation.
    for (size_t i = std::min(a->numWords(), b->numWords()); i--;)
        a->bits()[i] |= b->bits()[i];
}
void BitVector::filterSlow(const BitVector& other)
{
if (other.isInline()) {
ASSERT(!isInline());
*bits() &= cleanseInlineBits(other.m_bitsOrPointer);
return;
}
if (isInline()) {
ASSERT(!other.isInline());
m_bitsOrPointer &= *other.outOfLineBits()->bits();
m_bitsOrPointer |= (static_cast<uintptr_t>(1) << maxInlineBits());
ASSERT(isInline());
return;
}
OutOfLineBits* a = outOfLineBits();
const OutOfLineBits* b = other.outOfLineBits();
for (unsigned i = std::min(a->numWords(), b->numWords()); i--;)
a->bits()[i] &= b->bits()[i];
for (unsigned i = b->numWords(); i < a->numWords(); ++i)
a->bits()[i] = 0;
}
void BitVector::excludeSlow(const BitVector& other)
{
if (other.isInline()) {
ASSERT(!isInline());
*bits() &= ~cleanseInlineBits(other.m_bitsOrPointer);
return;
}
if (isInline()) {
ASSERT(!other.isInline());
m_bitsOrPointer &= ~*other.outOfLineBits()->bits();
m_bitsOrPointer |= (static_cast<uintptr_t>(1) << maxInlineBits());
ASSERT(isInline());
return;
}
OutOfLineBits* a = outOfLineBits();
const OutOfLineBits* b = other.outOfLineBits();
for (unsigned i = std::min(a->numWords(), b->numWords()); i--;)
a->bits()[i] &= ~b->bits()[i];
}
size_t BitVector::bitCountSlow() const
{
ASSERT(!isInline());
const OutOfLineBits* bits = outOfLineBits();
size_t result = 0;
for (unsigned i = bits->numWords(); i--;)
result += bitCount(bits->bits()[i]);
return result;
}
bool BitVector::isEmptySlow() const
{
ASSERT(!isInline());
const OutOfLineBits* bits = outOfLineBits();
for (unsigned i = bits->numWords(); i--;) {
if (bits->bits()[i])
return false;
}
return true;
}
// Equality path used when the two vectors are not both inline. Returns the
// word-wise result and asserts that it agrees with the bit-by-bit comparison.
bool BitVector::equalsSlowCase(const BitVector& other) const
{
    const bool fastResult = equalsSlowCaseFast(other);
    ASSERT(fastResult == equalsSlowCaseSimple(other));
    return fastResult;
}
// Word-wise equality. When the storage modes differ, the bit-by-bit
// comparison is used instead. Otherwise the longer vector must be zero beyond
// the shorter one's length and all shared words must match.
bool BitVector::equalsSlowCaseFast(const BitVector& other) const
{
    if (isInline() != other.isInline())
        return equalsSlowCaseSimple(other);

    const OutOfLineBits* myBits = outOfLineBits();
    const OutOfLineBits* otherBits = other.outOfLineBits();

    const size_t myNumWords = myBits->numWords();
    const size_t otherNumWords = otherBits->numWords();

    const bool otherIsLonger = myNumWords < otherNumWords;
    const OutOfLineBits* longerBits = otherIsLonger ? otherBits : myBits;
    const size_t sharedWords = otherIsLonger ? myNumWords : otherNumWords;
    const size_t totalWords = otherIsLonger ? otherNumWords : myNumWords;

    // Words only the longer vector has must all be zero.
    for (size_t i = sharedWords; i < totalWords; ++i) {
        if (longerBits->bits()[i])
            return false;
    }

    for (size_t i = 0; i < sharedWords; ++i) {
        if (myBits->bits()[i] != otherBits->bits()[i])
            return false;
    }

    return true;
}
// Bit-by-bit equality over the larger of the two sizes.
bool BitVector::equalsSlowCaseSimple(const BitVector& other) const
{
    // This is really cheesy, but probably good enough for now.
    unsigned remaining = std::max(size(), other.size());
    while (remaining--) {
        if (get(remaining) != other.get(remaining))
            return false;
    }
    return true;
}
uintptr_t BitVector::hashSlowCase() const
{
ASSERT(!isInline());
const OutOfLineBits* bits = outOfLineBits();
uintptr_t result = 0;
for (unsigned i = bits->numWords(); i--;)
result ^= bits->bits()[i];
return result;
}
} // namespace WTF

475
third_party/yarr/BitVector.h vendored Normal file
View file

@ -0,0 +1,475 @@
/*
* Copyright (C) 2011-2023 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
namespace JSC {
class CachedBitVector;
}
namespace WTF {
class FixedBitVector;
// This is a space-efficient, resizeable bitvector class. In the common case it
// occupies one word, but if necessary, it will inflate this one word to point
// to a single chunk of out-of-line allocated storage to store an arbitrary number
// of bits.
//
// - The bitvector remembers the bound of how many bits can be stored, but this
// may be slightly greater (by as much as some platform-specific constant)
// than the last argument passed to ensureSize().
//
// - The bitvector can resize itself automatically (set, clear, get) or can be used
// in a manual mode, which is faster (quickSet, quickClear, quickGet, ensureSize).
//
// - Accesses ASSERT that you are within bounds.
//
// - Bits are automatically initialized to zero.
//
// On the other hand, this BitVector class may not be the fastest around, since
// it does conditionals on every get/set/clear. But it is great if you need to
// juggle a lot of variable-length BitVectors and you're worried about wasting
// space.
// If you know the length of the vector at compile-time,
// please consider using WTF::BitSet instead.
// A growable set of bits that keeps up to maxInlineBits() bits directly inside one
// pointer-sized word and switches to a separately allocated OutOfLineBits buffer
// when more bits are needed.
//
// Encoding of m_bitsOrPointer:
//  - inline: the most significant bit is set as a tag (see makeInlineBits() and
//    isInline()); the remaining bits are the payload.
//  - out of line: the tag bit is clear and outOfLineBits() recovers the buffer
//    pointer by shifting the word left by one.
class BitVector final {
WTF_MAKE_FAST_ALLOCATED;
public:
// Creates an inline vector with every payload bit clear.
BitVector()
: m_bitsOrPointer(makeInlineBits(0))
{
}
// Creates a vector able to hold at least numBits bits.
explicit BitVector(size_t numBits)
: m_bitsOrPointer(makeInlineBits(0))
{
ensureSize(numBits);
}
// Copy construction starts from an empty inline vector and reuses operator=.
BitVector(const BitVector& other)
: m_bitsOrPointer(makeInlineBits(0))
{
(*this) = other;
}
// Only out-of-line storage has to be released; inline bits live in the word itself.
~BitVector()
{
if (isInline())
return;
OutOfLineBits::destroy(outOfLineBits());
}
// When both sides are inline the word is copied directly; every other
// combination is delegated to setSlow().
BitVector& operator=(const BitVector& other)
{
if (isInline() && other.isInline())
m_bitsOrPointer = other.m_bitsOrPointer;
else
setSlow(other);
return *this;
}
// Number of addressable bits: maxInlineBits() while inline, otherwise the bit
// count recorded in the out-of-line header.
size_t size() const
{
if (isInline())
return maxInlineBits();
return outOfLineBits()->numBits();
}
// Grows the vector so that at least numBits bits are addressable. Never shrinks.
void ensureSize(size_t numBits)
{
if (numBits <= size())
return;
resizeOutOfLine(numBits);
}
// Like ensureSize(), but supports reducing the size of the bitvector.
WTF_EXPORT_PRIVATE void resize(size_t numBits);
WTF_EXPORT_PRIVATE void clearAll();
// The quick* accessors do not grow the vector; the caller must guarantee
// bit < size() (only checked by the assertion).
bool quickGet(size_t bit) const
{
ASSERT_WITH_SECURITY_IMPLICATION(bit < size());
return !!(bits()[bit / bitsInPointer()] & (static_cast<uintptr_t>(1) << (bit & (bitsInPointer() - 1))));
}
// Sets the bit and returns its previous value.
bool quickSet(size_t bit)
{
ASSERT_WITH_SECURITY_IMPLICATION(bit < size());
uintptr_t& word = bits()[bit / bitsInPointer()];
uintptr_t mask = static_cast<uintptr_t>(1) << (bit & (bitsInPointer() - 1));
bool result = !!(word & mask);
word |= mask;
return result;
}
// Clears the bit and returns its previous value.
bool quickClear(size_t bit)
{
ASSERT_WITH_SECURITY_IMPLICATION(bit < size());
uintptr_t& word = bits()[bit / bitsInPointer()];
uintptr_t mask = static_cast<uintptr_t>(1) << (bit & (bitsInPointer() - 1));
bool result = !!(word & mask);
word &= ~mask;
return result;
}
// Writes value into the bit and returns the previous value.
bool quickSet(size_t bit, bool value)
{
if (value)
return quickSet(bit);
return quickClear(bit);
}
// Bounds-tolerant read: bits at or beyond size() read as false.
bool get(size_t bit) const
{
if (bit >= size())
return false;
return quickGet(bit);
}
// Set-style alias for get().
bool contains(size_t bit) const
{
return get(bit);
}
// Grows the vector if necessary, sets the bit and returns its previous value.
bool set(size_t bit)
{
ensureSize(bit + 1);
return quickSet(bit);
}
// This works like the add methods of sets. Instead of returning the previous value, like set(),
// it returns whether the bit transitioned from false to true.
bool add(size_t bit)
{
return !set(bit);
}
// Ensures the vector holds at least `size` bits, then sets `bit` via quickSet();
// the caller is responsible for passing bit < size.
bool ensureSizeAndSet(size_t bit, size_t size)
{
ensureSize(size);
return quickSet(bit);
}
// Clears the bit and returns its previous value. Out-of-range bits are
// reported as false and the vector is not grown.
bool clear(size_t bit)
{
if (bit >= size())
return false;
return quickClear(bit);
}
// Set-style alias for clear().
bool remove(size_t bit)
{
return clear(bit);
}
// Growing counterpart of quickSet(bit, value): sets (growing as needed) or clears.
bool set(size_t bit, bool value)
{
if (value)
return set(bit);
return clear(bit);
}
// In-place union. Two inline operands are OR-ed word-wise; both carry the tag
// bit, so the result keeps it.
void merge(const BitVector& other)
{
if (!isInline() || !other.isInline()) {
mergeSlow(other);
return;
}
m_bitsOrPointer |= other.m_bitsOrPointer;
ASSERT(isInline());
}
// In-place intersection. Two inline operands are AND-ed word-wise; both carry
// the tag bit, so the result keeps it.
void filter(const BitVector& other)
{
if (!isInline() || !other.isInline()) {
filterSlow(other);
return;
}
m_bitsOrPointer &= other.m_bitsOrPointer;
ASSERT(isInline());
}
// In-place difference (this AND NOT other).
void exclude(const BitVector& other)
{
if (!isInline() || !other.isInline()) {
excludeSlow(other);
return;
}
m_bitsOrPointer &= ~other.m_bitsOrPointer;
// The complement of an inline word has the tag bit clear, so the AND above
// dropped our tag; put it back.
m_bitsOrPointer |= (static_cast<uintptr_t>(1) << maxInlineBits());
ASSERT(isInline());
}
// Number of set bits. The inline tag bit is masked out before counting.
size_t bitCount() const
{
if (isInline())
return bitCount(cleanseInlineBits(m_bitsOrPointer));
return bitCountSlow();
}
// True when no bit is set. The inline tag bit is ignored.
bool isEmpty() const
{
if (isInline())
return !cleanseInlineBits(m_bitsOrPointer);
return isEmptySlow();
}
// Returns the index of the first bit at or after `index` that equals `value`;
// see findBitFast() for the value returned when there is none.
size_t findBit(size_t index, bool value) const
{
size_t result = findBitFast(index, value);
return result;
}
// Tag constructors producing the raw words 0 and 1. Neither has the inline tag
// bit set, so isInline() is false for both.
// NOTE(review): presumably hash-table sentinel values; confirm how such objects
// are destroyed, since the destructor treats any non-inline word as a pointer.
enum EmptyValueTag { EmptyValue };
enum DeletedValueTag { DeletedValue };
BitVector(EmptyValueTag)
: m_bitsOrPointer(0)
{
}
BitVector(DeletedValueTag)
: m_bitsOrPointer(1)
{
}
bool isEmptyValue() const { return !m_bitsOrPointer; }
bool isDeletedValue() const { return m_bitsOrPointer == 1; }
bool isEmptyOrDeletedValue() const { return m_bitsOrPointer <= 1; }
// Two inline vectors are equal when their raw words match; any other
// combination is decided by equalsSlowCase().
bool operator==(const BitVector& other) const
{
if (isInline() && other.isInline())
return m_bitsOrPointer == other.m_bitsOrPointer;
return equalsSlowCase(other);
}
// Forward iterator whose value is the index of a set bit.
class iterator {
WTF_MAKE_FAST_ALLOCATED;
public:
iterator()
: m_bitVector(nullptr)
, m_index(0)
{
}
iterator(const BitVector& bitVector, size_t index)
: m_bitVector(&bitVector)
, m_index(index)
{
}
size_t operator*() const { return m_index; }
// Advances to the next set bit after the current index.
iterator& operator++()
{
m_index = m_bitVector->findBit(m_index + 1, true);
return *this;
}
iterator operator++(int)
{
iterator result = *this;
++(*this);
return result;
}
bool isAtEnd() const
{
return m_index >= m_bitVector->size();
}
// Only the index is compared; the vector being iterated is not.
bool operator==(const iterator& other) const
{
return m_index == other.m_index;
}
bool operator!=(const iterator& other) const
{
return !operator==(other);
}
private:
const BitVector* m_bitVector;
size_t m_index;
};
// Use this to iterate over set bits.
iterator begin() const { return iterator(*this, findBit(0, true)); }
iterator end() const { return iterator(*this, size()); }
// Bytes needed for size() bits when stored out of line; 0 while inline.
unsigned outOfLineMemoryUse() const
{
if (isInline())
return 0;
return byteCount(size());
}
WTF_EXPORT_PRIVATE void shiftRightByMultipleOf64(size_t);
private:
friend class JSC::CachedBitVector;
friend class FixedBitVector;
// Number of bits in a pointer-sized word.
static unsigned bitsInPointer()
{
return sizeof(void*) << 3;
}
// One bit of the word is reserved for the inline tag.
static unsigned maxInlineBits()
{
return bitsInPointer() - 1;
}
// Bits to bytes, rounding up.
static size_t byteCount(size_t bitCount)
{
return (bitCount + 7) >> 3;
}
// Adds the inline tag (top bit) to a payload that must not already use it.
static uintptr_t makeInlineBits(uintptr_t bits)
{
ASSERT(!(bits & (static_cast<uintptr_t>(1) << maxInlineBits())));
return bits | (static_cast<uintptr_t>(1) << maxInlineBits());
}
// Strips the inline tag, leaving only the payload bits.
static uintptr_t cleanseInlineBits(uintptr_t bits)
{
return bits & ~(static_cast<uintptr_t>(1) << maxInlineBits());
}
// Population count of one word, picking the 32- or 64-bit helper by word size.
static size_t bitCount(uintptr_t bits)
{
if (sizeof(uintptr_t) == 4)
return WTF::bitCount(static_cast<unsigned>(bits));
return WTF::bitCount(static_cast<uint64_t>(bits));
}
// Word-at-a-time search for the first bit >= startIndex equal to `value`.
// Inline: scans the payload bits of the single word; the index handed back by
// findBitInWord() is returned whether or not a match was found.
// Out of line: scans word by word and returns numBits() when nothing matches.
size_t findBitFast(size_t startIndex, bool value) const
{
if (isInline()) {
size_t index = startIndex;
findBitInWord(m_bitsOrPointer, index, maxInlineBits(), value);
return index;
}
const OutOfLineBits* bits = outOfLineBits();
// value = true: casts to 1, then xors to 0, then negates to 0.
// value = false: casts to 0, then xors to 1, then negates to -1 (i.e. all one bits).
uintptr_t skipValue = -(static_cast<uintptr_t>(value) ^ 1);
size_t numWords = bits->numWords();
size_t wordIndex = startIndex / bitsInPointer();
size_t startIndexInWord = startIndex - wordIndex * bitsInPointer();
while (wordIndex < numWords) {
uintptr_t word = bits->bits()[wordIndex];
// A word equal to skipValue cannot contain the bit value we are looking for.
if (word != skipValue) {
size_t index = startIndexInWord;
if (findBitInWord(word, index, bitsInPointer(), value))
return wordIndex * bitsInPointer() + index;
}
wordIndex++;
// Only the first word is searched from a non-zero offset.
startIndexInWord = 0;
}
return bits->numBits();
}
// Bit-by-bit search built on get(); returns size() when nothing matches.
// Not called anywhere else within this class body.
size_t findBitSimple(size_t index, bool value) const
{
while (index < size()) {
if (get(index) == value)
return index;
index++;
}
return size();
}
// Header of the out-of-line buffer. bits() returns the address immediately
// after the header (this + 1), which is where the words are read from.
class OutOfLineBits {
public:
size_t numBits() const { return m_numBits; }
// Words needed to hold numBits(), rounding up.
size_t numWords() const { return (m_numBits + bitsInPointer() - 1) / bitsInPointer(); }
uintptr_t* bits() { return bitwise_cast<uintptr_t*>(this + 1); }
const uintptr_t* bits() const { return bitwise_cast<const uintptr_t*>(this + 1); }
static WTF_EXPORT_PRIVATE OutOfLineBits* create(size_t numBits);
static WTF_EXPORT_PRIVATE void destroy(OutOfLineBits*);
private:
OutOfLineBits(size_t numBits)
: m_numBits(numBits)
{
}
size_t m_numBits;
};
// True when the tag (top) bit is set.
bool isInline() const { return m_bitsOrPointer >> maxInlineBits(); }
// Recovers the buffer pointer by shifting the stored word left by one.
const OutOfLineBits* outOfLineBits() const { return bitwise_cast<const OutOfLineBits*>(m_bitsOrPointer << 1); }
OutOfLineBits* outOfLineBits() { return bitwise_cast<OutOfLineBits*>(m_bitsOrPointer << 1); }
// Out-of-line implementations; their definitions are not part of this header.
WTF_EXPORT_PRIVATE void resizeOutOfLine(size_t numBits, size_t shiftInWords = 0);
WTF_EXPORT_PRIVATE void setSlow(const BitVector& other);
WTF_EXPORT_PRIVATE void mergeSlow(const BitVector& other);
WTF_EXPORT_PRIVATE void filterSlow(const BitVector& other);
WTF_EXPORT_PRIVATE void excludeSlow(const BitVector& other);
WTF_EXPORT_PRIVATE size_t bitCountSlow() const;
WTF_EXPORT_PRIVATE bool isEmptySlow() const;
WTF_EXPORT_PRIVATE bool equalsSlowCase(const BitVector& other) const;
bool equalsSlowCaseFast(const BitVector& other) const;
bool equalsSlowCaseSimple(const BitVector& other) const;
WTF_EXPORT_PRIVATE uintptr_t hashSlowCase() const;
// Pointer to the first storage word: the member itself while inline, otherwise
// the out-of-line word array.
uintptr_t* bits()
{
if (isInline())
return &m_bitsOrPointer;
return outOfLineBits()->bits();
}
const uintptr_t* bits() const
{
if (isInline())
return &m_bitsOrPointer;
return outOfLineBits()->bits();
}
// Either tagged inline bits or an encoded OutOfLineBits pointer (see class comment).
uintptr_t m_bitsOrPointer;
};
} // namespace WTF
using WTF::BitVector;

135
third_party/yarr/OptionSet.h vendored Normal file
View file

@ -0,0 +1,135 @@
/*
* Copyright (C) 2016 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <initializer_list>
#include <iterator>
#include <type_traits>
namespace WTF {
// OptionSet is a class that represents a set of enumerators in a space-efficient manner. The enumerators
// must be powers of two greater than 0. This class is useful as a replacement for passing a bitmask of
// enumerators around.
//
// The set is stored as a single value of the enum's unsigned underlying type, with one bit per
// enumerator.
template<typename T> class OptionSet {
    static_assert(std::is_enum<T>::value, "T is not an enum type");
    typedef typename std::make_unsigned<typename std::underlying_type<T>::type>::type StorageType;

public:
    // Forward iterator over the enumerators present in the set, from the lowest bit to the highest.
    // Its constructor is constexpr so that Iterator is a literal type; without that, the constexpr
    // begin()/end() below could never be evaluated at compile time.
    template<typename StorageType> class Iterator {
    public:
        // Isolate the rightmost set bit.
        constexpr T operator*() const { return static_cast<T>(m_value & -m_value); }

        // Iterates from smallest to largest enum value by turning off the rightmost set bit.
        Iterator& operator++()
        {
            m_value &= m_value - 1;
            return *this;
        }

        // Post-increment is intentionally unavailable.
        Iterator& operator++(int) = delete;

        constexpr bool operator==(const Iterator& other) const { return m_value == other.m_value; }
        constexpr bool operator!=(const Iterator& other) const { return m_value != other.m_value; }

    private:
        // Only OptionSet creates iterators (from its raw storage).
        constexpr Iterator(StorageType value) : m_value(value) { }
        friend OptionSet;

        // Bits that have not been visited yet; 0 means "end".
        StorageType m_value;
    };
    using iterator = Iterator<StorageType>;

    // Builds a set directly from a raw bit pattern, without any validation.
    static constexpr OptionSet fromRaw(StorageType storageType)
    {
        return OptionSet(static_cast<T>(storageType), FromRawValue);
    }

    // The default-constructed set is empty.
    constexpr OptionSet() = default;

    // Implicit conversion from a single enumerator. The value is stored as-is.
    constexpr OptionSet(T t)
        : m_storage(static_cast<StorageType>(t))
    {
    }

    // FIXME: Make this constexpr once we adopt C++14 as C++11 does not support for-loops
    // in a constexpr function.
    OptionSet(std::initializer_list<T> initializerList)
    {
        for (auto& option : initializerList) {
            // "Enumerator is not a positive power of two."
            ASSERT(hasOneBitSet(static_cast<StorageType>(option)));
            m_storage |= static_cast<StorageType>(option);
        }
    }

    constexpr StorageType toRaw() const { return m_storage; }

    constexpr bool isEmpty() const { return !m_storage; }

    // begin() starts with every stored bit pending; end() is the iterator with no bits left.
    constexpr iterator begin() const { return m_storage; }
    constexpr iterator end() const { return 0; }

    // Returns true when this set shares at least one enumerator with optionSet
    // (an "any of" test, not an "all of" test).
    constexpr bool contains(OptionSet optionSet) const
    {
        return !!(m_storage & optionSet.m_storage);
    }

    constexpr friend bool operator==(OptionSet lhs, OptionSet rhs)
    {
        return lhs.m_storage == rhs.m_storage;
    }

    constexpr friend bool operator!=(OptionSet lhs, OptionSet rhs)
    {
        return lhs.m_storage != rhs.m_storage;
    }

    // In-place union.
    friend OptionSet& operator|=(OptionSet& lhs, OptionSet rhs)
    {
        lhs.m_storage |= rhs.m_storage;
        return lhs;
    }

    // Set difference: the enumerators of lhs that are not in rhs.
    constexpr friend OptionSet operator-(OptionSet lhs, OptionSet rhs)
    {
        return OptionSet::fromRaw(lhs.m_storage & ~rhs.m_storage);
    }

private:
    // Tag selecting the private constructor used by fromRaw().
    enum InitializationTag { FromRawValue };
    constexpr OptionSet(T t, InitializationTag)
        : m_storage(static_cast<StorageType>(t))
    {
    }

    StorageType m_storage { 0 };
};
}
using WTF::OptionSet;

View file

@ -36,7 +36,6 @@ WTF_EXPORT_PRIVATE size_t pageSize();
WTF_EXPORT_PRIVATE size_t pageMask();
inline bool isPageAligned(void* address) { return !(reinterpret_cast<intptr_t>(address) & (pageSize() - 1)); }
inline bool isPageAligned(size_t size) { return !(size & (pageSize() - 1)); }
inline bool isPowerOfTwo(size_t size) { return !(size & (size - 1)); }
class PageBlock {
public:
PageBlock();
@ -76,6 +75,5 @@ inline PageBlock::PageBlock(void* base, size_t size, bool hasGuardPages)
using WTF::pageSize;
using WTF::isPageAligned;
using WTF::isPageAligned;
using WTF::isPowerOfTwo;
#endif // PageBlock_h

View file

@ -1,6 +1,43 @@
// Builds the character class for ASCII word characters: digits, upper-case
// letters, underscore and lower-case letters. Marked as containing BMP
// characters only.
std::unique_ptr<CharacterClass> wordcharCreate()
{
    auto wordClass = makeUnique<CharacterClass>();
    auto& asciiRanges = wordClass->m_ranges;

    asciiRanges.append(CharacterRange(0x30, 0x39)); // '0'-'9'
    asciiRanges.append(CharacterRange(0x41, 0x5a)); // 'A'-'Z'
    wordClass->m_matches.append(0x5f); // '_'
    asciiRanges.append(CharacterRange(0x61, 0x7a)); // 'a'-'z'

    wordClass->m_characterWidths = CharacterClassWidths::HasBMPChars;
    return wordClass;
}
// Builds the word-character class used for Unicode, case-insensitive matching:
// the ASCII word characters plus U+017F and U+212A. Marked as containing BMP
// characters only.
std::unique_ptr<CharacterClass> wordUnicodeIgnoreCaseCharCreate()
{
    auto wordClass = makeUnique<CharacterClass>();
    auto& asciiRanges = wordClass->m_ranges;
    auto& unicodeMatches = wordClass->m_matchesUnicode;

    asciiRanges.append(CharacterRange(0x30, 0x39)); // '0'-'9'
    asciiRanges.append(CharacterRange(0x41, 0x5a)); // 'A'-'Z'
    wordClass->m_matches.append(0x5f); // '_'
    asciiRanges.append(CharacterRange(0x61, 0x7a)); // 'a'-'z'
    unicodeMatches.append(0x017f); // LATIN SMALL LETTER LONG S
    unicodeMatches.append(0x212a); // KELVIN SIGN

    wordClass->m_characterWidths = CharacterClassWidths::HasBMPChars;
    return wordClass;
}
// Builds the complement of the ASCII word-character class: every ASCII code
// point that is not a digit, letter or underscore, plus everything from
// U+0080 through U+10FFFF. Marked as containing both BMP and non-BMP characters.
std::unique_ptr<CharacterClass> nonwordcharCreate()
{
    auto nonWordClass = makeUnique<CharacterClass>();
    auto& asciiRanges = nonWordClass->m_ranges;

    asciiRanges.append(CharacterRange(0x00, 0x2f)); // below '0'
    asciiRanges.append(CharacterRange(0x3a, 0x40)); // between '9' and 'A'
    asciiRanges.append(CharacterRange(0x5b, 0x5e)); // between 'Z' and '_'
    nonWordClass->m_matches.append(0x60); // '`', between '_' and 'a'
    asciiRanges.append(CharacterRange(0x7b, 0x7f)); // above 'z'
    nonWordClass->m_rangesUnicode.append(CharacterRange(0x0080, 0x10ffff));

    nonWordClass->m_characterWidths = CharacterClassWidths::HasBothBMPAndNonBMP;
    return nonWordClass;
}
std::unique_ptr<CharacterClass> nonwordUnicodeIgnoreCaseCharCreate()
{
auto characterClass = std::make_unique<CharacterClass>();
auto characterClass = makeUnique<CharacterClass>();
characterClass->m_ranges.append(CharacterRange(0x00, 0x2f));
characterClass->m_ranges.append(CharacterRange(0x3a, 0x40));
characterClass->m_ranges.append(CharacterRange(0x5b, 0x5e));
@ -9,21 +46,24 @@ std::unique_ptr<CharacterClass> nonwordUnicodeIgnoreCaseCharCreate()
characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0x017e));
characterClass->m_rangesUnicode.append(CharacterRange(0x0180, 0x2129));
characterClass->m_rangesUnicode.append(CharacterRange(0x212b, 0x10ffff));
characterClass->m_hasNonBMPCharacters = true;
characterClass->m_characterWidths = CharacterClassWidths::HasBothBMPAndNonBMP;
return characterClass;
}
std::unique_ptr<CharacterClass> digitsCreate()
std::unique_ptr<CharacterClass> newlineCreate()
{
auto characterClass = std::make_unique<CharacterClass>();
characterClass->m_ranges.append(CharacterRange(0x30, 0x39));
characterClass->m_hasNonBMPCharacters = false;
auto characterClass = makeUnique<CharacterClass>();
characterClass->m_matches.append(0x0a);
characterClass->m_matches.append(0x0d);
characterClass->m_matchesUnicode.append(0x2028);
characterClass->m_matchesUnicode.append(0x2029);
characterClass->m_characterWidths = CharacterClassWidths::HasBMPChars;
return characterClass;
}
std::unique_ptr<CharacterClass> spacesCreate()
{
auto characterClass = std::make_unique<CharacterClass>();
auto characterClass = makeUnique<CharacterClass>();
characterClass->m_ranges.append(CharacterRange(0x09, 0x0d));
characterClass->m_matches.append(0x20);
characterClass->m_matchesUnicode.append(0x00a0);
@ -35,23 +75,13 @@ std::unique_ptr<CharacterClass> spacesCreate()
characterClass->m_matchesUnicode.append(0x205f);
characterClass->m_matchesUnicode.append(0x3000);
characterClass->m_matchesUnicode.append(0xfeff);
characterClass->m_hasNonBMPCharacters = false;
return characterClass;
}
std::unique_ptr<CharacterClass> nondigitsCreate()
{
auto characterClass = std::make_unique<CharacterClass>();
characterClass->m_ranges.append(CharacterRange(0x00, 0x2f));
characterClass->m_ranges.append(CharacterRange(0x3a, 0x7f));
characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0x10ffff));
characterClass->m_hasNonBMPCharacters = true;
characterClass->m_characterWidths = CharacterClassWidths::HasBMPChars;
return characterClass;
}
std::unique_ptr<CharacterClass> nonspacesCreate()
{
auto characterClass = std::make_unique<CharacterClass>();
auto characterClass = makeUnique<CharacterClass>();
characterClass->m_ranges.append(CharacterRange(0x00, 0x08));
characterClass->m_ranges.append(CharacterRange(0x0e, 0x1f));
characterClass->m_ranges.append(CharacterRange(0x21, 0x7f));
@ -65,54 +95,24 @@ std::unique_ptr<CharacterClass> nonspacesCreate()
characterClass->m_rangesUnicode.append(CharacterRange(0x2060, 0x2fff));
characterClass->m_rangesUnicode.append(CharacterRange(0x3001, 0xfefe));
characterClass->m_rangesUnicode.append(CharacterRange(0xff00, 0x10ffff));
characterClass->m_hasNonBMPCharacters = true;
characterClass->m_characterWidths = CharacterClassWidths::HasBothBMPAndNonBMP;
return characterClass;
}
std::unique_ptr<CharacterClass> nonwordcharCreate()
std::unique_ptr<CharacterClass> digitsCreate()
{
auto characterClass = std::make_unique<CharacterClass>();
auto characterClass = makeUnique<CharacterClass>();
characterClass->m_ranges.append(CharacterRange(0x30, 0x39));
characterClass->m_characterWidths = CharacterClassWidths::HasBMPChars;
return characterClass;
}
std::unique_ptr<CharacterClass> nondigitsCreate()
{
auto characterClass = makeUnique<CharacterClass>();
characterClass->m_ranges.append(CharacterRange(0x00, 0x2f));
characterClass->m_ranges.append(CharacterRange(0x3a, 0x40));
characterClass->m_ranges.append(CharacterRange(0x5b, 0x5e));
characterClass->m_matches.append(0x60);
characterClass->m_ranges.append(CharacterRange(0x7b, 0x7f));
characterClass->m_ranges.append(CharacterRange(0x3a, 0x7f));
characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0x10ffff));
characterClass->m_hasNonBMPCharacters = true;
return characterClass;
}
std::unique_ptr<CharacterClass> newlineCreate()
{
auto characterClass = std::make_unique<CharacterClass>();
characterClass->m_matches.append(0x0a);
characterClass->m_matches.append(0x0d);
characterClass->m_matchesUnicode.append(0x2028);
characterClass->m_matchesUnicode.append(0x2029);
characterClass->m_hasNonBMPCharacters = false;
return characterClass;
}
std::unique_ptr<CharacterClass> wordcharCreate()
{
auto characterClass = std::make_unique<CharacterClass>();
characterClass->m_ranges.append(CharacterRange(0x30, 0x39));
characterClass->m_ranges.append(CharacterRange(0x41, 0x5a));
characterClass->m_matches.append(0x5f);
characterClass->m_ranges.append(CharacterRange(0x61, 0x7a));
characterClass->m_hasNonBMPCharacters = false;
return characterClass;
}
std::unique_ptr<CharacterClass> wordUnicodeIgnoreCaseCharCreate()
{
auto characterClass = std::make_unique<CharacterClass>();
characterClass->m_ranges.append(CharacterRange(0x30, 0x39));
characterClass->m_ranges.append(CharacterRange(0x41, 0x5a));
characterClass->m_matches.append(0x5f);
characterClass->m_ranges.append(CharacterRange(0x61, 0x7a));
characterClass->m_matchesUnicode.append(0x017f);
characterClass->m_matchesUnicode.append(0x212a);
characterClass->m_hasNonBMPCharacters = false;
characterClass->m_characterWidths = CharacterClassWidths::HasBothBMPAndNonBMP;
return characterClass;
}

115
third_party/yarr/StringHasher.h vendored Normal file
View file

@ -0,0 +1,115 @@
/*
* Copyright (C) 2005-2023 Apple Inc. All rights reserved.
* Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#pragma once
#include <array>
namespace WTF {
// Golden ratio. Arbitrary start value to avoid mapping all zeros to a hash value of zero.
static constexpr unsigned stringHashingStartValue = 0x9E3779B9U;
class SuperFastHash;
class WYHash;
// Front end for string hashing. It holds the shared constants and the final
// mixing steps; the hashing loops themselves live in the friend classes
// SuperFastHash and WYHash.
class StringHasher {
    WTF_MAKE_FAST_ALLOCATED;

public:
    static constexpr unsigned flagCount = 8; // Save 8 bits for StringImpl to use as flags.
    // Mask that keeps the low (32 - flagCount) bits of a hash.
    static constexpr unsigned maskHash = (1U << (sizeof(unsigned) * 8 - flagCount)) - 1;
    static constexpr unsigned numberOfCharactersInLargestBulkForWYHash = 24; // Don't change this value. It's fixed for WYhash algorithm.

    // Things need to do to update this threshold:
    // 1. This threshold must stay in sync with the threshold in the scripts create_hash_table, Hasher.pm, and hasher.py.
    // 2. Run script `run-bindings-tests --reset-results` to update all CompactHashIndex's under path `WebCore/bindings/scripts/test/JS/`.
    // 3. Manually update all CompactHashIndex's in JSDollarVM.cpp by using createHashTable in hasher.py.
    static constexpr unsigned smallStringThreshold = numberOfCharactersInLargestBulkForWYHash * 2;

    // Default character conversion: reinterpret the character as its unsigned
    // counterpart and return it as a UChar.
    struct DefaultConverter {
        template<typename CharType>
        static constexpr UChar convert(CharType character)
        {
            return static_cast<std::make_unsigned_t<CharType>>(character);
        }
    };

    StringHasher() = default;

    template<typename T, typename Converter = DefaultConverter>
    static unsigned computeHashAndMaskTop8Bits(const T* data, unsigned characterCount);

    template<typename T, unsigned characterCount>
    static constexpr unsigned computeLiteralHashAndMaskTop8Bits(const T (&characters)[characterCount]);

    void addCharacter(UChar character);

    // hashWithTop8BitsMasked will reset to initial status.
    unsigned hashWithTop8BitsMasked();

private:
    friend class SuperFastHash;
    friend class WYHash;

    // Final bit-mixing rounds applied to an accumulated hash.
    ALWAYS_INLINE static unsigned avalancheBits(unsigned hash)
    {
        unsigned mixed = hash;
        mixed ^= mixed << 3;
        mixed += mixed >> 5;
        mixed ^= mixed << 2;
        mixed += mixed >> 15;
        mixed ^= mixed << 10;
        return mixed;
    }

    // Mixes the hash and guarantees a non-zero result.
    ALWAYS_INLINE static unsigned finalize(unsigned hash)
    {
        const unsigned mixed = avalancheBits(hash);
        return avoidZero(mixed);
    }

    // Mixes the hash, drops the top flagCount bits and guarantees a non-zero result.
    ALWAYS_INLINE static unsigned finalizeAndMaskTop8Bits(unsigned hash)
    {
        // Reserving space from the high bits for flags preserves most of the hash's
        // value, since hash lookup typically masks out the high bits anyway.
        const unsigned masked = avalancheBits(hash) & maskHash;
        return avoidZero(masked);
    }

    // This avoids ever returning a hash code of 0, since that is used to
    // signal "hash not computed yet". Setting the high bit maintains
    // reasonable fidelity to a hash code of 0 because it is likely to yield
    // exactly 0 when hash lookup masks out the high bits.
    ALWAYS_INLINE static unsigned avoidZero(unsigned hash)
    {
        return hash ? hash : (0x80000000 >> flagCount);
    }

    unsigned m_hash { stringHashingStartValue };
    UChar m_pendingCharacter { 0 };
    bool m_hasPendingCharacter { false };
};
} // namespace WTF
using WTF::StringHasher;

36
third_party/yarr/StringHasherInlines.h vendored Normal file
View file

@ -0,0 +1,36 @@
/*
* Copyright (C) 2023 Apple Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#pragma once
#include "StringHasher.h"
#include "SuperFastHash.h"
namespace WTF {
// Hashes characterCount characters starting at data and returns the result
// with the top 8 bits masked off. The work is delegated to SuperFastHash with
// the same character type and converter.
template<typename T, typename Converter>
unsigned StringHasher::computeHashAndMaskTop8Bits(const T* data, unsigned characterCount)
{
    const unsigned maskedHash = SuperFastHash::computeHashAndMaskTop8Bits<T, Converter>(data, characterCount);
    return maskedHash;
}
} // namespace WTF
using WTF::StringHasher;

123
third_party/yarr/SuperFastHash.h vendored Normal file
View file

@ -0,0 +1,123 @@
/*
* Copyright (C) 2005-2023 Apple Inc. All rights reserved.
* Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#pragma once
namespace WTF {
// Paul Hsieh's SuperFastHash
// http://www.azillionmonkeys.com/qed/hash.html
// LChar data is interpreted as Latin-1-encoded (zero-extended to 16 bits).
// NOTE: The hash computation here must stay in sync with the create_hash_table script in
// JavaScriptCore and the CodeGeneratorJS.pm script in WebCore.
class SuperFastHash {
    WTF_MAKE_FAST_ALLOCATED;

public:
    static constexpr unsigned flagCount = StringHasher::flagCount;
    static constexpr unsigned maskHash = StringHasher::maskHash;
    typedef StringHasher::DefaultConverter DefaultConverter;

    // Hash of `length` characters, finalized with the top 8 bits masked off.
    template<typename T, typename Converter = DefaultConverter>
    ALWAYS_INLINE static unsigned computeHashAndMaskTop8Bits(const T* data, unsigned length)
    {
        const unsigned rawHash = computeHashImpl<T, Converter>(data, length);
        return StringHasher::finalizeAndMaskTop8Bits(rawHash);
    }

    // Same as above for a zero-terminated character sequence.
    template<typename T, typename Converter = DefaultConverter>
    ALWAYS_INLINE static unsigned computeHashAndMaskTop8Bits(const T* data)
    {
        const unsigned rawHash = computeHashImpl<T, Converter>(data);
        return StringHasher::finalizeAndMaskTop8Bits(rawHash);
    }

    // Hash of `length` characters, finalized without masking.
    template<typename T, typename Converter = DefaultConverter>
    static unsigned computeHash(const T* data, unsigned length)
    {
        const unsigned rawHash = computeHashImpl<T, Converter>(data, length);
        return StringHasher::finalize(rawHash);
    }

    // Same as above for a zero-terminated character sequence.
    template<typename T, typename Converter = DefaultConverter>
    static unsigned computeHash(const T* data)
    {
        const unsigned rawHash = computeHashImpl<T, Converter>(data);
        return StringHasher::finalize(rawHash);
    }

private:
    friend class StringHasher;

    // Folds a single trailing character (odd-length input) into the hash.
    static unsigned calculateWithRemainingLastCharacter(unsigned hash, unsigned character)
    {
        unsigned mixed = hash + character;
        mixed ^= mixed << 11;
        mixed += mixed >> 17;
        return mixed;
    }

    // Folds one pair of characters into the hash.
    ALWAYS_INLINE static unsigned calculateWithTwoCharacters(unsigned hash, unsigned firstCharacter, unsigned secondCharacter)
    {
        unsigned mixed = hash + firstCharacter;
        mixed = (mixed << 16) ^ ((secondCharacter << 11) ^ mixed);
        mixed += mixed >> 11;
        return mixed;
    }

    // Unfinalized hash of exactly `length` characters: characters are consumed
    // in pairs, and a leftover character (odd length) is folded in last.
    template<typename T, typename Converter>
    static unsigned computeHashImpl(const T* characters, unsigned length)
    {
        unsigned hash = stringHashingStartValue;
        const unsigned pairCount = length >> 1;
        const bool hasTrailingCharacter = length & 1;

        for (unsigned pair = 0; pair < pairCount; ++pair, characters += 2)
            hash = calculateWithTwoCharacters(hash, Converter::convert(characters[0]), Converter::convert(characters[1]));

        if (hasTrailingCharacter)
            hash = calculateWithRemainingLastCharacter(hash, Converter::convert(characters[0]));
        return hash;
    }

    // Unfinalized hash of a zero-terminated sequence. Reading stops at the
    // first zero character, whether it appears in the first or second slot of
    // a pair.
    template<typename T, typename Converter>
    static unsigned computeHashImpl(const T* characters)
    {
        unsigned hash = stringHashingStartValue;
        for (;;) {
            const T first = *characters++;
            if (!first)
                break;
            const T second = *characters++;
            if (!second)
                return calculateWithRemainingLastCharacter(hash, Converter::convert(first));
            hash = calculateWithTwoCharacters(hash, Converter::convert(first), Converter::convert(second));
        }
        return hash;
    }
};
} // namespace WTF
using WTF::SuperFastHash;

File diff suppressed because it is too large Load diff

View file

@ -41,7 +41,135 @@
#define yarr_wtfbridge_h
#include "Escargot.h"
#include <type_traits>
// Export / allocation / attribute annotations that appear in the vendored sources.
// They are defined empty here, so they expand to nothing.
#define JS_EXPORT_PRIVATE
#define WTF_EXPORT_PRIVATE
#define WTF_MAKE_FAST_ALLOCATED
#define NO_RETURN_DUE_TO_ASSERT
// Security-sensitive assertions are mapped onto the plain ASSERT macro.
#define ASSERT_WITH_SECURITY_IMPLICATION ASSERT
// Expands to nothing; the argument is dropped.
#define UNUSED_PARAM(e)
// Forwards only the assertion to ASSERT; the variable name is dropped.
#define ASSERT_UNUSED(variable, assertion) ASSERT(assertion)
// Feature-test helpers for use in #if: each evaluates to true when the
// correspondingly prefixed macro is defined and non-zero.
#define PLATFORM(WTF_FEATURE) (defined WTF_PLATFORM_##WTF_FEATURE && WTF_PLATFORM_##WTF_FEATURE)
#define CPU(WTF_FEATURE) (defined WTF_CPU_##WTF_FEATURE && WTF_CPU_##WTF_FEATURE)
#define HAVE(WTF_FEATURE) (defined HAVE_##WTF_FEATURE && HAVE_##WTF_FEATURE)
// NOTE(review): OS(NAME) tests OS_<NAME>, while the block below defines
// WTF_OS_<NAME>. OS(LINUX)/OS(UNIX) therefore do not read WTF_OS_LINUX /
// WTF_OS_UNIX; confirm which prefix is intended.
#define OS(NAME) (defined OS_##NAME && OS_##NAME)
#define USE(WTF_FEATURE) (defined WTF_USE_##WTF_FEATURE && WTF_USE_##WTF_FEATURE)
#define ENABLE(WTF_FEATURE) (defined ENABLE_##WTF_FEATURE && ENABLE_##WTF_FEATURE)
// NOTE(review): every 64-bit build is reported as X86_64 here, regardless of
// the actual architecture; confirm this is acceptable for other 64-bit targets.
#if ESCARGOT_64
#define WTF_CPU_X86_64 1
#endif
// Platform switches: Windows when OS_WINDOWS is defined, otherwise a
// Linux/Unix configuration with errno.h and mmap available.
#if defined(OS_WINDOWS)
#define WTF_OS_WINDOWS 1
#else
#define WTF_OS_LINUX 1
#define WTF_OS_UNIX 1
#define HAVE_ERRNO_H 1
#define HAVE_MMAP 1
#endif
// WTFMove is a plain alias for std::move in this build.
#define WTFMove std::move
// Signed and unsigned integer types chosen by ESCARGOT_64: 64 bits wide when
// it is set, 32 bits otherwise.
#if ESCARGOT_64
using CPURegister = int64_t;
using UCPURegister = uint64_t;
#else
using CPURegister = int32_t;
using UCPURegister = uint32_t;
#endif
// Back-fills for library facilities that only exist from C++14 onwards. The
// block is compiled only when __cplusplus reports a pre-C++14 standard.
// NOTE(review): this adds declarations to namespace std, which the standard
// does not sanction for user code; keep the signatures identical to the
// standard ones so they stay drop-in replacements.
#if (__cplusplus < 201402L)
namespace std {
// NOTE: C++11 has no std::make_unique; constructs T from the forwarded arguments.
template <typename T, typename... Ts>
std::unique_ptr<T> make_unique(Ts&&... params)
{
return std::unique_ptr<T>(new T(std::forward<Ts>(params)...));
}
// NOTE: C++11 has no std::conditional_t alias.
template< bool B, class T, class F >
using conditional_t = typename conditional<B,T,F>::type;
// NOTE: C++11 has no std::make_unsigned_t alias.
template< class T >
using make_unsigned_t = typename make_unsigned<T>::type;
}
#endif
// Heap-allocates a T constructed from the perfectly-forwarded arguments and
// returns it wrapped in a std::unique_ptr.
template <typename T, typename... Ts>
std::unique_ptr<T> makeUnique(Ts&&... params)
{
    T* object = new T(std::forward<Ts>(params)...);
    return std::unique_ptr<T>(object);
}
// True when exactly one bit of value is set, i.e. value is a non-zero power of two.
template<typename T> constexpr bool hasOneBitSet(T value)
{
    // Clearing the lowest set bit (value & (value - 1)) leaves zero only when
    // there was a single bit to begin with; zero itself is rejected up front.
    return value && !(value & (value - 1));
}
// True when size has at most one bit set. Note that this also returns true for 0.
inline constexpr bool isPowerOfTwo(size_t size)
{
    return (size & (size - 1)) == 0;
}
// Rounds x up to the next multiple of divisor using mask arithmetic, which is
// only meaningful when divisor is a power of two. The divisor is not validated.
//
// Declared constexpr so that the constexpr roundUpToMultipleOf() wrappers that
// forward here can actually be evaluated in constant expressions. The body is
// a single return statement so it also satisfies the C++11 constexpr rules.
template<typename T, typename U>
ALWAYS_INLINE constexpr T roundUpToMultipleOfImpl(U divisor, T x)
{
    // remainderMask = T(divisor) - 1, narrowed back to T exactly as a named
    // variable of type T would be.
    return (x + static_cast<T>(static_cast<T>(divisor) - 1)) & ~static_cast<T>(static_cast<T>(divisor) - 1);
}
// Efficient implementation that takes advantage of powers of two.
// Rounds x up to the next multiple of divisor by forwarding to
// roundUpToMultipleOfImpl. The divisor is a run-time value and is not
// validated here; the helper's mask arithmetic is only meaningful for
// power-of-two divisors.
// This can only be evaluated in a constant expression if
// roundUpToMultipleOfImpl is itself constexpr.
template<typename T, typename U>
inline constexpr T roundUpToMultipleOf(U divisor, T x)
{
return roundUpToMultipleOfImpl<T, U>(divisor, x);
}
// Compile-time-divisor overload: rounds x up to the next multiple of divisor.
// The static_assert rejects a zero divisor explicitly, because isPowerOfTwo(0)
// returns true, and rejects any divisor that is not a power of two.
// This can only be evaluated in a constant expression if
// roundUpToMultipleOfImpl is itself constexpr.
template<size_t divisor> constexpr size_t roundUpToMultipleOf(size_t x)
{
static_assert(divisor && isPowerOfTwo(divisor), "");
return roundUpToMultipleOfImpl(divisor, x);
}
// Pointer overload: rounds the address held in x up to the next multiple of
// divisor by converting it to size_t, rounding, and converting back.
// The static_assert requires pointers and size_t to have the same size.
// NOTE(review): reinterpret_cast is not permitted in constant expressions, so
// the constexpr specifier presumably has no practical effect here - confirm.
template<size_t divisor, typename T> inline constexpr T* roundUpToMultipleOf(T* x)
{
static_assert(sizeof(T*) == sizeof(size_t), "");
return reinterpret_cast<T*>(roundUpToMultipleOf<divisor>(reinterpret_cast<size_t>(x)));
}
// Searches `word` for the first bit equal to `value` in the half-open index
// range [startOrResultIndex, endIndex).
//
// On success, stores the index of the matching bit in startOrResultIndex and
// returns true. On failure, stores endIndex in startOrResultIndex and returns
// false. Both indices must not exceed the bit width of T (checked by the
// assertion only).
template<typename T>
bool findBitInWord(T word, size_t& startOrResultIndex, size_t endIndex, bool value)
{
    static_assert(std::is_unsigned<T>::value, "Type used in findBitInWord must be unsigned");
    constexpr size_t bitsInWord = sizeof(word) * 8;
    ASSERT_UNUSED(bitsInWord, startOrResultIndex <= bitsInWord && endIndex <= bitsInWord);

    size_t index = startOrResultIndex;

    // Empty range: nothing to scan. Return before shifting, because the
    // assertion above allows index == bitsInWord and shifting a value by its
    // full width is undefined behaviour.
    if (index >= endIndex) {
        startOrResultIndex = endIndex;
        return false;
    }

    // Bring the first candidate bit down to position 0, then walk upwards.
    word >>= index;
    while (index < endIndex) {
        if ((word & 1) == static_cast<T>(value)) {
            startOrResultIndex = index;
            return true;
        }
        index++;
        word >>= 1;
    }

    startOrResultIndex = endIndex;
    return false;
}
#include "runtime/String.h"
#include "CheckedArithmetic.h"
#include "StringHasher.h"
#include "SuperFastHash.h"
#include "StringHasherInlines.h"
#include <stdio.h>
#include <stdarg.h>
@ -59,6 +187,7 @@ template <typename T, size_t N = 0>
class Vector {
public:
typedef typename std::vector<T>::iterator iterator;
typedef typename std::vector<T>::const_iterator const_iterator;
std::vector<T> impl;
public:
@ -68,6 +197,14 @@ public:
append(v);
}
Vector(const T* v, size_t len)
{
impl.reserve(len);
for (size_t i = 0; i < len; i ++) {
impl.push_back(v[i]);
}
}
Vector(std::initializer_list<T> list)
{
impl.reserve(list.size());
@ -111,6 +248,16 @@ public:
return impl.end();
}
// Read-only iteration: const_iterator to the first element of the underlying std::vector.
const_iterator begin() const
{
return impl.begin();
}
// Read-only iteration: const_iterator one past the last element of the underlying std::vector.
const_iterator end() const
{
return impl.end();
}
T& last()
{
return impl.back();
@ -148,11 +295,21 @@ public:
impl.erase(impl.begin() + i);
}
// Removes the last element via std::vector::pop_back.
// The vector must not be empty; this is not checked here.
void removeLast()
{
impl.pop_back();
}
// Removes every element. The storage is swapped into an empty temporary, so
// the old buffer is released when that temporary is destroyed instead of
// being kept as spare capacity.
void clear()
{
std::vector<T>().swap(impl);
}
// Resizes the underlying vector to s elements via std::vector::resize; new
// elements are value-initialized.
// NOTE(review): resize also truncates when s is smaller than the current
// size; callers presumably only pass larger values - confirm.
void grow(size_t s)
{
impl.resize(s);
}
void shrink(size_t newLength)
{
ASSERT(newLength <= impl.size());
@ -176,7 +333,7 @@ public:
impl.reserve(siz);
}
void swap(Vector& other)
void swap(Vector<T, N>& other)
{
impl.swap(other.impl);
}
@ -190,6 +347,25 @@ public:
{
impl.reserve(capacity);
}
// Removes the last element and returns it by value.
// The element is moved out of the vector rather than copied: it is destroyed
// by pop_back() immediately afterwards, so its moved-from state is never
// observed. This also makes the method usable with move-only element types.
// The vector must not be empty.
T takeLast()
{
    ASSERT(!impl.empty());
    T last(std::move(impl.back()));
    impl.pop_back();
    return last;
}
void fill(const T& val, size_t newSize)
{
if (size() > newSize)
shrink(newSize);
else if (newSize > capacity()) {
clear();
grow(newSize);
}
std::fill(begin(), end(), val);
}
};
@ -209,19 +385,20 @@ dataLog(const char* fmt, ...)
va_end(ap);
}
/*
* Do-nothing version of a macro used by WTF to avoid unused
* parameter warnings.
*/
#define UNUSED_PARAM(e)
#define ASSERT_UNUSED(variable, assertion) ASSERT(assertion)
template <typename Key, typename Value, typename Allocator = std::allocator<std::pair<Key const, Value>>>
class HashMap : public std::unordered_map<Key, Value, std::hash<Key>, std::equal_to<Key>, Allocator> {
public:
void add(const Key& k, const Value& v)
struct AddResult {
bool isNewEntry;
typename std::unordered_map<Key, Value, std::hash<Key>, std::equal_to<Key>, Allocator>::iterator iterator;
};
AddResult add(const Key& k, const Value& v)
{
std::unordered_map<Key, Value, std::hash<Key>, std::equal_to<Key>, Allocator>::insert(std::make_pair(k, v));
AddResult r;
auto result = std::unordered_map<Key, Value, std::hash<Key>, std::equal_to<Key>, Allocator>::insert(std::make_pair(k, v));
r.iterator = result.first;
r.isNewEntry = result.second;
return r;
}
const Value& get(const Key& k)
@ -243,90 +420,32 @@ public:
return r;
}
// Adds every element of `other` to this set through add().
template<typename Other>
void formUnion(const Other& other)
{
    const auto stop = other.end();
    for (auto it = other.begin(); it != stop; ++it) {
        add(*it);
    }
}
bool contains(const Key& k)
{
return std::unordered_set<Key, std::hash<Key>, std::equal_to<Key>, Allocator>::find(k) != std::unordered_set<Key, std::hash<Key>, std::equal_to<Key>, Allocator>::end();
}
bool isEmpty()
{
return std::unordered_set<Key, std::hash<Key>, std::equal_to<Key>, Allocator>::empty();
}
};
// Minimal optional-value holder: stores a T by value together with a flag that
// records whether a value is present.
template <typename T>
class Optional {
public:
// Creates an empty Optional; the stored T is value-initialized.
Optional()
: m_hasValue(false)
, m_value()
{
}
// Creates an Optional that holds a copy of value.
Optional(T value)
: m_hasValue(true)
, m_value(value)
{
}
// Creates an empty Optional from a nullptr literal.
Optional(std::nullptr_t value)
: m_hasValue(false)
, m_value()
{
}
// Returns a copy of the stored value; asserts that a value is present.
T value()
{
ASSERT(m_hasValue);
return m_value;
}
// Const overload of value(); also returns a copy and asserts presence.
const T value() const
{
ASSERT(m_hasValue);
return m_value;
}
// True when a value is present.
bool hasValue() const
{
return m_hasValue;
}
// Implicit conversion to bool; equivalent to hasValue().
operator bool() const
{
return hasValue();
}
// Two Optionals are equal when both are empty, or when both hold values
// that compare equal with T's operator==.
bool operator==(const Optional<T>& other) const
{
if (m_hasValue != other.hasValue()) {
return false;
}
return m_hasValue ? m_value == other.m_value : true;
}
bool operator!=(const Optional<T>& other) const
{
return !this->operator==(other);
}
// Comparison against a plain T: an empty Optional never equals a value.
bool operator==(const T& other) const
{
if (m_hasValue) {
return value() == other;
}
return false;
}
bool operator!=(const T& other) const
{
return !operator==(other);
}
protected:
bool m_hasValue; // whether m_value is meaningful
T m_value; // value-initialized while the Optional is empty
};
template<typename T>
using Optional = Escargot::Optional<T>;
class String {
public:
String()
: m_impl(::Escargot::String::emptyString)
: m_impl()
{
}
@ -337,38 +456,71 @@ public:
// Returns the code unit at idx, or 0 when the string is null.
ALWAYS_INLINE char16_t operator[](const size_t idx) const
{
    char16_t unit = 0;
    if (!isNull())
        unit = m_impl->charAt(idx);
    return unit;
}
// Returns the length reported by the underlying string, or 0 when the string is null.
ALWAYS_INLINE size_t length() const
{
    size_t count = 0;
    if (!isNull())
        count = m_impl->length();
    return count;
}
bool equals(const String& src) const
{
return m_impl->equals(src.m_impl);
if (isNull() && src.isNull()) {
return true;
}
if (isNull() || src.isNull()) {
return false;
}
return m_impl.value()->equals(src.m_impl.value());
}
ALWAYS_INLINE size_t hashValue() const
ALWAYS_INLINE size_t hash() const
{
return m_impl->hashValue();
if (isNull()) {
return 0;
}
if (m_impl->is8Bit()) {
return StringHasher::computeHashAndMaskTop8Bits(m_impl->characters8(), m_impl->length());
} else {
return StringHasher::computeHashAndMaskTop8Bits(m_impl->characters16(), m_impl->length());
}
}
// Reports whether the string contains the character c; a null string contains nothing.
bool contains(char c) const
{
    const bool hasImpl = !isNull();
    if (!hasImpl)
        return false;
    // The underlying contains() is given a NUL-terminated one-character string.
    char needle[2] = { c, 0x0 };
    return m_impl->contains(needle);
}
// Returns true when no underlying string is attached, i.e. m_impl tests false.
bool isNull() const
{
return !m_impl;
}
// A null string is reported as 8-bit; otherwise the underlying string decides.
ALWAYS_INLINE bool is8Bit() const
{
    return isNull() || m_impl->is8Bit();
}
template <typename Any>
const Any* characters() const
{
if (isNull()) {
return nullptr;
}
if (is8Bit()) {
return (Any*)m_impl->characters8();
} else {
@ -378,29 +530,40 @@ public:
// Returns the underlying 8-bit character buffer, or nullptr when the string is null.
ALWAYS_INLINE const LChar* characters8() const
{
    const LChar* buffer = nullptr;
    if (!isNull())
        buffer = m_impl->characters8();
    return buffer;
}
// Returns the underlying 16-bit character buffer, or nullptr when the string is null.
ALWAYS_INLINE const char16_t* characters16() const
{
    const char16_t* buffer = nullptr;
    if (!isNull())
        buffer = m_impl->characters16();
    return buffer;
}
ALWAYS_INLINE::Escargot::String* impl()
{
return m_impl;
return m_impl.value();
}
// Compares against a character array. A non-null string defers to the
// underlying equals(); a null string compares equal only when srcLen is zero.
template <const size_t srcLen>
ALWAYS_INLINE bool operator==(const char (&src)[srcLen]) const
{
    if (!isNull())
        return m_impl->equals(src);
    return srcLen == 0;
}
private:
::Escargot::String* m_impl;
Optional<::Escargot::String*> m_impl;
};
using StringView = const String&;
class StringBuilder {
public:
void append(int c)
@ -427,6 +590,8 @@ private:
::Escargot::StringBuilder m_impl;
};
typedef Checked<uint32_t, RecordOverflow> CheckedUint32;
} /* namespace Yarr */
} /* namespace JSC */
@ -436,7 +601,7 @@ template <>
struct hash<::JSC::Yarr::String> {
size_t operator()(::JSC::Yarr::String const& x) const
{
return x.hashValue();
return x.hash();
}
};
@ -452,49 +617,42 @@ struct equal_to<::JSC::Yarr::String> {
namespace WTF {
const size_t notFound = static_cast<size_t>(-1);
using String = ::JSC::Yarr::String;
}
#define JS_EXPORT_PRIVATE
#define WTF_EXPORT_PRIVATE
#define WTF_MAKE_FAST_ALLOCATED
#define NO_RETURN_DUE_TO_ASSERT
#define PLATFORM(WTF_FEATURE) (defined WTF_PLATFORM_##WTF_FEATURE && WTF_PLATFORM_##WTF_FEATURE)
#define CPU(WTF_FEATURE) (defined WTF_CPU_##WTF_FEATURE && WTF_CPU_##WTF_FEATURE)
#define HAVE(WTF_FEATURE) (defined HAVE_##WTF_FEATURE && HAVE_##WTF_FEATURE)
#define OS(NAME) (defined OS_##NAME && OS_##NAME)
#define USE(WTF_FEATURE) (defined WTF_USE_##WTF_FEATURE && WTF_USE_##WTF_FEATURE)
#define ENABLE(WTF_FEATURE) (defined ENABLE_##WTF_FEATURE && ENABLE_##WTF_FEATURE)
#if ESCARGOT_64
#define WTF_CPU_X86_64 1
#endif
#if defined(OS_WINDOWS)
#define WTF_OS_WINDOWS 1
#else
#define WTF_OS_LINUX 1
#define WTF_OS_UNIX 1
#define HAVE_ERRNO_H 1
#define HAVE_MMAP 1
#endif
#define WTFMove std::move
// NOTE there is no make_unique in c++11
#if (__cplusplus < 201402L)
namespace std {
template <typename T, typename... Ts>
std::unique_ptr<T> make_unique(Ts&&... params)
// Returns a count of the number of bits set in 'bits'.
inline size_t bitCount(unsigned bits)
{
return std::unique_ptr<T>(new T(std::forward<Ts>(params)...));
bits = bits - ((bits >> 1) & 0x55555555);
bits = (bits & 0x33333333) + ((bits >> 2) & 0x33333333);
return (((bits + (bits >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
}
// Counts the set bits of a 64-bit value by summing the counts of its two 32-bit halves.
inline size_t bitCount(uint64_t bits)
{
    const unsigned lowHalf = static_cast<unsigned>(bits);
    const unsigned highHalf = static_cast<unsigned>(bits >> 32);
    return bitCount(lowHalf) + bitCount(highHalf);
}
// Reinterprets the object representation of `from` as a value of type ToType.
// Both types must have the same size; this is checked at compile time.
template <typename ToType, typename FromType>
inline ToType bitwise_cast(FromType from)
{
    static_assert(sizeof(FromType) == sizeof(ToType), "bitwise_cast requires FromType and ToType to have the same size");
    // Copy the bytes instead of reading the inactive member of a union, which
    // is undefined behavior in C++.
    ToType to;
    memcpy(static_cast<void*>(&to), static_cast<const void*>(&from), sizeof(to));
    return to;
}
}
#endif
#include "ASCIICType.h"
#include "BitSet.h"
#include "BitVector.h"
#include "OptionSet.h"
#undef RELEASE_ASSERT
#define RELEASE_ASSERT ASSERT
typedef const char* ASCIILiteral;
#endif /* yarr_wtfbridge_h */

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 2009 Apple Inc. All rights reserved.
* Copyright (C) 2009-2019 Apple Inc. All rights reserved.
* Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
* All rights reserved.
*
@ -30,12 +30,11 @@
#include <limits.h>
#include "YarrErrorCode.h"
namespace JSC {
namespace Yarr {
namespace JSC { namespace Yarr {
#define YarrStackSpaceForBackTrackInfoPatternCharacter 2 // Only for !fixed quantifiers.
#define YarrStackSpaceForBackTrackInfoCharacterClass 2 // Only for !fixed quantifiers.
#define YarrStackSpaceForBackTrackInfoBackReference 2
#define YarrStackSpaceForBackTrackInfoBackReference 3
#define YarrStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
#define YarrStackSpaceForBackTrackInfoParentheticalAssertion 1
#define YarrStackSpaceForBackTrackInfoParenthesesOnce 2
@ -43,24 +42,26 @@ namespace Yarr {
#define YarrStackSpaceForBackTrackInfoParentheses 4
#define YarrStackSpaceForDotStarEnclosure 1
static const unsigned quantifyInfinite = UINT_MAX;
static const unsigned offsetNoMatch = std::numeric_limits<unsigned>::max();
static constexpr unsigned quantifyInfinite = UINT_MAX;
static constexpr unsigned offsetNoMatch = std::numeric_limits<unsigned>::max();
// The below limit restricts the number of "recursive" match calls in order to
// avoid spending exponential time on complex regular expressions.
static const unsigned matchLimit = 1000000;
static constexpr unsigned matchLimit = 1000000;
enum JSRegExpResult {
JSRegExpMatch = 1,
JSRegExpNoMatch = 0,
JSRegExpErrorNoMatch = -1,
JSRegExpJITCodeFailure = -2,
JSRegExpErrorHitLimit = -3,
JSRegExpErrorNoMemory = -4,
JSRegExpErrorInternal = -5,
enum class MatchFrom { VMThread, CompilerThread };
enum class JSRegExpResult {
Match = 1,
NoMatch = 0,
ErrorNoMatch = -1,
JITCodeFailure = -2,
ErrorHitLimit = -3,
ErrorNoMemory = -4,
ErrorInternal = -5,
};
enum YarrCharSize {
enum class CharSize : uint8_t {
Char8,
Char16
};
@ -70,9 +71,9 @@ enum class BuiltInCharacterClassID : unsigned {
SpaceClassID,
WordClassID,
DotClassID,
BaseUnicodePropertyID
BaseUnicodePropertyID,
};
struct BytecodePattern;
}
} // namespace JSC::Yarr
} } // namespace JSC::Yarr

View file

@ -27,8 +27,7 @@
#include <stdint.h>
namespace JSC {
namespace Yarr {
namespace JSC { namespace Yarr {
// This set of data provides information for each UCS2 code point as to the set of code points
// that it should match under the ES6 case insensitive RegExp matching rules, specified in 21.2.2.8.2.
@ -36,43 +35,113 @@ namespace Yarr {
// The Unicode tables are autogenerated using the python script generateYarrCanonicalizeUnicode
// which creates YarrCanonicalizeUnicode.cpp.
// Tells how the `value` field of a CanonicalizationRange is to be interpreted.
enum UCS2CanonicalizationType {
CanonicalizeUnique, // No canonically equal values, e.g. 0x0.
CanonicalizeSet, // Value indicates a set in characterSetInfo.
CanonicalizeRangeLo, // Value is positive delta to pair, E.g. 0x41 has value 0x20, -> 0x61.
CanonicalizeRangeHi, // Value is positive delta to pair, E.g. 0x61 has value 0x20, -> 0x41.
CanonicalizeAlternatingAligned, // Aligned consecutive pair, e.g. 0x1f4,0x1f5.
CanonicalizeUnique, // No canonically equal values, e.g. 0x0.
CanonicalizeSet, // Value indicates a set in characterSetInfo.
CanonicalizeRangeLo, // Value is positive delta to pair, E.g. 0x41 has value 0x20, -> 0x61.
CanonicalizeRangeHi, // Value is positive delta to pair, E.g. 0x61 has value 0x20, -> 0x41.
CanonicalizeAlternatingAligned, // Aligned consecutive pair, e.g. 0x1f4,0x1f5.
CanonicalizeAlternatingUnaligned, // Unaligned consecutive pair, e.g. 0x241,0x242.
};
struct CanonicalizationRange {
UChar32 begin;
UChar32 end;
UChar32 value;
char32_t begin;
char32_t end;
char32_t value;
UCS2CanonicalizationType type;
};
extern const size_t UCS2_CANONICALIZATION_RANGES;
extern const UChar32* const ucs2CharacterSetInfo[];
extern const char32_t* const ucs2CharacterSetInfo[];
extern const CanonicalizationRange ucs2RangeInfo[];
extern const uint16_t canonicalTableLChar[256];
extern const size_t UNICODE_CANONICALIZATION_RANGES;
extern const UChar32* const unicodeCharacterSetInfo[];
#if defined(COMPILER_MSVC)
extern const char32_t* unicodeCharacterSetInfo[];
#else
extern const char32_t* const unicodeCharacterSetInfo[];
#endif
extern const CanonicalizationRange unicodeRangeInfo[];
enum class CanonicalMode { UCS2,
Unicode };
enum class CanonicalMode { UCS2, Unicode };
inline const UChar32* canonicalCharacterSetInfo(unsigned index, CanonicalMode canonicalMode)
inline const char32_t* canonicalCharacterSetInfo(unsigned index, CanonicalMode canonicalMode)
{
const UChar32* const* rangeInfo = canonicalMode == CanonicalMode::UCS2 ? ucs2CharacterSetInfo : unicodeCharacterSetInfo;
const char32_t* const* rangeInfo = canonicalMode == CanonicalMode::UCS2 ? ucs2CharacterSetInfo : unicodeCharacterSetInfo;
return rangeInfo[index];
}
// This searches in log2 time over ~400-600 entries, so should typically result in 9 compares.
const CanonicalizationRange* canonicalRangeInfoFor(UChar32 ch, CanonicalMode canonicalMode = CanonicalMode::UCS2);
// Should only be called for characters that have one canonically matching value.
UChar32 getCanonicalPair(const CanonicalizationRange* info, UChar32 ch);
// Returns true if no other UCS2 codepoint can match this value.
bool isCanonicallyUnique(UChar32 ch, CanonicalMode canonicalMode = CanonicalMode::UCS2);
// Returns true if values are equal, under the canonicalization rules.
bool areCanonicallyEquivalent(UChar32 a, UChar32 b, CanonicalMode canonicalMode = CanonicalMode::UCS2);
// Binary-searches the range table selected by canonicalMode for the entry
// whose [begin, end] interval contains ch and returns a pointer to it.
// The loop only exits once a containing entry has been found.
inline const CanonicalizationRange* canonicalRangeInfoFor(char32_t ch, CanonicalMode canonicalMode = CanonicalMode::UCS2)
{
    const bool useUCS2 = canonicalMode == CanonicalMode::UCS2;
    const CanonicalizationRange* base = useUCS2 ? ucs2RangeInfo : unicodeRangeInfo;
    size_t remaining = useUCS2 ? UCS2_CANONICALIZATION_RANGES : UNICODE_CANONICALIZATION_RANGES;

    for (;;) {
        const size_t middle = remaining >> 1;
        const CanonicalizationRange* probe = base + middle;
        if (ch < probe->begin) {
            // Target lies below the probe: keep the lower half.
            remaining = middle;
            continue;
        }
        if (ch <= probe->end)
            return probe;
        // Target lies above the probe: continue just past it.
        base = probe + 1;
        remaining -= (middle + 1);
    }
}
} // JSC::Yarr
// Returns the single code point that is canonically paired with ch.
// Must only be used for characters that have exactly one canonically matching
// value, i.e. whose range type is neither CanonicalizeUnique nor CanonicalizeSet.
inline char32_t getCanonicalPair(const CanonicalizationRange* info, char32_t ch)
{
    ASSERT(ch >= info->begin && ch <= info->end);
    const auto kind = info->type;
    if (kind == CanonicalizeRangeLo)
        return ch + info->value;
    if (kind == CanonicalizeRangeHi)
        return ch - info->value;
    if (kind == CanonicalizeAlternatingAligned)
        return ch ^ 1;
    if (kind == CanonicalizeAlternatingUnaligned)
        return ((ch - 1) ^ 1) + 1;
    // Any other range type has no unique partner.
    RELEASE_ASSERT_NOT_REACHED();
    return 0;
}
// Returns true if no other UCS2 codepoint can match this value.
inline bool isCanonicallyUnique(char32_t ch, CanonicalMode canonicalMode = CanonicalMode::UCS2)
{
return canonicalRangeInfoFor(ch, canonicalMode)->type == CanonicalizeUnique;
}
// Returns true if values are equal, under the canonicalization rules.
inline bool areCanonicallyEquivalent(char32_t a, char32_t b, CanonicalMode canonicalMode = CanonicalMode::UCS2)
{
auto* info = canonicalRangeInfoFor(a, canonicalMode);
switch (info->type) {
case CanonicalizeUnique:
return a == b;
case CanonicalizeSet: {
for (auto* set = canonicalCharacterSetInfo(info->value, canonicalMode); (a = *set); ++set) {
if (a == b)
return true;
}
return false;
}
case CanonicalizeRangeLo:
return (a == b) || (a + info->value == b);
case CanonicalizeRangeHi:
return (a == b) || (a - info->value == b);
case CanonicalizeAlternatingAligned:
return (a | 1) == (b | 1);
case CanonicalizeAlternatingUnaligned:
return ((a - 1) | 1) == ((b - 1) | 1);
}
RELEASE_ASSERT_NOT_REACHED();
return false;
}
} } // JSC::Yarr

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 2012-2013, 2015-2016 Apple Inc. All rights reserved.
* Copyright (C) 2012-2018 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -23,32 +23,39 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// DO NOT EDIT! - this file autogenerated by YarrCanonicalize.js
// DO NOT EDIT! - this file autogenerated by YarrCanonicalizeUCS2.js
#include "WTFBridge.h"
#include "YarrCanonicalize.h"
namespace JSC {
namespace Yarr {
namespace JSC { namespace Yarr {
const UChar32 ucs2CharacterSet0[] = { 0x01c4, 0x01c5, 0x01c6, 0 };
const UChar32 ucs2CharacterSet1[] = { 0x01c7, 0x01c8, 0x01c9, 0 };
const UChar32 ucs2CharacterSet2[] = { 0x01ca, 0x01cb, 0x01cc, 0 };
const UChar32 ucs2CharacterSet3[] = { 0x01f1, 0x01f2, 0x01f3, 0 };
const UChar32 ucs2CharacterSet4[] = { 0x0392, 0x03b2, 0x03d0, 0 };
const UChar32 ucs2CharacterSet5[] = { 0x0395, 0x03b5, 0x03f5, 0 };
const UChar32 ucs2CharacterSet6[] = { 0x0398, 0x03b8, 0x03d1, 0 };
const UChar32 ucs2CharacterSet7[] = { 0x0345, 0x0399, 0x03b9, 0x1fbe, 0 };
const UChar32 ucs2CharacterSet8[] = { 0x039a, 0x03ba, 0x03f0, 0 };
const UChar32 ucs2CharacterSet9[] = { 0x00b5, 0x039c, 0x03bc, 0 };
const UChar32 ucs2CharacterSet10[] = { 0x03a0, 0x03c0, 0x03d6, 0 };
const UChar32 ucs2CharacterSet11[] = { 0x03a1, 0x03c1, 0x03f1, 0 };
const UChar32 ucs2CharacterSet12[] = { 0x03a3, 0x03c2, 0x03c3, 0 };
const UChar32 ucs2CharacterSet13[] = { 0x03a6, 0x03c6, 0x03d5, 0 };
const UChar32 ucs2CharacterSet14[] = { 0x1e60, 0x1e61, 0x1e9b, 0 };
constexpr char32_t ucs2CharacterSet0[] = { 0x01c4, 0x01c5, 0x01c6, 0 };
constexpr char32_t ucs2CharacterSet1[] = { 0x01c7, 0x01c8, 0x01c9, 0 };
constexpr char32_t ucs2CharacterSet2[] = { 0x01ca, 0x01cb, 0x01cc, 0 };
constexpr char32_t ucs2CharacterSet3[] = { 0x01f1, 0x01f2, 0x01f3, 0 };
constexpr char32_t ucs2CharacterSet4[] = { 0x0392, 0x03b2, 0x03d0, 0 };
constexpr char32_t ucs2CharacterSet5[] = { 0x0395, 0x03b5, 0x03f5, 0 };
constexpr char32_t ucs2CharacterSet6[] = { 0x0398, 0x03b8, 0x03d1, 0 };
constexpr char32_t ucs2CharacterSet7[] = { 0x0345, 0x0399, 0x03b9, 0x1fbe, 0 };
constexpr char32_t ucs2CharacterSet8[] = { 0x039a, 0x03ba, 0x03f0, 0 };
constexpr char32_t ucs2CharacterSet9[] = { 0x00b5, 0x039c, 0x03bc, 0 };
constexpr char32_t ucs2CharacterSet10[] = { 0x03a0, 0x03c0, 0x03d6, 0 };
constexpr char32_t ucs2CharacterSet11[] = { 0x03a1, 0x03c1, 0x03f1, 0 };
constexpr char32_t ucs2CharacterSet12[] = { 0x03a3, 0x03c2, 0x03c3, 0 };
constexpr char32_t ucs2CharacterSet13[] = { 0x03a6, 0x03c6, 0x03d5, 0 };
constexpr char32_t ucs2CharacterSet14[] = { 0x0412, 0x0432, 0x1c80, 0 };
constexpr char32_t ucs2CharacterSet15[] = { 0x0414, 0x0434, 0x1c81, 0 };
constexpr char32_t ucs2CharacterSet16[] = { 0x041e, 0x043e, 0x1c82, 0 };
constexpr char32_t ucs2CharacterSet17[] = { 0x0421, 0x0441, 0x1c83, 0 };
constexpr char32_t ucs2CharacterSet18[] = { 0x0422, 0x0442, 0x1c84, 0x1c85, 0 };
constexpr char32_t ucs2CharacterSet19[] = { 0x042a, 0x044a, 0x1c86, 0 };
constexpr char32_t ucs2CharacterSet20[] = { 0x0462, 0x0463, 0x1c87, 0 };
constexpr char32_t ucs2CharacterSet21[] = { 0x1e60, 0x1e61, 0x1e9b, 0 };
constexpr char32_t ucs2CharacterSet22[] = { 0x1c88, 0xa64a, 0xa64b, 0 };
static const size_t UCS2_CANONICALIZATION_SETS = 15;
const UChar32* const ucs2CharacterSetInfo[UCS2_CANONICALIZATION_SETS] = {
static constexpr size_t UCS2_CANONICALIZATION_SETS = 23;
const char32_t* const ucs2CharacterSetInfo[UCS2_CANONICALIZATION_SETS] = {
ucs2CharacterSet0,
ucs2CharacterSet1,
ucs2CharacterSet2,
@ -64,9 +71,17 @@ const UChar32* const ucs2CharacterSetInfo[UCS2_CANONICALIZATION_SETS] = {
ucs2CharacterSet12,
ucs2CharacterSet13,
ucs2CharacterSet14,
ucs2CharacterSet15,
ucs2CharacterSet16,
ucs2CharacterSet17,
ucs2CharacterSet18,
ucs2CharacterSet19,
ucs2CharacterSet20,
ucs2CharacterSet21,
ucs2CharacterSet22,
};
const size_t UCS2_CANONICALIZATION_RANGES = 391;
const size_t UCS2_CANONICALIZATION_RANGES = 448;
const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = {
{ 0x0000, 0x0040, 0x0000, CanonicalizeUnique },
{ 0x0041, 0x005a, 0x0020, CanonicalizeRangeLo },
@ -183,7 +198,7 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = {
{ 0x0267, 0x0267, 0x0000, CanonicalizeUnique },
{ 0x0268, 0x0268, 0x00d1, CanonicalizeRangeHi },
{ 0x0269, 0x0269, 0x00d3, CanonicalizeRangeHi },
{ 0x026a, 0x026a, 0x0000, CanonicalizeUnique },
{ 0x026a, 0x026a, 0xa544, CanonicalizeRangeLo },
{ 0x026b, 0x026b, 0x29f7, CanonicalizeRangeLo },
{ 0x026c, 0x026c, 0xa541, CanonicalizeRangeLo },
{ 0x026d, 0x026e, 0x0000, CanonicalizeUnique },
@ -207,7 +222,8 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = {
{ 0x028c, 0x028c, 0x0047, CanonicalizeRangeHi },
{ 0x028d, 0x0291, 0x0000, CanonicalizeUnique },
{ 0x0292, 0x0292, 0x00db, CanonicalizeRangeHi },
{ 0x0293, 0x029d, 0x0000, CanonicalizeUnique },
{ 0x0293, 0x029c, 0x0000, CanonicalizeUnique },
{ 0x029d, 0x029d, 0xa515, CanonicalizeRangeLo },
{ 0x029e, 0x029e, 0xa512, CanonicalizeRangeLo },
{ 0x029f, 0x0344, 0x0000, CanonicalizeUnique },
{ 0x0345, 0x0345, 0x0007, CanonicalizeSet },
@ -289,10 +305,34 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = {
{ 0x03fc, 0x03fc, 0x0000, CanonicalizeUnique },
{ 0x03fd, 0x03ff, 0x0082, CanonicalizeRangeHi },
{ 0x0400, 0x040f, 0x0050, CanonicalizeRangeLo },
{ 0x0410, 0x042f, 0x0020, CanonicalizeRangeLo },
{ 0x0430, 0x044f, 0x0020, CanonicalizeRangeHi },
{ 0x0410, 0x0411, 0x0020, CanonicalizeRangeLo },
{ 0x0412, 0x0412, 0x000e, CanonicalizeSet },
{ 0x0413, 0x0413, 0x0020, CanonicalizeRangeLo },
{ 0x0414, 0x0414, 0x000f, CanonicalizeSet },
{ 0x0415, 0x041d, 0x0020, CanonicalizeRangeLo },
{ 0x041e, 0x041e, 0x0010, CanonicalizeSet },
{ 0x041f, 0x0420, 0x0020, CanonicalizeRangeLo },
{ 0x0421, 0x0421, 0x0011, CanonicalizeSet },
{ 0x0422, 0x0422, 0x0012, CanonicalizeSet },
{ 0x0423, 0x0429, 0x0020, CanonicalizeRangeLo },
{ 0x042a, 0x042a, 0x0013, CanonicalizeSet },
{ 0x042b, 0x042f, 0x0020, CanonicalizeRangeLo },
{ 0x0430, 0x0431, 0x0020, CanonicalizeRangeHi },
{ 0x0432, 0x0432, 0x000e, CanonicalizeSet },
{ 0x0433, 0x0433, 0x0020, CanonicalizeRangeHi },
{ 0x0434, 0x0434, 0x000f, CanonicalizeSet },
{ 0x0435, 0x043d, 0x0020, CanonicalizeRangeHi },
{ 0x043e, 0x043e, 0x0010, CanonicalizeSet },
{ 0x043f, 0x0440, 0x0020, CanonicalizeRangeHi },
{ 0x0441, 0x0441, 0x0011, CanonicalizeSet },
{ 0x0442, 0x0442, 0x0012, CanonicalizeSet },
{ 0x0443, 0x0449, 0x0020, CanonicalizeRangeHi },
{ 0x044a, 0x044a, 0x0013, CanonicalizeSet },
{ 0x044b, 0x044f, 0x0020, CanonicalizeRangeHi },
{ 0x0450, 0x045f, 0x0050, CanonicalizeRangeHi },
{ 0x0460, 0x0481, 0x0000, CanonicalizeAlternatingAligned },
{ 0x0460, 0x0461, 0x0000, CanonicalizeAlternatingAligned },
{ 0x0462, 0x0463, 0x0014, CanonicalizeSet },
{ 0x0464, 0x0481, 0x0000, CanonicalizeAlternatingAligned },
{ 0x0482, 0x0489, 0x0000, CanonicalizeUnique },
{ 0x048a, 0x04bf, 0x0000, CanonicalizeAlternatingAligned },
{ 0x04c0, 0x04c0, 0x000f, CanonicalizeRangeLo },
@ -309,16 +349,38 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = {
{ 0x10c7, 0x10c7, 0x1c60, CanonicalizeRangeLo },
{ 0x10c8, 0x10cc, 0x0000, CanonicalizeUnique },
{ 0x10cd, 0x10cd, 0x1c60, CanonicalizeRangeLo },
{ 0x10ce, 0x1d78, 0x0000, CanonicalizeUnique },
{ 0x10ce, 0x10cf, 0x0000, CanonicalizeUnique },
{ 0x10d0, 0x10fa, 0x0bc0, CanonicalizeRangeLo },
{ 0x10fb, 0x10fc, 0x0000, CanonicalizeUnique },
{ 0x10fd, 0x10ff, 0x0bc0, CanonicalizeRangeLo },
{ 0x1100, 0x139f, 0x0000, CanonicalizeUnique },
{ 0x13a0, 0x13ef, 0x97d0, CanonicalizeRangeLo },
{ 0x13f0, 0x13f5, 0x0008, CanonicalizeRangeLo },
{ 0x13f6, 0x13f7, 0x0000, CanonicalizeUnique },
{ 0x13f8, 0x13fd, 0x0008, CanonicalizeRangeHi },
{ 0x13fe, 0x1c7f, 0x0000, CanonicalizeUnique },
{ 0x1c80, 0x1c80, 0x000e, CanonicalizeSet },
{ 0x1c81, 0x1c81, 0x000f, CanonicalizeSet },
{ 0x1c82, 0x1c82, 0x0010, CanonicalizeSet },
{ 0x1c83, 0x1c83, 0x0011, CanonicalizeSet },
{ 0x1c84, 0x1c85, 0x0012, CanonicalizeSet },
{ 0x1c86, 0x1c86, 0x0013, CanonicalizeSet },
{ 0x1c87, 0x1c87, 0x0014, CanonicalizeSet },
{ 0x1c88, 0x1c88, 0x0016, CanonicalizeSet },
{ 0x1c89, 0x1c8f, 0x0000, CanonicalizeUnique },
{ 0x1c90, 0x1cba, 0x0bc0, CanonicalizeRangeHi },
{ 0x1cbb, 0x1cbc, 0x0000, CanonicalizeUnique },
{ 0x1cbd, 0x1cbf, 0x0bc0, CanonicalizeRangeHi },
{ 0x1cc0, 0x1d78, 0x0000, CanonicalizeUnique },
{ 0x1d79, 0x1d79, 0x8a04, CanonicalizeRangeLo },
{ 0x1d7a, 0x1d7c, 0x0000, CanonicalizeUnique },
{ 0x1d7d, 0x1d7d, 0x0ee6, CanonicalizeRangeLo },
{ 0x1d7e, 0x1dff, 0x0000, CanonicalizeUnique },
{ 0x1e00, 0x1e5f, 0x0000, CanonicalizeAlternatingAligned },
{ 0x1e60, 0x1e61, 0x000e, CanonicalizeSet },
{ 0x1e60, 0x1e61, 0x0015, CanonicalizeSet },
{ 0x1e62, 0x1e95, 0x0000, CanonicalizeAlternatingAligned },
{ 0x1e96, 0x1e9a, 0x0000, CanonicalizeUnique },
{ 0x1e9b, 0x1e9b, 0x000e, CanonicalizeSet },
{ 0x1e9b, 0x1e9b, 0x0015, CanonicalizeSet },
{ 0x1e9c, 0x1e9f, 0x0000, CanonicalizeUnique },
{ 0x1ea0, 0x1eff, 0x0000, CanonicalizeAlternatingAligned },
{ 0x1f00, 0x1f07, 0x0008, CanonicalizeRangeLo },
@ -429,7 +491,9 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = {
{ 0x2d28, 0x2d2c, 0x0000, CanonicalizeUnique },
{ 0x2d2d, 0x2d2d, 0x1c60, CanonicalizeRangeHi },
{ 0x2d2e, 0xa63f, 0x0000, CanonicalizeUnique },
{ 0xa640, 0xa66d, 0x0000, CanonicalizeAlternatingAligned },
{ 0xa640, 0xa649, 0x0000, CanonicalizeAlternatingAligned },
{ 0xa64a, 0xa64b, 0x0016, CanonicalizeSet },
{ 0xa64c, 0xa66d, 0x0000, CanonicalizeAlternatingAligned },
{ 0xa66e, 0xa67f, 0x0000, CanonicalizeUnique },
{ 0xa680, 0xa69b, 0x0000, CanonicalizeAlternatingAligned },
{ 0xa69c, 0xa721, 0x0000, CanonicalizeUnique },
@ -451,14 +515,41 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = {
{ 0xa7ab, 0xa7ab, 0xa54f, CanonicalizeRangeHi },
{ 0xa7ac, 0xa7ac, 0xa54b, CanonicalizeRangeHi },
{ 0xa7ad, 0xa7ad, 0xa541, CanonicalizeRangeHi },
{ 0xa7ae, 0xa7af, 0x0000, CanonicalizeUnique },
{ 0xa7ae, 0xa7ae, 0xa544, CanonicalizeRangeHi },
{ 0xa7af, 0xa7af, 0x0000, CanonicalizeUnique },
{ 0xa7b0, 0xa7b0, 0xa512, CanonicalizeRangeHi },
{ 0xa7b1, 0xa7b1, 0xa52a, CanonicalizeRangeHi },
{ 0xa7b2, 0xff20, 0x0000, CanonicalizeUnique },
{ 0xa7b2, 0xa7b2, 0xa515, CanonicalizeRangeHi },
{ 0xa7b3, 0xa7b3, 0x03a0, CanonicalizeRangeLo },
{ 0xa7b4, 0xa7b9, 0x0000, CanonicalizeAlternatingAligned },
{ 0xa7ba, 0xab52, 0x0000, CanonicalizeUnique },
{ 0xab53, 0xab53, 0x03a0, CanonicalizeRangeHi },
{ 0xab54, 0xab6f, 0x0000, CanonicalizeUnique },
{ 0xab70, 0xabbf, 0x97d0, CanonicalizeRangeHi },
{ 0xabc0, 0xff20, 0x0000, CanonicalizeUnique },
{ 0xff21, 0xff3a, 0x0020, CanonicalizeRangeLo },
{ 0xff3b, 0xff40, 0x0000, CanonicalizeUnique },
{ 0xff41, 0xff5a, 0x0020, CanonicalizeRangeHi },
{ 0xff5b, 0xffff, 0x0000, CanonicalizeUnique },
};
}
} // JSC::Yarr
const uint16_t canonicalTableLChar[256] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0x39c, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xf7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0x178
};
} } // JSC::Yarr

View file

@ -28,39 +28,42 @@
#include "WTFBridge.h"
#include "YarrCanonicalize.h"
namespace JSC {
namespace Yarr {
namespace JSC { namespace Yarr {
const UChar32 unicodeCharacterSet0[] = { 0x004b, 0x006b, 0x212a, 0 };
const UChar32 unicodeCharacterSet1[] = { 0x0053, 0x0073, 0x017f, 0 };
const UChar32 unicodeCharacterSet2[] = { 0x00c5, 0x00e5, 0x212b, 0 };
const UChar32 unicodeCharacterSet3[] = { 0x01c4, 0x01c5, 0x01c6, 0 };
const UChar32 unicodeCharacterSet4[] = { 0x01c7, 0x01c8, 0x01c9, 0 };
const UChar32 unicodeCharacterSet5[] = { 0x01ca, 0x01cb, 0x01cc, 0 };
const UChar32 unicodeCharacterSet6[] = { 0x01f1, 0x01f2, 0x01f3, 0 };
const UChar32 unicodeCharacterSet7[] = { 0x0392, 0x03b2, 0x03d0, 0 };
const UChar32 unicodeCharacterSet8[] = { 0x0395, 0x03b5, 0x03f5, 0 };
const UChar32 unicodeCharacterSet9[] = { 0x0398, 0x03b8, 0x03d1, 0x03f4, 0 };
const UChar32 unicodeCharacterSet10[] = { 0x0345, 0x0399, 0x03b9, 0x1fbe, 0 };
const UChar32 unicodeCharacterSet11[] = { 0x039a, 0x03ba, 0x03f0, 0 };
const UChar32 unicodeCharacterSet12[] = { 0x00b5, 0x039c, 0x03bc, 0 };
const UChar32 unicodeCharacterSet13[] = { 0x03a0, 0x03c0, 0x03d6, 0 };
const UChar32 unicodeCharacterSet14[] = { 0x03a1, 0x03c1, 0x03f1, 0 };
const UChar32 unicodeCharacterSet15[] = { 0x03a3, 0x03c2, 0x03c3, 0 };
const UChar32 unicodeCharacterSet16[] = { 0x03a6, 0x03c6, 0x03d5, 0 };
const UChar32 unicodeCharacterSet17[] = { 0x03a9, 0x03c9, 0x2126, 0 };
const UChar32 unicodeCharacterSet18[] = { 0x0412, 0x0432, 0x1c80, 0 };
const UChar32 unicodeCharacterSet19[] = { 0x0414, 0x0434, 0x1c81, 0 };
const UChar32 unicodeCharacterSet20[] = { 0x041e, 0x043e, 0x1c82, 0 };
const UChar32 unicodeCharacterSet21[] = { 0x0421, 0x0441, 0x1c83, 0 };
const UChar32 unicodeCharacterSet22[] = { 0x0422, 0x0442, 0x1c84, 0x1c85, 0 };
const UChar32 unicodeCharacterSet23[] = { 0x042a, 0x044a, 0x1c86, 0 };
const UChar32 unicodeCharacterSet24[] = { 0x0462, 0x0463, 0x1c87, 0 };
const UChar32 unicodeCharacterSet25[] = { 0x1e60, 0x1e61, 0x1e9b, 0 };
const UChar32 unicodeCharacterSet26[] = { 0x1c88, 0xa64a, 0xa64b, 0 };
constexpr char32_t unicodeCharacterSet0[] = { 0x004b, 0x006b, 0x212a, 0 };
constexpr char32_t unicodeCharacterSet1[] = { 0x0053, 0x0073, 0x017f, 0 };
constexpr char32_t unicodeCharacterSet2[] = { 0x00c5, 0x00e5, 0x212b, 0 };
constexpr char32_t unicodeCharacterSet3[] = { 0x01c4, 0x01c5, 0x01c6, 0 };
constexpr char32_t unicodeCharacterSet4[] = { 0x01c7, 0x01c8, 0x01c9, 0 };
constexpr char32_t unicodeCharacterSet5[] = { 0x01ca, 0x01cb, 0x01cc, 0 };
constexpr char32_t unicodeCharacterSet6[] = { 0x01f1, 0x01f2, 0x01f3, 0 };
constexpr char32_t unicodeCharacterSet7[] = { 0x0392, 0x03b2, 0x03d0, 0 };
constexpr char32_t unicodeCharacterSet8[] = { 0x0395, 0x03b5, 0x03f5, 0 };
constexpr char32_t unicodeCharacterSet9[] = { 0x0398, 0x03b8, 0x03d1, 0x03f4, 0 };
constexpr char32_t unicodeCharacterSet10[] = { 0x0345, 0x0399, 0x03b9, 0x1fbe, 0 };
constexpr char32_t unicodeCharacterSet11[] = { 0x039a, 0x03ba, 0x03f0, 0 };
constexpr char32_t unicodeCharacterSet12[] = { 0x00b5, 0x039c, 0x03bc, 0 };
constexpr char32_t unicodeCharacterSet13[] = { 0x03a0, 0x03c0, 0x03d6, 0 };
constexpr char32_t unicodeCharacterSet14[] = { 0x03a1, 0x03c1, 0x03f1, 0 };
constexpr char32_t unicodeCharacterSet15[] = { 0x03a3, 0x03c2, 0x03c3, 0 };
constexpr char32_t unicodeCharacterSet16[] = { 0x03a6, 0x03c6, 0x03d5, 0 };
constexpr char32_t unicodeCharacterSet17[] = { 0x03a9, 0x03c9, 0x2126, 0 };
constexpr char32_t unicodeCharacterSet18[] = { 0x0412, 0x0432, 0x1c80, 0 };
constexpr char32_t unicodeCharacterSet19[] = { 0x0414, 0x0434, 0x1c81, 0 };
constexpr char32_t unicodeCharacterSet20[] = { 0x041e, 0x043e, 0x1c82, 0 };
constexpr char32_t unicodeCharacterSet21[] = { 0x0421, 0x0441, 0x1c83, 0 };
constexpr char32_t unicodeCharacterSet22[] = { 0x0422, 0x0442, 0x1c84, 0x1c85, 0 };
constexpr char32_t unicodeCharacterSet23[] = { 0x042a, 0x044a, 0x1c86, 0 };
constexpr char32_t unicodeCharacterSet24[] = { 0x0462, 0x0463, 0x1c87, 0 };
constexpr char32_t unicodeCharacterSet25[] = { 0x1e60, 0x1e61, 0x1e9b, 0 };
constexpr char32_t unicodeCharacterSet26[] = { 0x1c88, 0xa64a, 0xa64b, 0 };
static const size_t UNICODE_CANONICALIZATION_SETS = 27;
const UChar32* const unicodeCharacterSetInfo[UNICODE_CANONICALIZATION_SETS] = {
constexpr size_t UNICODE_CANONICALIZATION_SETS = 27;
#if defined(COMPILER_MSVC)
const char32_t* unicodeCharacterSetInfo[UNICODE_CANONICALIZATION_SETS] = {
#else
constexpr const char32_t* unicodeCharacterSetInfo[UNICODE_CANONICALIZATION_SETS] = {
#endif
unicodeCharacterSet0,
unicodeCharacterSet1,
unicodeCharacterSet2,
@ -90,8 +93,13 @@ const UChar32* const unicodeCharacterSetInfo[UNICODE_CANONICALIZATION_SETS] = {
unicodeCharacterSet26,
};
const size_t UNICODE_CANONICALIZATION_RANGES = 495;
#if defined(COMPILER_MSVC)
const size_t UNICODE_CANONICALIZATION_RANGES = 534;
const CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] = {
#else
constexpr size_t UNICODE_CANONICALIZATION_RANGES = 534;
constexpr CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] = {
#endif
{ 0x0000, 0x0040, 0x0000, CanonicalizeUnique },
{ 0x0041, 0x004a, 0x0020, CanonicalizeRangeLo },
{ 0x004b, 0x004b, 0x0000, CanonicalizeSet },
@ -233,7 +241,8 @@ const CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] =
{ 0x027d, 0x027d, 0x29e7, CanonicalizeRangeLo },
{ 0x027e, 0x027f, 0x0000, CanonicalizeUnique },
{ 0x0280, 0x0280, 0x00da, CanonicalizeRangeHi },
{ 0x0281, 0x0282, 0x0000, CanonicalizeUnique },
{ 0x0281, 0x0281, 0x0000, CanonicalizeUnique },
{ 0x0282, 0x0282, 0xa543, CanonicalizeRangeLo },
{ 0x0283, 0x0283, 0x00da, CanonicalizeRangeHi },
{ 0x0284, 0x0286, 0x0000, CanonicalizeUnique },
{ 0x0287, 0x0287, 0xa52a, CanonicalizeRangeLo },
@ -374,7 +383,11 @@ const CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] =
{ 0x10c7, 0x10c7, 0x1c60, CanonicalizeRangeLo },
{ 0x10c8, 0x10cc, 0x0000, CanonicalizeUnique },
{ 0x10cd, 0x10cd, 0x1c60, CanonicalizeRangeLo },
{ 0x10ce, 0x139f, 0x0000, CanonicalizeUnique },
{ 0x10ce, 0x10cf, 0x0000, CanonicalizeUnique },
{ 0x10d0, 0x10fa, 0x0bc0, CanonicalizeRangeLo },
{ 0x10fb, 0x10fc, 0x0000, CanonicalizeUnique },
{ 0x10fd, 0x10ff, 0x0bc0, CanonicalizeRangeLo },
{ 0x1100, 0x139f, 0x0000, CanonicalizeUnique },
{ 0x13a0, 0x13ef, 0x97d0, CanonicalizeRangeLo },
{ 0x13f0, 0x13f5, 0x0008, CanonicalizeRangeLo },
{ 0x13f6, 0x13f7, 0x0000, CanonicalizeUnique },
@ -388,11 +401,17 @@ const CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] =
{ 0x1c86, 0x1c86, 0x0017, CanonicalizeSet },
{ 0x1c87, 0x1c87, 0x0018, CanonicalizeSet },
{ 0x1c88, 0x1c88, 0x001a, CanonicalizeSet },
{ 0x1c89, 0x1d78, 0x0000, CanonicalizeUnique },
{ 0x1c89, 0x1c8f, 0x0000, CanonicalizeUnique },
{ 0x1c90, 0x1cba, 0x0bc0, CanonicalizeRangeHi },
{ 0x1cbb, 0x1cbc, 0x0000, CanonicalizeUnique },
{ 0x1cbd, 0x1cbf, 0x0bc0, CanonicalizeRangeHi },
{ 0x1cc0, 0x1d78, 0x0000, CanonicalizeUnique },
{ 0x1d79, 0x1d79, 0x8a04, CanonicalizeRangeLo },
{ 0x1d7a, 0x1d7c, 0x0000, CanonicalizeUnique },
{ 0x1d7d, 0x1d7d, 0x0ee6, CanonicalizeRangeLo },
{ 0x1d7e, 0x1dff, 0x0000, CanonicalizeUnique },
{ 0x1d7e, 0x1d8d, 0x0000, CanonicalizeUnique },
{ 0x1d8e, 0x1d8e, 0x8a38, CanonicalizeRangeLo },
{ 0x1d8f, 0x1dff, 0x0000, CanonicalizeUnique },
{ 0x1e00, 0x1e5f, 0x0000, CanonicalizeAlternatingAligned },
{ 0x1e60, 0x1e61, 0x0019, CanonicalizeSet },
{ 0x1e62, 0x1e95, 0x0000, CanonicalizeAlternatingAligned },
@ -497,10 +516,8 @@ const CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] =
{ 0x24b6, 0x24cf, 0x001a, CanonicalizeRangeLo },
{ 0x24d0, 0x24e9, 0x001a, CanonicalizeRangeHi },
{ 0x24ea, 0x2bff, 0x0000, CanonicalizeUnique },
{ 0x2c00, 0x2c2e, 0x0030, CanonicalizeRangeLo },
{ 0x2c2f, 0x2c2f, 0x0000, CanonicalizeUnique },
{ 0x2c30, 0x2c5e, 0x0030, CanonicalizeRangeHi },
{ 0x2c5f, 0x2c5f, 0x0000, CanonicalizeUnique },
{ 0x2c00, 0x2c2f, 0x0030, CanonicalizeRangeLo },
{ 0x2c30, 0x2c5f, 0x0030, CanonicalizeRangeHi },
{ 0x2c60, 0x2c61, 0x0000, CanonicalizeAlternatingAligned },
{ 0x2c62, 0x2c62, 0x29f7, CanonicalizeRangeHi },
{ 0x2c63, 0x2c63, 0x0ee6, CanonicalizeRangeHi },
@ -548,7 +565,8 @@ const CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] =
{ 0xa78d, 0xa78d, 0xa528, CanonicalizeRangeHi },
{ 0xa78e, 0xa78f, 0x0000, CanonicalizeUnique },
{ 0xa790, 0xa793, 0x0000, CanonicalizeAlternatingAligned },
{ 0xa794, 0xa795, 0x0000, CanonicalizeUnique },
{ 0xa794, 0xa794, 0x0030, CanonicalizeRangeLo },
{ 0xa795, 0xa795, 0x0000, CanonicalizeUnique },
{ 0xa796, 0xa7a9, 0x0000, CanonicalizeAlternatingAligned },
{ 0xa7aa, 0xa7aa, 0xa544, CanonicalizeRangeHi },
{ 0xa7ab, 0xa7ab, 0xa54f, CanonicalizeRangeHi },
@ -560,8 +578,18 @@ const CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] =
{ 0xa7b1, 0xa7b1, 0xa52a, CanonicalizeRangeHi },
{ 0xa7b2, 0xa7b2, 0xa515, CanonicalizeRangeHi },
{ 0xa7b3, 0xa7b3, 0x03a0, CanonicalizeRangeLo },
{ 0xa7b4, 0xa7b7, 0x0000, CanonicalizeAlternatingAligned },
{ 0xa7b8, 0xab52, 0x0000, CanonicalizeUnique },
{ 0xa7b4, 0xa7c3, 0x0000, CanonicalizeAlternatingAligned },
{ 0xa7c4, 0xa7c4, 0x0030, CanonicalizeRangeHi },
{ 0xa7c5, 0xa7c5, 0xa543, CanonicalizeRangeHi },
{ 0xa7c6, 0xa7c6, 0x8a38, CanonicalizeRangeHi },
{ 0xa7c7, 0xa7ca, 0x0000, CanonicalizeAlternatingUnaligned },
{ 0xa7cb, 0xa7cf, 0x0000, CanonicalizeUnique },
{ 0xa7d0, 0xa7d1, 0x0000, CanonicalizeAlternatingAligned },
{ 0xa7d2, 0xa7d5, 0x0000, CanonicalizeUnique },
{ 0xa7d6, 0xa7d9, 0x0000, CanonicalizeAlternatingAligned },
{ 0xa7da, 0xa7f4, 0x0000, CanonicalizeUnique },
{ 0xa7f5, 0xa7f6, 0x0000, CanonicalizeAlternatingUnaligned },
{ 0xa7f7, 0xab52, 0x0000, CanonicalizeUnique },
{ 0xab53, 0xab53, 0x03a0, CanonicalizeRangeHi },
{ 0xab54, 0xab6f, 0x0000, CanonicalizeUnique },
{ 0xab70, 0xabbf, 0x97d0, CanonicalizeRangeHi },
@ -576,17 +604,36 @@ const CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] =
{ 0x104b0, 0x104d3, 0x0028, CanonicalizeRangeLo },
{ 0x104d4, 0x104d7, 0x0000, CanonicalizeUnique },
{ 0x104d8, 0x104fb, 0x0028, CanonicalizeRangeHi },
{ 0x104fc, 0x10c7f, 0x0000, CanonicalizeUnique },
{ 0x104fc, 0x1056f, 0x0000, CanonicalizeUnique },
{ 0x10570, 0x1057a, 0x0027, CanonicalizeRangeLo },
{ 0x1057b, 0x1057b, 0x0000, CanonicalizeUnique },
{ 0x1057c, 0x1058a, 0x0027, CanonicalizeRangeLo },
{ 0x1058b, 0x1058b, 0x0000, CanonicalizeUnique },
{ 0x1058c, 0x10592, 0x0027, CanonicalizeRangeLo },
{ 0x10593, 0x10593, 0x0000, CanonicalizeUnique },
{ 0x10594, 0x10595, 0x0027, CanonicalizeRangeLo },
{ 0x10596, 0x10596, 0x0000, CanonicalizeUnique },
{ 0x10597, 0x105a1, 0x0027, CanonicalizeRangeHi },
{ 0x105a2, 0x105a2, 0x0000, CanonicalizeUnique },
{ 0x105a3, 0x105b1, 0x0027, CanonicalizeRangeHi },
{ 0x105b2, 0x105b2, 0x0000, CanonicalizeUnique },
{ 0x105b3, 0x105b9, 0x0027, CanonicalizeRangeHi },
{ 0x105ba, 0x105ba, 0x0000, CanonicalizeUnique },
{ 0x105bb, 0x105bc, 0x0027, CanonicalizeRangeHi },
{ 0x105bd, 0x10c7f, 0x0000, CanonicalizeUnique },
{ 0x10c80, 0x10cb2, 0x0040, CanonicalizeRangeLo },
{ 0x10cb3, 0x10cbf, 0x0000, CanonicalizeUnique },
{ 0x10cc0, 0x10cf2, 0x0040, CanonicalizeRangeHi },
{ 0x10cf3, 0x1189f, 0x0000, CanonicalizeUnique },
{ 0x118a0, 0x118bf, 0x0020, CanonicalizeRangeLo },
{ 0x118c0, 0x118df, 0x0020, CanonicalizeRangeHi },
{ 0x118e0, 0x1e8ff, 0x0000, CanonicalizeUnique },
{ 0x118e0, 0x16e3f, 0x0000, CanonicalizeUnique },
{ 0x16e40, 0x16e5f, 0x0020, CanonicalizeRangeLo },
{ 0x16e60, 0x16e7f, 0x0020, CanonicalizeRangeHi },
{ 0x16e80, 0x1e8ff, 0x0000, CanonicalizeUnique },
{ 0x1e900, 0x1e921, 0x0022, CanonicalizeRangeLo },
{ 0x1e922, 0x1e943, 0x0022, CanonicalizeRangeHi },
{ 0x1e944, 0x10ffff, 0x0000, CanonicalizeUnique },
};
}
} // JSC::Yarr
} } // JSC::Yarr

View file

@ -26,42 +26,48 @@
#include "WTFBridge.h"
#include "YarrErrorCode.h"
namespace JSC {
namespace Yarr {
namespace JSC { namespace Yarr {
const char* errorMessage(ErrorCode error)
ASCIILiteral errorMessage(ErrorCode error)
{
#define REGEXP_ERROR_PREFIX "Invalid regular expression: "
// The order of this array must match the ErrorCode enum.
static const char* errorMessages[] = {
nullptr, // NoError
REGEXP_ERROR_PREFIX "regular expression too large", // PatternTooLarge
REGEXP_ERROR_PREFIX "numbers out of order in {} quantifier", // QuantifierOutOfOrder
REGEXP_ERROR_PREFIX "nothing to repeat", // QuantifierWithoutAtom
REGEXP_ERROR_PREFIX "number too large in {} quantifier", // QuantifierTooLarge
REGEXP_ERROR_PREFIX "quantifier is unmatched", // QuantifierUnmatched
REGEXP_ERROR_PREFIX "missing )", // MissingParentheses
REGEXP_ERROR_PREFIX "unmatched parentheses", // ParenthesesUnmatched
REGEXP_ERROR_PREFIX "unrecognized character after (?", // ParenthesesTypeInvalid
REGEXP_ERROR_PREFIX "invalid group specifier name", // InvalidGroupName
REGEXP_ERROR_PREFIX "duplicate group specifier name", // DuplicateGroupName
REGEXP_ERROR_PREFIX "missing terminating ] for character class", // CharacterClassUnmatched
REGEXP_ERROR_PREFIX "character class is invalid", // CharacterClassInvalid
REGEXP_ERROR_PREFIX "range out of order in character class", // CharacterClassOutOfOrder
REGEXP_ERROR_PREFIX "\\ at end of pattern", // EscapeUnterminated
REGEXP_ERROR_PREFIX "invalid unicode {} escape", // InvalidUnicodeEscape
REGEXP_ERROR_PREFIX "invalid class escape", // InvalidClassEscape
REGEXP_ERROR_PREFIX "invalid backreference for unicode pattern", // InvalidBackreference
REGEXP_ERROR_PREFIX "invalid escaped character for unicode pattern", // InvalidIdentityEscape
REGEXP_ERROR_PREFIX "invalid property expression", // InvalidUnicodePropertyExpression
REGEXP_ERROR_PREFIX "invalid decimal escape", // InvalidDecimalEscape
REGEXP_ERROR_PREFIX "invalid quantifier", // InvalidQuantifier
REGEXP_ERROR_PREFIX "too many nested disjunctions", // TooManyDisjunctions
REGEXP_ERROR_PREFIX "pattern exceeds string length limits", // OffsetTooLarge
REGEXP_ERROR_PREFIX "invalid flags" // InvalidRegularExpressionFlags
static const ASCIILiteral errorMessages[] = {
{ }, // NoError
REGEXP_ERROR_PREFIX "regular expression too large", // PatternTooLarge
REGEXP_ERROR_PREFIX "numbers out of order in {} quantifier", // QuantifierOutOfOrder
REGEXP_ERROR_PREFIX "nothing to repeat", // QuantifierWithoutAtom
REGEXP_ERROR_PREFIX "number too large in {} quantifier", // QuantifierTooLarge
REGEXP_ERROR_PREFIX "incomplete {} quantifier for Unicode pattern", // QuantifierIncomplete
REGEXP_ERROR_PREFIX "invalid quantifier", // CantQuantifyAtom
REGEXP_ERROR_PREFIX "missing )", // MissingParentheses
REGEXP_ERROR_PREFIX "unmatched ] or } bracket for Unicode pattern", // BracketUnmatched
REGEXP_ERROR_PREFIX "unmatched parentheses", // ParenthesesUnmatched
REGEXP_ERROR_PREFIX "unrecognized character after (?", // ParenthesesTypeInvalid
REGEXP_ERROR_PREFIX "invalid group specifier name", // InvalidGroupName
REGEXP_ERROR_PREFIX "duplicate group specifier name", // DuplicateGroupName
REGEXP_ERROR_PREFIX "missing terminating ] for character class", // CharacterClassUnmatched
REGEXP_ERROR_PREFIX "range out of order in character class", // CharacterClassRangeOutOfOrder
REGEXP_ERROR_PREFIX "invalid range in character class for Unicode pattern", // CharacterClassRangeInvalid
REGEXP_ERROR_PREFIX "missing terminating } for class string disjunction", // ClassStringDisjunctionUnmatched
REGEXP_ERROR_PREFIX "\\ at end of pattern", // EscapeUnterminated
REGEXP_ERROR_PREFIX "invalid Unicode \\u escape", // InvalidUnicodeEscape
REGEXP_ERROR_PREFIX "invalid Unicode code point \\u{} escape", // InvalidUnicodeCodePointEscape
REGEXP_ERROR_PREFIX "invalid backreference for Unicode pattern", // InvalidBackreference
REGEXP_ERROR_PREFIX "invalid \\k<> named backreference", // InvalidNamedBackReference
REGEXP_ERROR_PREFIX "invalid escaped character for Unicode pattern", // InvalidIdentityEscape
REGEXP_ERROR_PREFIX "invalid octal escape for Unicode pattern", // InvalidOctalEscape
REGEXP_ERROR_PREFIX "invalid \\c escape for Unicode pattern", // InvalidControlLetterEscape
REGEXP_ERROR_PREFIX "invalid property expression", // InvalidUnicodePropertyExpression
REGEXP_ERROR_PREFIX "too many nested disjunctions", // TooManyDisjunctions
REGEXP_ERROR_PREFIX "pattern exceeds string length limits", // OffsetTooLarge
REGEXP_ERROR_PREFIX "invalid flags", // InvalidRegularExpressionFlags
REGEXP_ERROR_PREFIX "invalid operation in class set", // InvalidClassSetOperation
REGEXP_ERROR_PREFIX "negated class set may contain strings", // NegatedClassSetMayContainStrings
REGEXP_ERROR_PREFIX "invalid class set character" // InvalidClassSetCharacter
};
return errorMessages[static_cast<unsigned>(error)];
}
}
} // namespace JSC::Yarr
} } // namespace JSC::Yarr

View file

@ -26,40 +26,59 @@
#pragma once
namespace JSC {
class CallFrame;
class JSGlobalObject;
class JSObject;
namespace Yarr {
enum class ErrorCode : unsigned {
enum class ErrorCode : uint8_t {
NoError = 0,
PatternTooLarge,
QuantifierOutOfOrder,
QuantifierWithoutAtom,
QuantifierTooLarge,
QuantifierUnmatched,
QuantifierIncomplete,
CantQuantifyAtom,
MissingParentheses,
BracketUnmatched,
ParenthesesUnmatched,
ParenthesesTypeInvalid,
InvalidGroupName,
DuplicateGroupName,
CharacterClassUnmatched,
CharacterClassInvalid,
CharacterClassOutOfOrder,
CharacterClassRangeOutOfOrder,
CharacterClassRangeInvalid,
ClassStringDisjunctionUnmatched,
EscapeUnterminated,
InvalidUnicodeEscape,
InvalidClassEscape,
InvalidUnicodeCodePointEscape,
InvalidBackreference,
InvalidNamedBackReference,
InvalidIdentityEscape,
InvalidOctalEscape,
InvalidControlLetterEscape,
InvalidUnicodePropertyExpression,
InvalidDecimalEscape,
InvalidQuantifier,
TooManyDisjunctions,
OffsetTooLarge,
InvalidRegularExpressionFlags,
InvalidClassSetOperation,
NegatedClassSetMayContainStrings,
InvalidClassSetCharacter,
};
JS_EXPORT_PRIVATE const char* errorMessage(ErrorCode);
JS_EXPORT_PRIVATE ASCIILiteral errorMessage(ErrorCode);
// Reports whether the given code represents any error at all, i.e. anything
// other than ErrorCode::NoError.
inline bool hasError(ErrorCode errorCode)
{
    const bool isNoError = (errorCode == ErrorCode::NoError);
    return !isNoError;
}
// Reports whether the given code is an error caused by the expression itself.
// TooManyDisjunctions means that we ran out of stack while compiling, so it is
// excluded; every other error is due to a problem in the expression.
inline bool hasHardError(ErrorCode errorCode)
{
    if (errorCode == ErrorCode::TooManyDisjunctions)
        return false;
    return hasError(errorCode);
}
} // namespace JSC::Yarr
} } // namespace JSC::Yarr

53
third_party/yarr/YarrFlags.h vendored Normal file
View file

@ -0,0 +1,53 @@
/*
* Copyright (C) 2019 Sony Interactive Entertainment Inc.
* Copyright (C) 2021 Apple Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
namespace JSC { namespace Yarr {

// Master list of the regular-expression flags. Each entry has the form
//   macro(flag character, EnumeratorName, lowerCamelName, bit index)
// Entries must be kept in alphabetical order of the flag character.
// The last entry intentionally carries no trailing line continuation, so the
// macro body always ends there no matter what appears on the following line.
#define JSC_REGEXP_FLAGS(macro) \
    macro('d', HasIndices, hasIndices, 0) \
    macro('g', Global, global, 1) \
    macro('i', IgnoreCase, ignoreCase, 2) \
    macro('m', Multiline, multiline, 3) \
    macro('s', DotAll, dotAll, 4) \
    macro('u', Unicode, unicode, 5) \
    macro('v', UnicodeSets, unicodeSets, 6) \
    macro('y', Sticky, sticky, 7)

// Counts the entries of JSC_REGEXP_FLAGS: every entry expands to "+ 1".
#define JSC_COUNT_REGEXP_FLAG(key, name, lowerCaseName, index) + 1
static constexpr unsigned numberOfFlags = 0 JSC_REGEXP_FLAGS(JSC_COUNT_REGEXP_FLAG);
#undef JSC_COUNT_REGEXP_FLAG

// Bit mask with one bit per flag (1 << bit index). DeletedValue occupies the
// first bit above the listed flags (1 << numberOfFlags).
enum class Flags : uint16_t {
#define JSC_DEFINE_REGEXP_FLAG(key, name, lowerCaseName, index) name = 1 << index,
    JSC_REGEXP_FLAGS(JSC_DEFINE_REGEXP_FLAG)
#undef JSC_DEFINE_REGEXP_FLAG
    DeletedValue = 1 << numberOfFlags,
};

} } // namespace JSC::Yarr

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 2009, 2010-2012, 2014, 2016 Apple Inc. All rights reserved.
* Copyright (C) 2009-2023 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -25,6 +25,8 @@
#pragma once
#include "YarrErrorCode.h"
#include "YarrFlags.h"
#include "YarrPattern.h"
namespace WTF {
@ -32,55 +34,28 @@ class BumpPointerAllocator;
}
using WTF::BumpPointerAllocator;
namespace JSC {
namespace Yarr {
namespace JSC { namespace Yarr {
class ByteDisjunction;
struct ByteTerm {
enum Type {
TypeBodyAlternativeBegin,
TypeBodyAlternativeDisjunction,
TypeBodyAlternativeEnd,
TypeAlternativeBegin,
TypeAlternativeDisjunction,
TypeAlternativeEnd,
TypeSubpatternBegin,
TypeSubpatternEnd,
TypeAssertionBOL,
TypeAssertionEOL,
TypeAssertionWordBoundary,
TypePatternCharacterOnce,
TypePatternCharacterFixed,
TypePatternCharacterGreedy,
TypePatternCharacterNonGreedy,
TypePatternCasedCharacterOnce,
TypePatternCasedCharacterFixed,
TypePatternCasedCharacterGreedy,
TypePatternCasedCharacterNonGreedy,
TypeCharacterClass,
TypeBackReference,
TypeParenthesesSubpattern,
TypeParenthesesSubpatternOnceBegin,
TypeParenthesesSubpatternOnceEnd,
TypeParenthesesSubpatternTerminalBegin,
TypeParenthesesSubpatternTerminalEnd,
TypeParentheticalAssertionBegin,
TypeParentheticalAssertionEnd,
TypeCheckInput,
TypeUncheckInput,
TypeDotStarEnclosure,
} type;
union {
struct {
union {
UChar32 patternCharacter;
char32_t patternCharacter;
struct {
UChar32 lo;
UChar32 hi;
char32_t lo;
char32_t hi;
} casedCharacter;
CharacterClass* characterClass;
unsigned subpatternId;
struct {
unsigned subpatternId;
unsigned duplicateNamedGroupId;
} parenIds;
struct {
unsigned firstSubpatternId;
unsigned lastSubpatternId;
} assertionIds;
};
union {
ByteDisjunction* parenthesesDisjunction;
@ -101,91 +76,139 @@ struct ByteTerm {
} anchors;
unsigned checkInputCount;
};
unsigned frameLocation;
unsigned frameLocation { 0 };
// Discriminator stored in ByteTerm::type.
// The enumerator order is significant: isCharacterType() and
// isCasedCharacterType() test membership with range comparisons, so each of
// the two groups marked below must stay contiguous and in this order.
enum class Type : uint8_t {
BodyAlternativeBegin,
BodyAlternativeDisjunction,
BodyAlternativeEnd,
AlternativeBegin,
AlternativeDisjunction,
AlternativeEnd,
SubpatternBegin,
SubpatternEnd,
AssertionBOL,
AssertionEOL,
AssertionWordBoundary,
// Character types: the range PatternCharacterOnce..PatternCharacterNonGreedy
// is what isCharacterType() checks.
PatternCharacterOnce,
PatternCharacterFixed,
PatternCharacterGreedy,
PatternCharacterNonGreedy,
// Cased character types: the range PatternCasedCharacterOnce..
// PatternCasedCharacterNonGreedy is what isCasedCharacterType() checks.
PatternCasedCharacterOnce,
PatternCasedCharacterFixed,
PatternCasedCharacterGreedy,
PatternCasedCharacterNonGreedy,
CharacterClass,
BackReference,
ParenthesesSubpattern,
ParenthesesSubpatternOnceBegin,
ParenthesesSubpatternOnceEnd,
ParenthesesSubpatternTerminalBegin,
ParenthesesSubpatternTerminalEnd,
ParentheticalAssertionBegin,
ParentheticalAssertionEnd,
CheckInput,
UncheckInput,
HaveCheckedInput,
DotStarEnclosure,
};
Type type;
bool m_capture : 1;
bool m_invert : 1;
unsigned inputPosition;
MatchDirection m_matchDirection : 1;
unsigned inputPosition { 0 };
ByteTerm(UChar32 ch, unsigned inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
ByteTerm(char32_t ch, unsigned inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
: frameLocation(frameLocation)
, m_capture(false)
, m_invert(false)
, m_matchDirection(Forward)
, inputPosition(inputPos)
{
atom.patternCharacter = ch;
atom.quantityType = quantityType;
atom.quantityMinCount = quantityCount.unsafeGet();
atom.quantityMaxCount = quantityCount.unsafeGet();
inputPosition = inputPos;
atom.quantityMinCount = quantityCount;
atom.quantityMaxCount = quantityCount;
switch (quantityType) {
case QuantifierFixedCount:
type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
case QuantifierType::FixedCount:
type = (quantityCount == 1) ? ByteTerm::Type::PatternCharacterOnce : ByteTerm::Type::PatternCharacterFixed;
break;
case QuantifierGreedy:
type = ByteTerm::TypePatternCharacterGreedy;
case QuantifierType::Greedy:
atom.quantityMinCount = 0;
type = ByteTerm::Type::PatternCharacterGreedy;
break;
case QuantifierNonGreedy:
type = ByteTerm::TypePatternCharacterNonGreedy;
case QuantifierType::NonGreedy:
atom.quantityMinCount = 0;
type = ByteTerm::Type::PatternCharacterNonGreedy;
break;
}
}
ByteTerm(UChar32 lo, UChar32 hi, unsigned inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
ByteTerm(char32_t lo, char32_t hi, unsigned inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
: frameLocation(frameLocation)
, m_capture(false)
, m_invert(false)
, m_matchDirection(Forward)
, inputPosition(inputPos)
{
switch (quantityType) {
case QuantifierFixedCount:
type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
case QuantifierType::FixedCount:
type = (quantityCount == 1) ? ByteTerm::Type::PatternCasedCharacterOnce : ByteTerm::Type::PatternCasedCharacterFixed;
atom.quantityMinCount = quantityCount;
break;
case QuantifierGreedy:
type = ByteTerm::TypePatternCasedCharacterGreedy;
case QuantifierType::Greedy:
type = ByteTerm::Type::PatternCasedCharacterGreedy;
atom.quantityMinCount = 0;
break;
case QuantifierNonGreedy:
type = ByteTerm::TypePatternCasedCharacterNonGreedy;
case QuantifierType::NonGreedy:
type = ByteTerm::Type::PatternCasedCharacterNonGreedy;
atom.quantityMinCount = 0;
break;
}
atom.casedCharacter.lo = lo;
atom.casedCharacter.hi = hi;
atom.quantityType = quantityType;
atom.quantityMinCount = quantityCount.unsafeGet();
atom.quantityMaxCount = quantityCount.unsafeGet();
inputPosition = inputPos;
atom.quantityMaxCount = quantityCount;
}
ByteTerm(CharacterClass* characterClass, bool invert, unsigned inputPos)
: type(ByteTerm::TypeCharacterClass)
: type(ByteTerm::Type::CharacterClass)
, m_capture(false)
, m_invert(invert)
, m_matchDirection(Forward)
, inputPosition(inputPos)
{
atom.characterClass = characterClass;
atom.quantityType = QuantifierFixedCount;
atom.quantityType = QuantifierType::FixedCount;
atom.quantityMinCount = 1;
atom.quantityMaxCount = 1;
inputPosition = inputPos;
}
ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, unsigned inputPos)
: type(type)
, m_capture(capture)
, m_invert(false)
, m_matchDirection(Forward)
, inputPosition(inputPos)
{
atom.subpatternId = subpatternId;
atom.parenIds.subpatternId = subpatternId;
atom.parenIds.duplicateNamedGroupId = 0;
atom.parenthesesDisjunction = parenthesesInfo;
atom.quantityType = QuantifierFixedCount;
atom.quantityType = QuantifierType::FixedCount;
atom.quantityMinCount = 1;
atom.quantityMaxCount = 1;
inputPosition = inputPos;
}
ByteTerm(Type type, bool invert = false)
: type(type)
, m_capture(false)
, m_invert(invert)
, m_matchDirection(Forward)
{
atom.quantityType = QuantifierFixedCount;
atom.quantityType = QuantifierType::FixedCount;
atom.quantityMinCount = 1;
atom.quantityMaxCount = 1;
}
@ -194,57 +217,81 @@ struct ByteTerm {
: type(type)
, m_capture(capture)
, m_invert(invert)
, m_matchDirection(Forward)
, inputPosition(inputPos)
{
atom.subpatternId = subpatternId;
atom.quantityType = QuantifierFixedCount;
atom.parenIds.subpatternId = subpatternId;
atom.parenIds.duplicateNamedGroupId = 0;
atom.quantityType = QuantifierType::FixedCount;
atom.quantityMinCount = 1;
atom.quantityMaxCount = 1;
}
ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, MatchDirection matchDirection, unsigned inputPos)
: type(type)
, m_capture(capture)
, m_invert(invert)
, m_matchDirection(matchDirection)
, inputPosition(inputPos)
{
atom.parenIds.subpatternId = subpatternId;
atom.parenIds.duplicateNamedGroupId = 0;
atom.quantityType = QuantifierType::FixedCount;
atom.quantityMinCount = 1;
atom.quantityMaxCount = 1;
inputPosition = inputPos;
}
static ByteTerm BOL(unsigned inputPos)
{
ByteTerm term(TypeAssertionBOL);
ByteTerm term(Type::AssertionBOL);
term.inputPosition = inputPos;
return term;
}
static ByteTerm CheckInput(Checked<unsigned> count)
{
ByteTerm term(TypeCheckInput);
term.checkInputCount = count.unsafeGet();
ByteTerm term(Type::CheckInput);
term.checkInputCount = count;
return term;
}
static ByteTerm UncheckInput(Checked<unsigned> count)
{
ByteTerm term(TypeUncheckInput);
term.checkInputCount = count.unsafeGet();
ByteTerm term(Type::UncheckInput);
term.checkInputCount = count;
return term;
}
// Factory for a Type::HaveCheckedInput term whose checkInputCount is `count`.
static ByteTerm HaveCheckedInput(Checked<unsigned> count)
{
    ByteTerm haveChecked(Type::HaveCheckedInput);
    haveChecked.checkInputCount = count;
    return haveChecked;
}
static ByteTerm EOL(unsigned inputPos)
{
ByteTerm term(TypeAssertionEOL);
ByteTerm term(Type::AssertionEOL);
term.inputPosition = inputPos;
return term;
}
static ByteTerm WordBoundary(bool invert, unsigned inputPos)
static ByteTerm WordBoundary(bool invert, MatchDirection matchDirection, unsigned inputPos)
{
ByteTerm term(TypeAssertionWordBoundary, invert);
ByteTerm term(Type::AssertionWordBoundary, invert);
term.m_matchDirection = matchDirection;
term.inputPosition = inputPos;
return term;
}
static ByteTerm BackReference(unsigned subpatternId, unsigned inputPos)
static ByteTerm BackReference(unsigned subpatternId, MatchDirection matchDirection, unsigned inputPos)
{
return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
return ByteTerm(Type::BackReference, subpatternId, false, false, matchDirection, inputPos);
}
static ByteTerm BodyAlternativeBegin(bool onceThrough)
{
ByteTerm term(TypeBodyAlternativeBegin);
ByteTerm term(Type::BodyAlternativeBegin);
term.alternative.next = 0;
term.alternative.end = 0;
term.alternative.onceThrough = onceThrough;
@ -253,7 +300,7 @@ struct ByteTerm {
static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
{
ByteTerm term(TypeBodyAlternativeDisjunction);
ByteTerm term(Type::BodyAlternativeDisjunction);
term.alternative.next = 0;
term.alternative.end = 0;
term.alternative.onceThrough = onceThrough;
@ -262,7 +309,7 @@ struct ByteTerm {
static ByteTerm BodyAlternativeEnd()
{
ByteTerm term(TypeBodyAlternativeEnd);
ByteTerm term(Type::BodyAlternativeEnd);
term.alternative.next = 0;
term.alternative.end = 0;
term.alternative.onceThrough = false;
@ -271,7 +318,7 @@ struct ByteTerm {
static ByteTerm AlternativeBegin()
{
ByteTerm term(TypeAlternativeBegin);
ByteTerm term(Type::AlternativeBegin);
term.alternative.next = 0;
term.alternative.end = 0;
term.alternative.onceThrough = false;
@ -280,7 +327,7 @@ struct ByteTerm {
static ByteTerm AlternativeDisjunction()
{
ByteTerm term(TypeAlternativeDisjunction);
ByteTerm term(Type::AlternativeDisjunction);
term.alternative.next = 0;
term.alternative.end = 0;
term.alternative.onceThrough = false;
@ -289,7 +336,7 @@ struct ByteTerm {
static ByteTerm AlternativeEnd()
{
ByteTerm term(TypeAlternativeEnd);
ByteTerm term(Type::AlternativeEnd);
term.alternative.next = 0;
term.alternative.end = 0;
term.alternative.onceThrough = false;
@ -298,27 +345,92 @@ struct ByteTerm {
static ByteTerm SubpatternBegin()
{
return ByteTerm(TypeSubpatternBegin);
return ByteTerm(Type::SubpatternBegin);
}
static ByteTerm SubpatternEnd()
{
return ByteTerm(TypeSubpatternEnd);
return ByteTerm(Type::SubpatternEnd);
}
// Factory for a Type::ParentheticalAssertionBegin term.
// Records the first subpattern id, the invert flag and the match direction;
// lastSubpatternId is not written here.
static ByteTerm ParentheticalAssertionBegin(unsigned firstSubpatternId, bool invert, MatchDirection matchDirection)
{
    // The (Type, bool) constructor stores the invert flag directly.
    ByteTerm assertionBegin(Type::ParentheticalAssertionBegin, invert);
    assertionBegin.m_matchDirection = matchDirection;
    assertionBegin.atom.assertionIds.firstSubpatternId = firstSubpatternId;
    return assertionBegin;
}
// Factory for a Type::ParentheticalAssertionEnd term.
// Records the first and last subpattern ids, the invert flag and the match
// direction.
static ByteTerm ParentheticalAssertionEnd(unsigned firstSubpatternId, unsigned lastSubpatternId, bool invert, MatchDirection matchDirection)
{
    // The (Type, bool) constructor stores the invert flag directly.
    ByteTerm assertionEnd(Type::ParentheticalAssertionEnd, invert);
    assertionEnd.m_matchDirection = matchDirection;
    assertionEnd.atom.assertionIds.lastSubpatternId = lastSubpatternId;
    assertionEnd.atom.assertionIds.firstSubpatternId = firstSubpatternId;
    return assertionEnd;
}
static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor)
{
ByteTerm term(TypeDotStarEnclosure);
ByteTerm term(Type::DotStarEnclosure);
term.anchors.m_bol = bolAnchor;
term.anchors.m_eol = eolAnchor;
return term;
}
// True when `type` lies in the contiguous enumerator range
// PatternCharacterOnce..PatternCharacterNonGreedy.
bool isCharacterType()
{
    if (type < Type::PatternCharacterOnce)
        return false;
    return type <= Type::PatternCharacterNonGreedy;
}
// True when `type` lies in the contiguous enumerator range
// PatternCasedCharacterOnce..PatternCasedCharacterNonGreedy.
bool isCasedCharacterType()
{
    if (type < Type::PatternCasedCharacterOnce)
        return false;
    return type <= Type::PatternCasedCharacterNonGreedy;
}
bool isCharacterClass()
{
return type == Type::CharacterClass;
}
bool containsAnyCaptures()
{
ASSERT(this->type == Type::ParentheticalAssertionBegin
|| this->type == Type::ParentheticalAssertionEnd);
return lastSubpatternId() >= firstSubpatternId();
}
unsigned subpatternId()
{
return atom.parenIds.subpatternId;
}
unsigned duplicateNamedGroupId()
{
return atom.parenIds.duplicateNamedGroupId;
}
unsigned firstSubpatternId()
{
return atom.assertionIds.firstSubpatternId;
}
unsigned lastSubpatternId()
{
return atom.assertionIds.lastSubpatternId;
}
bool invert()
{
return m_invert;
}
MatchDirection matchDirection()
{
return m_matchDirection;
}
bool capture()
{
return m_capture;
@ -326,8 +438,6 @@ struct ByteTerm {
};
class ByteDisjunction {
WTF_MAKE_FAST_ALLOCATED;
public:
ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
: m_numSubpatterns(numSubpatterns)
@ -336,24 +446,26 @@ public:
}
size_t estimatedSizeInBytes() const { return terms.capacity() * sizeof(ByteTerm); }
Vector<ByteTerm> terms;
unsigned m_numSubpatterns;
unsigned m_frameSize;
};
struct BytecodePattern : public gc {
WTF_MAKE_FAST_ALLOCATED;
public:
BytecodePattern(std::unique_ptr<ByteDisjunction> body, Vector<std::unique_ptr<ByteDisjunction>>& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator)
BytecodePattern(std::unique_ptr<ByteDisjunction> body, Vector<std::unique_ptr<ByteDisjunction>>& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator, unsigned offsetVectorBaseForNamedCaptures, unsigned offsetsSize)
: m_body(WTFMove(body))
, m_flags(pattern.m_flags)
, m_allocator(allocator)
, m_offsetVectorBaseForNamedCaptures(offsetVectorBaseForNamedCaptures)
, m_offsetsSize(offsetsSize)
, m_duplicateNamedGroupForSubpatternId(pattern.m_duplicateNamedGroupForSubpatternId)
{
m_body->terms.shrinkToFit();
newlineCharacterClass = pattern.newlineCharacterClass();
if (unicode() && ignoreCase())
if (eitherUnicode() && ignoreCase())
wordcharCharacterClass = pattern.wordUnicodeIgnoreCaseCharCharacterClass();
else
wordcharCharacterClass = pattern.wordcharCharacterClass();
@ -364,43 +476,57 @@ public:
m_userCharacterClasses.swap(pattern.m_userCharacterClasses);
m_userCharacterClasses.shrinkToFit();
GC_REGISTER_FINALIZER_NO_ORDER(this, [](void* obj, void* cd) {
BytecodePattern* pattern = (BytecodePattern*)obj;
pattern->clear();
},
NULL, NULL, NULL);
}
m_numDuplicateNamedCaptureGroups = pattern.m_numDuplicateNamedCaptureGroups;
~BytecodePattern()
{
clear();
}
void* operator new(size_t size)
{
return GC_MALLOC_ATOMIC(size);
}
void* operator new[](size_t size) = delete;
void clear()
{
deleteAllValues(m_allParenthesesInfo);
deleteAllValues(m_userCharacterClasses);
m_body.reset();
GC_REGISTER_FINALIZER_NO_ORDER(
this, [](void* obj, void*) {
BytecodePattern* self = static_cast<BytecodePattern*>(obj);
self->~BytecodePattern();
},
nullptr, nullptr, nullptr);
}
size_t estimatedSizeInBytes() const { return m_body->estimatedSizeInBytes(); }
bool ignoreCase() const { return m_flags & FlagIgnoreCase; }
bool multiline() const { return m_flags & FlagMultiline; }
bool sticky() const { return m_flags & FlagSticky; }
bool unicode() const { return m_flags & FlagUnicode; }
bool dotAll() const { return m_flags & FlagDotAll; }
bool hasDuplicateNamedCaptureGroups() const { return !!m_numDuplicateNamedCaptureGroups; }
// Maps a duplicate-named-group id to its offset: ids are 1-based (0 is
// rejected by the assertion), so id N maps to
// m_offsetVectorBaseForNamedCaptures + (N - 1).
unsigned offsetForDuplicateNamedGroupId(unsigned duplicateNamedGroupId)
{
    ASSERT(duplicateNamedGroupId);
    const unsigned zeroBasedIndex = duplicateNamedGroupId - 1;
    return m_offsetVectorBaseForNamedCaptures + zeroBasedIndex;
}
// Derives the compile mode from the pattern flags: Unicode when the unicode
// flag is set (checked first), otherwise UnicodeSets when the unicodeSets flag
// is set, otherwise Legacy.
CompileMode compileMode() const
{
    return unicode() ? CompileMode::Unicode
        : unicodeSets() ? CompileMode::UnicodeSets
                        : CompileMode::Legacy;
}
bool ignoreCase() const { return m_flags.contains(Flags::IgnoreCase); }
bool multiline() const { return m_flags.contains(Flags::Multiline); }
bool hasIndices() const { return m_flags.contains(Flags::HasIndices); }
bool sticky() const { return m_flags.contains(Flags::Sticky); }
bool unicode() const { return m_flags.contains(Flags::Unicode); }
bool unicodeSets() const { return m_flags.contains(Flags::UnicodeSets); }
bool eitherUnicode() const { return unicode() || unicodeSets(); }
bool dotAll() const { return m_flags.contains(Flags::DotAll); }
std::unique_ptr<ByteDisjunction> m_body;
RegExpFlags m_flags;
OptionSet<Flags> m_flags;
// Each BytecodePattern is associated with a RegExp, each RegExp is associated
// with a VM. Cache a pointer to out VM's m_regExpAllocator.
// with a VM. Cache a pointer to our VM's m_regExpAllocator.
BumpPointerAllocator* m_allocator;
unsigned m_numDuplicateNamedCaptureGroups;
unsigned m_offsetVectorBaseForNamedCaptures;
unsigned m_offsetsSize;
Vector<unsigned> m_duplicateNamedGroupForSubpatternId;
CharacterClass* newlineCharacterClass;
CharacterClass* wordcharCharacterClass;
@ -409,9 +535,9 @@ private:
Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses;
};
JS_EXPORT_PRIVATE std::unique_ptr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
JS_EXPORT_PRIVATE unsigned interpret(BytecodePattern*, const String& input, unsigned start, unsigned* output);
JS_EXPORT_PRIVATE std::unique_ptr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*, ErrorCode&);
JS_EXPORT_PRIVATE unsigned interpret(BytecodePattern*, StringView input, unsigned start, unsigned* output);
unsigned interpret(BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output);
unsigned interpret(BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output);
}
} // namespace JSC::Yarr
} } // namespace JSC::Yarr

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 2009, 2013-2017 Apple Inc. All rights reserved.
* Copyright (C) 2009-2023 Apple Inc. All rights reserved.
* Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
*
* Redistribution and use in source and binary forms, with or without
@ -27,144 +27,221 @@
#pragma once
#include "YarrErrorCode.h"
#include "YarrFlags.h"
#include "YarrUnicodeProperties.h"
#include "CheckedArithmetic.h"
#include <stddef.h>
namespace JSC {
namespace Yarr {
enum RegExpFlags {
NoFlags = 0,
FlagGlobal = 1,
FlagIgnoreCase = 2,
FlagMultiline = 4,
FlagSticky = 8,
FlagUnicode = 16,
FlagDotAll = 32,
InvalidFlags = 64,
DeletedValueFlags = -1
};
namespace JSC { namespace Yarr {
struct YarrPattern;
struct PatternDisjunction;
struct CharacterRange {
UChar32 begin{ 0 };
UChar32 end{ 0x10ffff };
// Mode a pattern is compiled in. YarrPattern::compileMode() returns Unicode
// when the Unicode flag is set, UnicodeSets when the UnicodeSets flag is set,
// and Legacy when neither is set.
enum class CompileMode : uint8_t {
Legacy,
Unicode,
UnicodeSets
};
CharacterRange(UChar32 begin, UChar32 end)
// A begin/end pair of char32_t values describing one range of a character class.
struct CharacterRange {
    char32_t begin { 0 };
    char32_t end { UCHAR_MAX_VALUE };

    // Both bounds are always supplied by the caller; the member initializers
    // above are overwritten by this constructor.
    CharacterRange(char32_t first, char32_t last)
        : begin(first)
        , end(last)
    {
    }
};
struct CharacterClass {
WTF_MAKE_FAST_ALLOCATED;
// Bit flags recording whether a character class holds BMP characters,
// non-BMP characters, both, or is not yet known.
enum struct CharacterClassWidths : unsigned char {
    Unknown = 0x0,
    HasBMPChars = 0x1,
    HasNonBMPChars = 0x2,
    HasBothBMPAndNonBMP = HasBMPChars | HasNonBMPChars
};

// Combines the bits of both operands.
inline CharacterClassWidths operator|(CharacterClassWidths lhs, CharacterClassWidths rhs)
{
    const unsigned leftBits = static_cast<unsigned>(lhs);
    const unsigned rightBits = static_cast<unsigned>(rhs);
    return static_cast<CharacterClassWidths>(leftBits | rightBits);
}

// Note: yields a bool (true when the operands share at least one bit),
// not a CharacterClassWidths value.
inline bool operator&(CharacterClassWidths lhs, CharacterClassWidths rhs)
{
    const unsigned sharedBits = static_cast<unsigned>(lhs) & static_cast<unsigned>(rhs);
    return sharedBits;
}

// Adds the bits of rhs to lhs in place and returns lhs.
inline CharacterClassWidths& operator|=(CharacterClassWidths& lhs, CharacterClassWidths rhs)
{
    return lhs = lhs | rhs;
}
// Contents of one character class: optional multi-character strings
// (m_strings), single-character matches and ranges (m_matches / m_ranges and
// their m_*Unicode counterparts), and a CharacterClassWidths summary.
struct CharacterClass {
public:
// All CharacterClass instances have to have the full set of matches and ranges,
// they may have an optional m_table for faster lookups (which must match the
// specified matches and ranges)
CharacterClass()
: m_hasNonBMPCharacters(false)
: m_characterWidths(CharacterClassWidths::Unknown)
, m_anyCharacter(false)
{
}
CharacterClass(std::initializer_list<UChar32> matches, std::initializer_list<CharacterRange> ranges, std::initializer_list<UChar32> matchesUnicode, std::initializer_list<CharacterRange> rangesUnicode)
CharacterClass(std::initializer_list<char32_t> matches, std::initializer_list<CharacterRange> ranges, std::initializer_list<char32_t> matchesUnicode, std::initializer_list<CharacterRange> rangesUnicode, CharacterClassWidths widths)
: m_matches(matches)
, m_ranges(ranges)
, m_matchesUnicode(matchesUnicode)
, m_rangesUnicode(rangesUnicode)
, m_hasNonBMPCharacters(false)
, m_characterWidths(widths)
, m_anyCharacter(false)
{
}
Vector<UChar32> m_matches;
// Constructor that additionally takes the string alternatives and the
// canonical-form bit.
CharacterClass(std::initializer_list<Vector<char32_t>> strings, std::initializer_list<char32_t> matches, std::initializer_list<CharacterRange> ranges, std::initializer_list<char32_t> matchesUnicode, std::initializer_list<CharacterRange> rangesUnicode, CharacterClassWidths widths, bool inCanonicalForm)
: m_strings(strings)
, m_matches(matches)
, m_ranges(ranges)
, m_matchesUnicode(matchesUnicode)
, m_rangesUnicode(rangesUnicode)
, m_characterWidths(widths)
, m_anyCharacter(false)
, m_inCanonicalForm(inCanonicalForm)
{
}
// True when the HasNonBMPChars bit of m_characterWidths is set.
bool hasNonBMPCharacters() const { return m_characterWidths & CharacterClassWidths::HasNonBMPChars; }
// True when m_characterWidths is exactly HasBMPChars or exactly HasNonBMPChars
// (so false for Unknown and for HasBothBMPAndNonBMP).
bool hasOneCharacterSize() const { return m_characterWidths == CharacterClassWidths::HasBMPChars || m_characterWidths == CharacterClassWidths::HasNonBMPChars; }
bool hasOnlyNonBMPCharacters() const { return m_characterWidths == CharacterClassWidths::HasNonBMPChars; }
// True when at least one multi-character string is stored in m_strings.
bool hasStrings() const { return !m_strings.isEmpty(); }
// True when any of the four single-character containers is non-empty.
bool hasSingleCharacters() const { return !m_matches.isEmpty() || !m_ranges.isEmpty() || !m_matchesUnicode.isEmpty() || !m_rangesUnicode.isEmpty(); }
Vector<Vector<char32_t>> m_strings;
Vector<char32_t> m_matches;
Vector<CharacterRange> m_ranges;
Vector<UChar32> m_matchesUnicode;
Vector<char32_t> m_matchesUnicode;
Vector<CharacterRange> m_rangesUnicode;
bool m_hasNonBMPCharacters : 1;
CharacterClassWidths m_characterWidths;
bool m_anyCharacter : 1;
// NOTE(review): only the constructor taking `inCanonicalForm` initializes this
// bit; the initializer lists of the other constructors shown here do not
// mention it. Confirm it is assigned before it is read on those paths.
bool m_inCanonicalForm : 1;
};
enum QuantifierType {
QuantifierFixedCount,
QuantifierGreedy,
QuantifierNonGreedy,
// CharacterClass extended with its own list of multi-character strings and its
// own canonical-form bit.
// NOTE(review): CharacterClass, as declared in this header, also has members
// named m_strings and m_inCanonicalForm; the declarations at the bottom of this
// struct hide them, so CharacterClass::hasStrings() would not observe strings
// stored here. Confirm this is intended.
struct ClassSet : public CharacterClass {
public:
// Empty set; marked as being in canonical form.
ClassSet()
: CharacterClass()
, m_inCanonicalForm(true)
{
}
// Single-character contents only; forwarded unchanged to CharacterClass.
ClassSet(std::initializer_list<char32_t> matches, std::initializer_list<CharacterRange> ranges, std::initializer_list<char32_t> matchesUnicode, std::initializer_list<CharacterRange> rangesUnicode, CharacterClassWidths widths)
: CharacterClass(matches, ranges, matchesUnicode, rangesUnicode, widths)
, m_inCanonicalForm(true)
{
}
// Strings plus single-character contents. The strings are stored in this
// struct's m_strings; the remaining arguments go to the CharacterClass base.
ClassSet(std::initializer_list<Vector<char32_t>> strings, std::initializer_list<char32_t> matches, std::initializer_list<CharacterRange> ranges, std::initializer_list<char32_t> matchesUnicode, std::initializer_list<CharacterRange> rangesUnicode, CharacterClassWidths widths)
: CharacterClass(matches, ranges, matchesUnicode, rangesUnicode, widths)
, m_strings(strings)
, m_inCanonicalForm(true)
{
}
// Strings only, with a caller-chosen canonical-form bit; the CharacterClass
// base is default-constructed.
ClassSet(std::initializer_list<Vector<char32_t>> strings, bool inCanonicalForm)
: m_strings(strings)
, m_inCanonicalForm(inCanonicalForm)
{
}
Vector<Vector<char32_t>> m_strings;
bool m_inCanonicalForm : 1;
};
// Kind of quantifier attached to a PatternTerm (stored in
// PatternTerm::quantityType). The PatternTerm constructors in this header
// start every term as FixedCount with a min and max count of 1.
enum class QuantifierType : uint8_t {
FixedCount,
Greedy,
NonGreedy,
};
// Direction a term or alternative is matched in. This is an unscoped enum, so
// Forward and Backward are usable without qualification. PatternTerm stores
// the value in a 1-bit bit-field (m_matchDirection), so only 0 and 1 fit.
enum MatchDirection : uint8_t {
// The code assumes that Forward is 0 and Backward is 1.
Forward = 0,
Backward = 1
};
struct PatternTerm {
enum Type {
TypeAssertionBOL,
TypeAssertionEOL,
TypeAssertionWordBoundary,
TypePatternCharacter,
TypeCharacterClass,
TypeBackReference,
TypeForwardReference,
TypeParenthesesSubpattern,
TypeParentheticalAssertion,
TypeDotStarEnclosure,
} type;
enum class Type : uint8_t {
AssertionBOL,
AssertionEOL,
AssertionWordBoundary,
PatternCharacter,
CharacterClass,
BackReference,
ForwardReference,
ParenthesesSubpattern,
ParentheticalAssertion,
DotStarEnclosure,
};
Type type;
bool m_capture : 1;
bool m_invert : 1;
MatchDirection m_matchDirection : 1;
QuantifierType quantityType;
Checked<unsigned> quantityMinCount;
Checked<unsigned> quantityMaxCount;
union {
UChar32 patternCharacter;
char32_t patternCharacter;
CharacterClass* characterClass;
unsigned backReferenceSubpatternId;
struct {
PatternDisjunction* disjunction;
unsigned subpatternId;
unsigned lastSubpatternId;
bool isCopy;
bool isTerminal;
bool isCopy : 1;
bool isTerminal : 1;
} parentheses;
struct {
bool bolAnchor : 1;
bool eolAnchor : 1;
} anchors;
};
QuantifierType quantityType;
Checked<unsigned> quantityMinCount;
Checked<unsigned> quantityMaxCount;
unsigned inputPosition;
unsigned frameLocation;
PatternTerm(UChar32 ch)
: type(PatternTerm::TypePatternCharacter)
PatternTerm(char32_t ch, MatchDirection matchDirection = Forward)
: type(PatternTerm::Type::PatternCharacter)
, m_capture(false)
, m_invert(false)
, m_matchDirection(matchDirection)
{
patternCharacter = ch;
quantityType = QuantifierFixedCount;
quantityType = QuantifierType::FixedCount;
quantityMinCount = quantityMaxCount = 1;
}
PatternTerm(CharacterClass* charClass, bool invert)
: type(PatternTerm::TypeCharacterClass)
PatternTerm(CharacterClass* charClass, bool invert, MatchDirection matchDirection = Forward)
: type(PatternTerm::Type::CharacterClass)
, m_capture(false)
, m_invert(invert)
, m_matchDirection(matchDirection)
{
characterClass = charClass;
quantityType = QuantifierFixedCount;
quantityType = QuantifierType::FixedCount;
quantityMinCount = quantityMaxCount = 1;
}
PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false)
PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false, MatchDirection matchDirection = Forward)
: type(type)
, m_capture(capture)
, m_invert(invert)
, m_matchDirection(matchDirection)
{
parentheses.disjunction = disjunction;
parentheses.subpatternId = subpatternId;
parentheses.isCopy = false;
parentheses.isTerminal = false;
quantityType = QuantifierFixedCount;
quantityType = QuantifierType::FixedCount;
quantityMinCount = quantityMaxCount = 1;
}
@ -172,66 +249,93 @@ struct PatternTerm {
: type(type)
, m_capture(false)
, m_invert(invert)
, m_matchDirection(Forward)
{
quantityType = QuantifierFixedCount;
quantityType = QuantifierType::FixedCount;
quantityMinCount = quantityMaxCount = 1;
}
PatternTerm(unsigned spatternId)
: type(TypeBackReference)
: type(Type::BackReference)
, m_capture(false)
, m_invert(false)
, m_matchDirection(Forward)
{
backReferenceSubpatternId = spatternId;
quantityType = QuantifierFixedCount;
quantityType = QuantifierType::FixedCount;
quantityMinCount = quantityMaxCount = 1;
}
PatternTerm(bool bolAnchor, bool eolAnchor)
: type(TypeDotStarEnclosure)
: type(Type::DotStarEnclosure)
, m_capture(false)
, m_invert(false)
, m_matchDirection(Forward)
{
anchors.bolAnchor = bolAnchor;
anchors.eolAnchor = eolAnchor;
quantityType = QuantifierFixedCount;
quantityType = QuantifierType::FixedCount;
quantityMinCount = quantityMaxCount = 1;
}
static PatternTerm ForwardReference()
{
return PatternTerm(TypeForwardReference);
auto term = PatternTerm(Type::ForwardReference);
term.backReferenceSubpatternId = 0;
return term;
}
static PatternTerm BOL()
{
return PatternTerm(TypeAssertionBOL);
return PatternTerm(Type::AssertionBOL);
}
static PatternTerm EOL()
{
return PatternTerm(TypeAssertionEOL);
return PatternTerm(Type::AssertionEOL);
}
static PatternTerm WordBoundary(bool invert)
{
return PatternTerm(TypeAssertionWordBoundary, invert);
return PatternTerm(Type::AssertionWordBoundary, invert);
}
bool invert()
// Retags a ForwardReference term as a BackReference in place. Only `type` is
// changed; no other field of the term is touched. Asserts that the term is
// currently a ForwardReference.
void convertToBackreference()
{
ASSERT(type == Type::ForwardReference);
type = Type::BackReference;
}
bool invert() const
{
return m_invert;
}
void setMatchDirection(MatchDirection matchDirection)
{
m_matchDirection = matchDirection;
}
MatchDirection matchDirection() const
{
return m_matchDirection;
}
bool capture()
{
return m_capture;
}
// True only for a non-inverted CharacterClass term whose class reports
// hasOneCharacterSize(). The class pointer is read only after the type check,
// because characterClass lives in a union shared with other term kinds.
bool isFixedWidthCharacterClass() const
{
    if (type != Type::CharacterClass)
        return false;
    if (!characterClass->hasOneCharacterSize())
        return false;
    return !invert();
}
bool containsAnyCaptures()
{
ASSERT(this->type == TypeParenthesesSubpattern);
return parentheses.lastSubpatternId >= parentheses.subpatternId;
ASSERT(this->type == Type::ParenthesesSubpattern
|| this->type == Type::ParentheticalAssertion);
return parentheses.lastSubpatternId && parentheses.lastSubpatternId >= parentheses.subpatternId;
}
void quantify(unsigned count, QuantifierType type)
@ -244,7 +348,7 @@ struct PatternTerm {
void quantify(unsigned minCount, unsigned maxCount, QuantifierType type)
{
// Currently only Parentheses can specify a non-zero min with a different max.
ASSERT(this->type == TypeParenthesesSubpattern || !minCount || minCount == maxCount);
ASSERT(this->type == Type::ParenthesesSubpattern || !minCount || minCount == maxCount);
ASSERT(minCount <= maxCount);
quantityMinCount = minCount;
quantityMaxCount = maxCount;
@ -253,11 +357,12 @@ struct PatternTerm {
};
struct PatternAlternative {
WTF_MAKE_FAST_ALLOCATED;
public:
PatternAlternative(PatternDisjunction* disjunction)
PatternAlternative(PatternDisjunction* disjunction, unsigned firstSubpatternId, MatchDirection matchDirection = Forward)
: m_parent(disjunction)
, m_firstSubpatternId(firstSubpatternId)
, m_lastSubpatternId(0)
, m_direction(matchDirection)
, m_onceThrough(false)
, m_hasFixedSize(false)
, m_startsWithBOL(false)
@ -265,10 +370,15 @@ public:
{
}
PatternTerm& lastTerm()
unsigned lastTermIndex()
{
ASSERT(m_terms.size());
return m_terms[m_terms.size() - 1];
return m_terms.size() - 1;
}
PatternTerm& lastTerm()
{
return m_terms[lastTermIndex()];
}
void removeLastTerm()
@ -287,9 +397,35 @@ public:
return m_onceThrough;
}
// True when m_lastSubpatternId is non-zero.
bool needToCleanupCaptures() const
{
    return m_lastSubpatternId != 0;
}
// Returns the first subpattern id to clear for this alternative:
// m_firstSubpatternId, or 1 when that is 0, since subpattern ids start at 1.
// Asserts that the result does not exceed m_lastSubpatternId.
unsigned firstCleanupSubpatternId()
{
    const unsigned firstSubpatternIdToClear = m_firstSubpatternId ? m_firstSubpatternId : 1;
    ASSERT(firstSubpatternIdToClear <= m_lastSubpatternId);
    return firstSubpatternIdToClear;
}
MatchDirection matchDirection() const
{
return m_direction;
}
Vector<PatternTerm> m_terms;
PatternDisjunction* m_parent;
unsigned m_minimumSize;
unsigned m_firstSubpatternId;
unsigned m_lastSubpatternId;
MatchDirection m_direction;
bool m_onceThrough : 1;
bool m_hasFixedSize : 1;
bool m_startsWithBOL : 1;
@ -297,18 +433,16 @@ public:
};
struct PatternDisjunction {
WTF_MAKE_FAST_ALLOCATED;
public:
PatternDisjunction(PatternAlternative* parent = 0)
PatternDisjunction(PatternAlternative* parent = nullptr)
: m_parent(parent)
, m_hasFixedSize(false)
{
}
PatternAlternative* addNewAlternative()
PatternAlternative* addNewAlternative(unsigned firstSubpatternId = 1, MatchDirection matchDirection = Forward)
{
m_alternatives.append(std::make_unique<PatternAlternative>(this));
m_alternatives.append(makeUnique<PatternAlternative>(this, firstSubpatternId, matchDirection));
return static_cast<PatternAlternative*>(m_alternatives.last().get());
}
@ -338,8 +472,7 @@ std::unique_ptr<CharacterClass> nonwordUnicodeIgnoreCaseCharCreate();
struct TermChain {
TermChain(PatternTerm term)
: term(term)
{
}
{}
PatternTerm term;
Vector<TermChain> hotTerms;
@ -347,21 +480,20 @@ struct TermChain {
struct YarrPattern : public gc {
static YarrPattern* createYarrPattern(const String& pattern, RegExpFlags flags, ErrorCode& error, void* stackLimit = nullptr)
{
return new YarrPattern(pattern, flags, error, stackLimit);
}
JS_EXPORT_PRIVATE YarrPattern(StringView pattern, OptionSet<Flags>, ErrorCode&);
void reset()
void resetForReparsing()
{
m_numSubpatterns = 0;
m_maxBackReference = 0;
m_initialStartValueFrameLocation = 0;
m_numDuplicateNamedCaptureGroups = 0;
m_containsBackreferences = false;
m_containsBOL = false;
m_containsLookbehinds = false;
m_containsUnsignedLengthPattern = false;
m_hasCopiedParenSubexpressions = false;
m_hasNamedCaptureGroups = false;
m_saveInitialStartValue = false;
anycharCached = nullptr;
@ -374,42 +506,15 @@ struct YarrPattern : public gc {
nonspacesCached = nullptr;
nonwordcharCached = nullptr;
nonwordUnicodeIgnoreCasecharCached = nullptr;
HashMap<unsigned, CharacterClass*>().swap(unicodePropertiesCached);
unicodePropertiesCached.clear();
m_body = nullptr;
m_disjunctions.clear();
m_userCharacterClasses.clear();
m_captureGroupNames.clear();
m_namedForwardReferences.clear();
HashMap<String, unsigned>().swap(m_namedGroupToParenIndex);
m_namedGroupToParenIndices.clear();
m_duplicateNamedGroupForSubpatternId.clear();
}
bool containsIllegalBackReference()
{
return m_maxBackReference > m_numSubpatterns;
}
// Returns true when at least one entry of m_namedForwardReferences has no
// equal entry in m_captureGroupNames; returns false otherwise, including when
// there are no named forward references at all.
bool containsIllegalNamedForwardReferences()
{
    if (m_namedForwardReferences.empty())
        return false;

    for (auto& referencedName : m_namedForwardReferences) {
        bool hasMatchingGroup = false;
        for (auto& groupName : m_captureGroupNames) {
            if (referencedName.equals(groupName)) {
                hasMatchingGroup = true;
                break;
            }
        }
        if (!hasMatchingGroup)
            return true;
    }
    return false;
}
bool containsUnsignedLengthPattern()
{
return m_containsUnsignedLengthPattern;
@ -511,30 +616,69 @@ struct YarrPattern : public gc {
return unicodePropertiesCached.get(classID);
}
bool global() const { return m_flags & FlagGlobal; }
bool ignoreCase() const { return m_flags & FlagIgnoreCase; }
bool multiline() const { return m_flags & FlagMultiline; }
bool sticky() const { return m_flags & FlagSticky; }
bool unicode() const { return m_flags & FlagUnicode; }
bool dotAll() const { return m_flags & FlagDotAll; }
// Returns (m_numSubpatterns + 1) * 2: two entries per subpattern plus one
// extra pair (presumably for the overall match; confirm against the users of
// the offset vector).
unsigned offsetVectorBaseForNamedCaptures() const
{
    const unsigned pairCount = m_numSubpatterns + 1;
    return pairCount * 2;
}
// Returns offsetVectorBaseForNamedCaptures() plus one entry for each
// duplicate named capture group.
unsigned offsetsSize() const
{
    const unsigned namedCapturesBase = offsetVectorBaseForNamedCaptures();
    return namedCapturesBase + m_numDuplicateNamedCaptureGroups;
}
// Returns offsetVectorBaseForNamedCaptures() plus the zero-based form of
// duplicateNamedGroupId. Ids are expected to be non-zero (asserted below).
unsigned offsetForDuplicateNamedGroupId(unsigned duplicateNamedGroupId)
{
    ASSERT(duplicateNamedGroupId);
    const unsigned zeroBasedGroupId = duplicateNamedGroupId - 1;
    return offsetVectorBaseForNamedCaptures() + zeroBasedGroupId;
}
bool global() const { return m_flags.contains(Flags::Global); }
bool ignoreCase() const { return m_flags.contains(Flags::IgnoreCase); }
bool multiline() const { return m_flags.contains(Flags::Multiline); }
bool hasIndices() const { return m_flags.contains(Flags::HasIndices); }
bool sticky() const { return m_flags.contains(Flags::Sticky); }
bool unicode() const { return m_flags.contains(Flags::Unicode); }
bool unicodeSets() const { return m_flags.contains(Flags::UnicodeSets); }
bool eitherUnicode() const { return unicode() || unicodeSets(); }
bool dotAll() const { return m_flags.contains(Flags::DotAll); }
bool hasDuplicateNamedCaptureGroups() const { return !!m_numDuplicateNamedCaptureGroups; }
// Derives the compile mode from the flag accessors: the Unicode flag takes
// precedence, then UnicodeSets; with neither set the mode is Legacy.
CompileMode compileMode() const
{
    return unicode() ? CompileMode::Unicode
        : unicodeSets() ? CompileMode::UnicodeSets
        : CompileMode::Legacy;
}
bool m_containsBackreferences : 1;
bool m_containsBOL : 1;
bool m_containsLookbehinds : 1;
bool m_containsUnsignedLengthPattern : 1;
bool m_hasCopiedParenSubexpressions : 1;
bool m_hasNamedCaptureGroups : 1;
bool m_saveInitialStartValue : 1;
RegExpFlags m_flags;
unsigned m_numSubpatterns{ 0 };
unsigned m_maxBackReference{ 0 };
unsigned m_initialStartValueFrameLocation{ 0 };
PatternDisjunction* m_body{ nullptr };
OptionSet<Flags> m_flags;
unsigned m_numSubpatterns { 0 };
unsigned m_initialStartValueFrameLocation { 0 };
unsigned m_numDuplicateNamedCaptureGroups { 0 };
PatternDisjunction* m_body { nullptr };
Vector<std::unique_ptr<PatternDisjunction>, 4> m_disjunctions;
Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses;
::Escargot::Vector<String, GCUtil::gc_malloc_allocator<String>> m_captureGroupNames;
::Escargot::Vector<String, GCUtil::gc_malloc_allocator<String>> m_namedForwardReferences;
HashMap<String, unsigned> m_namedGroupToParenIndex;
// The contents of the RHS Vector of m_namedGroupToParenIndices depend on whether the String is a
// duplicate named group or not.
// For a named group that is only used once in the pattern, the vector size is one and the only entry
// is the subpatternId for a non-duplicate named group.
// For a duplicate named group, the size will be greater than 2. The first vector entry is the
// duplicateNamedGroupId. Subsequent vector entries are the subpatternIds for that duplicateNamedGroupId.
HashMap<String, Vector<unsigned>> m_namedGroupToParenIndices;
Vector<unsigned> m_duplicateNamedGroupForSubpatternId;
private:
JS_EXPORT_PRIVATE YarrPattern(const String& pattern, RegExpFlags, ErrorCode&, void* stackLimit = nullptr);
void* operator new(size_t size)
{
static MAY_THREAD_LOCAL bool typeInited = false;
@ -542,88 +686,92 @@ private:
if (!typeInited) {
GC_word obj_bitmap[GC_BITMAP_SIZE(YarrPattern)] = { 0 };
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(YarrPattern, m_captureGroupNames));
GC_set_bit(obj_bitmap, GC_WORD_OFFSET(YarrPattern, m_namedForwardReferences));
descr = GC_make_descriptor(obj_bitmap, GC_WORD_LEN(YarrPattern));
typeInited = true;
}
return GC_MALLOC_EXPLICITLY_TYPED(size, descr);
}
ErrorCode compile(const String& patternString, void* stackLimit);
private:
ErrorCode compile(StringView patternString);
CharacterClass* anycharCached{ nullptr };
CharacterClass* newlineCached{ nullptr };
CharacterClass* digitsCached{ nullptr };
CharacterClass* spacesCached{ nullptr };
CharacterClass* wordcharCached{ nullptr };
CharacterClass* wordUnicodeIgnoreCaseCharCached{ nullptr };
CharacterClass* nondigitsCached{ nullptr };
CharacterClass* nonspacesCached{ nullptr };
CharacterClass* nonwordcharCached{ nullptr };
CharacterClass* nonwordUnicodeIgnoreCasecharCached{ nullptr };
CharacterClass* anycharCached { nullptr };
CharacterClass* newlineCached { nullptr };
CharacterClass* digitsCached { nullptr };
CharacterClass* spacesCached { nullptr };
CharacterClass* wordcharCached { nullptr };
CharacterClass* wordUnicodeIgnoreCaseCharCached { nullptr };
CharacterClass* nondigitsCached { nullptr };
CharacterClass* nonspacesCached { nullptr };
CharacterClass* nonwordcharCached { nullptr };
CharacterClass* nonwordUnicodeIgnoreCasecharCached { nullptr };
HashMap<unsigned, CharacterClass*> unicodePropertiesCached;
};
struct BackTrackInfoPatternCharacter {
uintptr_t begin; // Only needed for unicode patterns
uintptr_t matchAmount;
uintptr_t begin; // Only needed for unicode patterns
uintptr_t matchAmount;
static unsigned beginIndex() { return offsetof(BackTrackInfoPatternCharacter, begin) / sizeof(uintptr_t); }
static unsigned matchAmountIndex() { return offsetof(BackTrackInfoPatternCharacter, matchAmount) / sizeof(uintptr_t); }
};
struct BackTrackInfoCharacterClass {
uintptr_t begin; // Only needed for unicode patterns
uintptr_t matchAmount;
static unsigned beginIndex() { return offsetof(BackTrackInfoCharacterClass, begin) / sizeof(uintptr_t); }
static unsigned matchAmountIndex() { return offsetof(BackTrackInfoCharacterClass, matchAmount) / sizeof(uintptr_t); }
};
struct BackTrackInfoBackReference {
uintptr_t begin; // Not really needed for greedy quantifiers.
uintptr_t matchAmount; // Not really needed for fixed quantifiers.
unsigned beginIndex() { return offsetof(BackTrackInfoBackReference, begin) / sizeof(uintptr_t); }
unsigned matchAmountIndex() { return offsetof(BackTrackInfoBackReference, matchAmount) / sizeof(uintptr_t); }
};
struct BackTrackInfoAlternative {
union {
uintptr_t offset;
static unsigned beginIndex() { return offsetof(BackTrackInfoPatternCharacter, begin) / sizeof(uintptr_t); }
static unsigned matchAmountIndex() { return offsetof(BackTrackInfoPatternCharacter, matchAmount) / sizeof(uintptr_t); }
};
};
struct BackTrackInfoParentheticalAssertion {
uintptr_t begin;
struct BackTrackInfoCharacterClass {
uintptr_t begin; // Only needed for unicode patterns
uintptr_t matchAmount;
static unsigned beginIndex() { return offsetof(BackTrackInfoParentheticalAssertion, begin) / sizeof(uintptr_t); }
};
static unsigned beginIndex() { return offsetof(BackTrackInfoCharacterClass, begin) / sizeof(uintptr_t); }
static unsigned matchAmountIndex() { return offsetof(BackTrackInfoCharacterClass, matchAmount) / sizeof(uintptr_t); }
};
struct BackTrackInfoParenthesesOnce {
uintptr_t begin;
uintptr_t returnAddress;
struct BackTrackInfoBackReference {
uintptr_t begin; // Not really needed for greedy quantifiers.
uintptr_t matchAmount; // Not really needed for fixed quantifiers.
uintptr_t backReferenceSize; // Used by greedy quantifiers to backtrack.
static unsigned beginIndex() { return offsetof(BackTrackInfoParenthesesOnce, begin) / sizeof(uintptr_t); }
static unsigned returnAddressIndex() { return offsetof(BackTrackInfoParenthesesOnce, returnAddress) / sizeof(uintptr_t); }
};
static unsigned beginIndex() { return offsetof(BackTrackInfoBackReference, begin) / sizeof(uintptr_t); }
static unsigned matchAmountIndex() { return offsetof(BackTrackInfoBackReference, matchAmount) / sizeof(uintptr_t); }
static unsigned backReferenceSizeIndex() { return offsetof(BackTrackInfoBackReference, backReferenceSize) / sizeof(uintptr_t); }
};
struct BackTrackInfoParenthesesTerminal {
uintptr_t begin;
struct BackTrackInfoAlternative {
union {
uintptr_t offset;
};
};
static unsigned beginIndex() { return offsetof(BackTrackInfoParenthesesTerminal, begin) / sizeof(uintptr_t); }
};
struct BackTrackInfoParentheticalAssertion {
uintptr_t begin;
struct BackTrackInfoParentheses {
uintptr_t begin;
uintptr_t returnAddress;
uintptr_t matchAmount;
uintptr_t parenContextHead;
static unsigned beginIndex() { return offsetof(BackTrackInfoParentheticalAssertion, begin) / sizeof(uintptr_t); }
};
static unsigned beginIndex() { return offsetof(BackTrackInfoParentheses, begin) / sizeof(uintptr_t); }
static unsigned returnAddressIndex() { return offsetof(BackTrackInfoParentheses, returnAddress) / sizeof(uintptr_t); }
static unsigned matchAmountIndex() { return offsetof(BackTrackInfoParentheses, matchAmount) / sizeof(uintptr_t); }
static unsigned parenContextHeadIndex() { return offsetof(BackTrackInfoParentheses, parenContextHead) / sizeof(uintptr_t); }
};
}
} // namespace JSC::Yarr
struct BackTrackInfoParenthesesOnce {
uintptr_t begin;
uintptr_t returnAddress;
static unsigned beginIndex() { return offsetof(BackTrackInfoParenthesesOnce, begin) / sizeof(uintptr_t); }
static unsigned returnAddressIndex() { return offsetof(BackTrackInfoParenthesesOnce, returnAddress) / sizeof(uintptr_t); }
};
struct BackTrackInfoParenthesesTerminal {
uintptr_t begin;
static unsigned beginIndex() { return offsetof(BackTrackInfoParenthesesTerminal, begin) / sizeof(uintptr_t); }
};
struct BackTrackInfoParentheses {
uintptr_t begin;
uintptr_t returnAddress;
uintptr_t matchAmount;
uintptr_t parenContextHead;
static unsigned beginIndex() { return offsetof(BackTrackInfoParentheses, begin) / sizeof(uintptr_t); }
static unsigned returnAddressIndex() { return offsetof(BackTrackInfoParentheses, returnAddress) / sizeof(uintptr_t); }
static unsigned matchAmountIndex() { return offsetof(BackTrackInfoParentheses, matchAmount) / sizeof(uintptr_t); }
static unsigned parenContextHeadIndex() { return offsetof(BackTrackInfoParentheses, parenContextHead) / sizeof(uintptr_t); }
};
} } // namespace JSC::Yarr
using JSC::Yarr::MatchDirection;

1
third_party/yarr/YarrSourceComesFrom vendored Normal file
View file

@ -0,0 +1 @@
webkitgtk-2.44.2

View file

@ -1,63 +0,0 @@
/*
* Copyright (C) 2011, 2016 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "WTFBridge.h"
#include "YarrSyntaxChecker.h"
#include "YarrParser.h"
namespace JSC {
namespace Yarr {
// Parser delegate whose callbacks all do nothing, except
// isValidNamedForwardReference(), which always returns true. checkSyntax()
// passes an instance to parse() and returns the parser's ErrorCode.
class SyntaxChecker {
public:
void assertionBOL() {}
void assertionEOL() {}
void assertionWordBoundary(bool) {}
void atomPatternCharacter(UChar32) {}
void atomBuiltInCharacterClass(BuiltInCharacterClassID, bool) {}
void atomCharacterClassBegin(bool = false) {}
void atomCharacterClassAtom(UChar) {}
void atomCharacterClassRange(UChar, UChar) {}
void atomCharacterClassBuiltIn(BuiltInCharacterClassID, bool) {}
void atomCharacterClassEnd() {}
void atomParenthesesSubpatternBegin(bool = true, Optional<String> = nullptr) {}
void atomParentheticalAssertionBegin(bool = false) {}
void atomParenthesesEnd() {}
void atomBackReference(unsigned) {}
// Accepts every named forward reference.
bool isValidNamedForwardReference(const String&) { return true; }
void atomNamedForwardReference(const String&) {}
void atomNamedBackReference(String) {}
void quantifyAtom(unsigned, unsigned, bool) {}
void disjunction() {}
};
// Runs the parser over `pattern` with a do-nothing delegate and returns the
// parser's ErrorCode. The third argument passed to parse() is true when
// `flags` contains the character 'u'.
ErrorCode checkSyntax(const String& pattern, const String& flags)
{
    SyntaxChecker delegate;
    const bool hasUnicodeFlag = flags.contains('u');
    return parse(delegate, pattern, hasUnicodeFlag);
}
}
} // JSC::Yarr

View file

@ -1,35 +0,0 @@
/*
* Copyright (C) 2011 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "YarrErrorCode.h"
namespace JSC {
namespace Yarr {
ErrorCode checkSyntax(const String& pattern, const String& flags);
}
} // JSC::Yarr

View file

@ -29,10 +29,7 @@
#include "Yarr.h"
#include "YarrPattern.h"
using namespace WTF;
namespace JSC {
namespace Yarr {
namespace JSC { namespace Yarr {
struct HashIndex {
int16_t value;
@ -52,67 +49,87 @@ struct HashTable {
// Returns the `index` field of the values[] entry whose key equals `key`, or
// -1 when no entry matches.
ALWAYS_INLINE int entry(WTF::String& key) const
{
for (int i = 0; i < numberOfValues; i++) {
if (key.impl()->equals(values[i].key, strlen(values[i].key)))
return values[i].index;
}
return -1;
// Pick the starting slot of index[] from the key's hash masked with indexMask.
int indexEntry = key.hash() & indexMask;
int valueIndex = index[indexEntry].value;
// A value of -1 marks a slot with no entry.
if (valueIndex == -1)
return -1;
while (true) {
const char* keyStr = values[valueIndex].key;
// assume max length is 1024
ASSERT(strlen(keyStr) < 1024);
// Wrap the C string without copying so it can be compared with `key`;
// strnlen caps the scanned length at 1024.
Escargot::Latin1StringFromExternalMemory str((const unsigned char*)keyStr, strnlen(keyStr, 1024));
if (key.equals(&str)) {
return values[valueIndex].index;
}
// No match in this slot: follow the chain through `next`; -1 ends it.
indexEntry = index[indexEntry].next;
if (indexEntry == -1)
return -1;
valueIndex = index[indexEntry].value;
ASSERT(valueIndex != -1);
};
}
};
#if defined(ENABLE_ICU)
#include "UnicodePatternTables.h"
#endif
// Maps a Unicode property name plus property value to a built-in character
// class ID.
//
// The name selects which table is searched for the value:
//   "Script" / "sc"               -> scriptHashTable
//   "Script_Extensions" / "scx"   -> scriptExtensionHashTable
//   "General_Category" / "gc"     -> generalCategoryHashTable
// The table's result is added to BuiltInCharacterClassID::BaseUnicodePropertyID
// to form the returned ID.
//
// Returns nullptr (an empty Optional) when the name is none of the above,
// when the value is not found in the selected table, or when ENABLE_ICU is
// not defined.
Optional<BuiltInCharacterClassID> unicodeMatchPropertyValue(WTF::String unicodePropertyName, WTF::String unicodePropertyValue)
{
#if defined(ENABLE_ICU)
    int matchedIndex = -1;
    if (unicodePropertyName == "Script" || unicodePropertyName == "sc") {
        matchedIndex = scriptHashTable.entry(unicodePropertyValue);
    } else if (unicodePropertyName == "Script_Extensions" || unicodePropertyName == "scx") {
        matchedIndex = scriptExtensionHashTable.entry(unicodePropertyValue);
    } else if (unicodePropertyName == "General_Category" || unicodePropertyName == "gc") {
        matchedIndex = generalCategoryHashTable.entry(unicodePropertyValue);
    }

    if (matchedIndex != -1) {
        const int firstPropertyID = static_cast<int>(BuiltInCharacterClassID::BaseUnicodePropertyID);
        return Optional<BuiltInCharacterClassID>(static_cast<BuiltInCharacterClassID>(firstPropertyID + matchedIndex));
    }
#endif
    // Reached when nothing matched, or when the tables are compiled out.
    return nullptr;
}
// Maps a lone Unicode property string to a built-in character class ID.
// The binary-property table is tried first, then the general-category table,
// and — only when compileMode is CompileMode::UnicodeSets — the
// sequence-property table. The first hit is added to
// BuiltInCharacterClassID::BaseUnicodePropertyID to form the result;
// nullptr is returned when nothing matches.
// NOTE(review): two signature lines appear back to back and the
// ENABLE_ICU guard is opened twice, which looks like old and new diff lines
// shown together without +/- markers. The body reads `compileMode`, so it
// belongs to the two-parameter form — confirm against the real file.
Optional<BuiltInCharacterClassID> unicodeMatchProperty(WTF::String unicodePropertyValue)
Optional<BuiltInCharacterClassID> unicodeMatchProperty(WTF::String unicodePropertyValue, CompileMode compileMode)
{
#if defined(ENABLE_ICU)
int propertyIndex = -1;
#if defined(ENABLE_ICU)
propertyIndex = binaryPropertyHashTable.entry(unicodePropertyValue);
// Each later table is consulted only if the previous lookup returned -1.
if (propertyIndex == -1)
propertyIndex = generalCategoryHashTable.entry(unicodePropertyValue);
if (propertyIndex == -1 && compileMode == CompileMode::UnicodeSets)
propertyIndex = sequencePropertyHashTable.entry(unicodePropertyValue);
#endif
if (propertyIndex == -1)
return nullptr;
return Optional<BuiltInCharacterClassID>(static_cast<BuiltInCharacterClassID>(static_cast<int>(BuiltInCharacterClassID::BaseUnicodePropertyID) + propertyIndex));
#else
return nullptr;
#endif
}
// Builds the CharacterClass for a Unicode-property class ID: the ID is
// turned into a zero-based index by subtracting
// BuiltInCharacterClassID::BaseUnicodePropertyID, and the function stored at
// that index in a table is called to create the class.
// In the #else branch the function hits RELEASE_ASSERT_NOT_REACHED().
// NOTE(review): two `return` statements naming different tables
// (createFunctions / createCharacterClassFunctions) and a doubled ENABLE_ICU
// guard appear here, which looks like old and new diff lines shown together
// without +/- markers — confirm against the real file which pair is live.
std::unique_ptr<CharacterClass> createUnicodeCharacterClassFor(BuiltInCharacterClassID unicodeClassID)
{
#if defined(ENABLE_ICU)
unsigned unicodePropertyIndex = static_cast<unsigned>(unicodeClassID) - static_cast<unsigned>(BuiltInCharacterClassID::BaseUnicodePropertyID);
return createFunctions[unicodePropertyIndex]();
#if defined(ENABLE_ICU)
return createCharacterClassFunctions[unicodePropertyIndex]();
#else
RELEASE_ASSERT_NOT_REACHED();
// Keeps a return statement on this path after the assertion.
return nullptr;
#endif
}
// Forwards to unicodeCharacterClassMayContainStrings() with the class ID
// rebased so that BuiltInCharacterClassID::BaseUnicodePropertyID maps to 0.
// Always returns false when ENABLE_ICU is not defined.
bool characterClassMayContainStrings(BuiltInCharacterClassID unicodeClassID)
{
    const unsigned classValue = static_cast<unsigned>(unicodeClassID);
    const unsigned firstPropertyValue = static_cast<unsigned>(BuiltInCharacterClassID::BaseUnicodePropertyID);
    const unsigned propertyOffset = classValue - firstPropertyValue;
#if defined(ENABLE_ICU)
    return unicodeCharacterClassMayContainStrings(propertyOffset);
#else
    return false;
#endif
}
} // namespace JSC::Yarr
} } // namespace JSC::Yarr

View file

@ -27,14 +27,16 @@
#include "Yarr.h"
// Declarations for the Unicode-property lookup helpers in JSC::Yarr.
// NOTE(review): the two styles of namespace opener/closer and the two
// unicodeMatchProperty declarations below look like old and new diff lines
// shown together without +/- markers — confirm against the real header.
namespace JSC {
namespace Yarr {
namespace JSC { namespace Yarr {
// Forward declarations; the definitions are not in this header.
struct CharacterClass;
struct ClassSet;
// Opaque declaration of CompileMode with a uint8_t underlying type.
enum class CompileMode : uint8_t;
// Takes two strings and yields an optional built-in character class ID.
JS_EXPORT_PRIVATE Optional<BuiltInCharacterClassID> unicodeMatchPropertyValue(WTF::String, WTF::String);
// Takes one string (plus a CompileMode in the second form) and yields an
// optional built-in character class ID.
JS_EXPORT_PRIVATE Optional<BuiltInCharacterClassID> unicodeMatchProperty(WTF::String);
JS_EXPORT_PRIVATE Optional<BuiltInCharacterClassID> unicodeMatchProperty(WTF::String, CompileMode);
// Returns an owning pointer to the CharacterClass for the given class ID.
std::unique_ptr<CharacterClass> createUnicodeCharacterClassFor(BuiltInCharacterClassID);
}
} // namespace JSC::Yarr
// Returns a bool for the given class ID.
// NOTE(review): presumably true when the class can match multi-character
// strings — confirm against the definition.
JS_EXPORT_PRIVATE bool characterClassMayContainStrings(BuiltInCharacterClassID unicodeClassID);
} } // namespace JSC::Yarr

View file

@ -802,6 +802,8 @@
## 16. Etc
# Stack overflow error does not occur with some compilers
'json-parser-recursive': [SKIP],
# Yarr supports numbers in { } only within the uint32 range
'regress/regress-126412': [SKIP],
}], # 'escargot == True'