mirror of
https://github.com/Samsung/escargot.git
synced 2026-06-22 10:01:50 +00:00
If the unicode flag is set but an unpaired UTF-16 surrogate is found in the RegExp interpreter,
we should not apply the UTF-16 surrogate-pair rule to the input. Signed-off-by: Seonghyun Kim <sh8281.kim@samsung.com>
This commit is contained in:
parent
7fc59b7171
commit
eda2f8d4fa
1 changed files with 22 additions and 0 deletions
22
third_party/yarr/YarrInterpreter.cpp
vendored
22
third_party/yarr/YarrInterpreter.cpp
vendored
|
|
@ -1661,6 +1661,15 @@ public:
|
|||
BACKTRACK();
|
||||
}
|
||||
MATCH_NEXT();
|
||||
#if defined(ENABLE_ICU)
|
||||
} else if (U16_IS_SURROGATE(currentTerm().atom.patternCharacter)) {
|
||||
// Escargot update
|
||||
for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
|
||||
if (!checkCharacter(currentTerm(), currentTerm().inputPosition - matchAmount))
|
||||
BACKTRACK();
|
||||
}
|
||||
MATCH_NEXT();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1685,6 +1694,18 @@ public:
|
|||
BACKTRACK();
|
||||
}
|
||||
MATCH_NEXT();
|
||||
#if defined(ENABLE_ICU)
|
||||
} else if (U16_IS_SURROGATE(currentTerm().atom.patternCharacter)) {
|
||||
// Escargot update
|
||||
for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
|
||||
auto inputPosition = term.inputPosition + 2 * matchAmount;
|
||||
if (input.getPos() < inputPosition)
|
||||
BACKTRACK();
|
||||
if (!checkCharacter(term, inputPosition))
|
||||
BACKTRACK();
|
||||
}
|
||||
MATCH_NEXT();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2197,6 +2218,7 @@ public:
|
|||
lo = tolower(ch);
|
||||
hi = toupper(ch);
|
||||
} else {
|
||||
// Escargot update
|
||||
// if ch is ALPHABETIC like latin or greek, we should not apply u_tolower or u_toupper (print('iI\u0130'.replace(/\u0130/gi, '#')))
|
||||
auto v = u_getIntPropertyValue(ch, UProperty::UCHAR_ALPHABETIC);
|
||||
if (v) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue