mirror of
https://gitee.com/openharmony/arkcompiler_ets_runtime
synced 2024-10-07 08:03:29 +00:00
!1636 string.replaceAll and regexp groupname
Merge pull request !1636 from zhangyouyou/master
This commit is contained in:
commit
0618a06957
@ -39,7 +39,15 @@ static constexpr uint16_t SPACE_OR_LINE_TERMINAL[] = {
|
||||
0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x0020, 0x00A0, 0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004,
|
||||
0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000, 0xFEFF,
|
||||
};
|
||||
|
||||
// Lead-byte thresholds consulted by StringHelper::UnicodeFromUtf8:
//   [0]        a first byte below this value is returned unchanged (single-byte code point);
//   [1]..[2]   inclusive lead-byte range that is followed by 1 continuation byte;
//   [3]..[4]   inclusive lead-byte range that is followed by 2 continuation bytes;
//   [5]..[6]   inclusive lead-byte range that is followed by 3 continuation bytes;
//   [7]..[8]   inclusive lead-byte range that is followed by 4 continuation bytes;
//   [9], [10]  the two lead bytes that are followed by 5 continuation bytes.
// Any other first byte makes UnicodeFromUtf8 return -1.
static constexpr int UICODE_FROM_UTF8[] = {
|
||||
0x80, 0xc0, 0xdf, 0xe0, 0xef, 0xf0, 0xf7, 0xf8, 0xfb, 0xfc, 0xfd,
|
||||
};
|
||||
// Lower bound checked by StringHelper::FromUtf8 after decoding, indexed by
// (number of continuation bytes - 1). A decoded value below the entry is rejected with -1,
// i.e. a sequence that is longer than its value requires is not accepted.
static constexpr int UTF8_MIN_CODE[] = {
|
||||
0x80, 0x800, 0x10000, 0x00200000, 0x04000000,
|
||||
};
|
||||
// Mask StringHelper::FromUtf8 applies to the lead byte before the continuation bytes are
// shifted in, indexed by (number of continuation bytes - 1). It keeps only the payload bits
// of the lead byte.
static constexpr char UTF8_FIRST_CODE[] = {
|
||||
0x1f, 0xf, 0x7, 0x3, 0x1,
|
||||
};
|
||||
class StringHelper {
|
||||
public:
|
||||
static std::string ToStdString(EcmaString *string);
|
||||
@ -175,7 +183,51 @@ public:
|
||||
|
||||
static EcmaString *Repeat(JSThread *thread, const std::u16string &thisStr, int32_t repeatLen, bool canBeCompress);
|
||||
|
||||
static EcmaString *Trim(JSThread *thread, const std::u16string &thisStr);
|
||||
static int UnicodeFromUtf8(const uint8_t *p, int maxLen, const uint8_t **pp)
|
||||
{
|
||||
int c = *p++;
|
||||
if (c < UICODE_FROM_UTF8[0]) {
|
||||
*pp = p;
|
||||
return c;
|
||||
}
|
||||
int l = 0;
|
||||
if (c >= UICODE_FROM_UTF8[1] && c <= UICODE_FROM_UTF8[2]) { // 1 - 2: 0000 0080 - 0000 07FF
|
||||
l = 1; // 1: 0000 0080 - 0000 07FF Unicode
|
||||
} else if (c >= UICODE_FROM_UTF8[3] && c <= UICODE_FROM_UTF8[4]) { // 3 - 4: 0000 0800 - 0000 FFFF
|
||||
l = 2; // 2: 0000 0800 - 0000 FFFF Unicode
|
||||
} else if (c >= UICODE_FROM_UTF8[5] && c <= UICODE_FROM_UTF8[6]) { // 5 - 6: 0001 0000 - 0010 FFFF
|
||||
l = 3; // 3: 0001 0000 - 0010 FFFF Unicode
|
||||
} else if (c >= UICODE_FROM_UTF8[7] && c <= UICODE_FROM_UTF8[8]) { // 7 - 8: 0020 0000 - 03FF FFFF
|
||||
l = 4; // 4: 0020 0000 - 03FF FFFF Unicode
|
||||
} else if (c == UICODE_FROM_UTF8[9] || c == UICODE_FROM_UTF8[10]) { // 9 - 10: 0400 0000 - 7FFF FFFF
|
||||
l = 5; // 5: 0400 0000 - 7FFF FFFF Unicode
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
/* check that we have enough characters */
|
||||
if (l > (maxLen - 1))
|
||||
return -1;
|
||||
|
||||
return FromUtf8(c, l, p, pp);
|
||||
}
|
||||
|
||||
static int FromUtf8(int c, int l, const uint8_t *p, const uint8_t **pp)
|
||||
{
|
||||
int b;
|
||||
c &= UTF8_FIRST_CODE[l - 1];
|
||||
for (int i = 0; i < l; i++) {
|
||||
b = *p++;
|
||||
if (b < utf_helper::UTF8_2B_SECOND || b >= utf_helper::UTF8_2B_FIRST) {
|
||||
return -1;
|
||||
}
|
||||
c = (c << 6) | (b & utf_helper::UTF8_2B_THIRD); // 6: Maximum Unicode range
|
||||
}
|
||||
if (c < UTF8_MIN_CODE[l - 1]) {
|
||||
return -1;
|
||||
}
|
||||
*pp = p;
|
||||
return c;
|
||||
}
|
||||
|
||||
static inline std::u16string Append(const std::u16string &str1, const std::u16string &str2)
|
||||
{
|
||||
|
@ -40,6 +40,7 @@ static constexpr uint8_t UTF8_1B_MAX = 0x7f;
|
||||
static constexpr uint16_t UTF8_2B_MAX = 0x7ff;
|
||||
static constexpr uint8_t UTF8_2B_FIRST = 0xc0;
|
||||
static constexpr uint8_t UTF8_2B_SECOND = 0x80;
|
||||
static constexpr uint8_t UTF8_2B_THIRD = 0x3f;
|
||||
|
||||
static constexpr uint8_t UTF8_3B_FIRST = 0xe0;
|
||||
static constexpr uint8_t UTF8_3B_SECOND = 0x80;
|
||||
|
@ -1583,6 +1583,7 @@ void Builtins::InitializeString(const JSHandle<GlobalEnv> &env, const JSHandle<J
|
||||
SetFunction(env, stringFuncPrototype, "padStart", BuiltinsString::PadStart, FunctionLength::ONE);
|
||||
SetFunction(env, stringFuncPrototype, "padEnd", BuiltinsString::PadEnd, FunctionLength::ONE);
|
||||
SetFunction(env, stringFuncPrototype, "replace", BuiltinsString::Replace, FunctionLength::TWO);
|
||||
SetFunction(env, stringFuncPrototype, "replaceAll", BuiltinsString::ReplaceAll, FunctionLength::TWO);
|
||||
SetFunction(env, stringFuncPrototype, "search", BuiltinsString::Search, FunctionLength::ONE);
|
||||
SetFunction(env, stringFuncPrototype, "slice", BuiltinsString::Slice, FunctionLength::TWO);
|
||||
SetFunction(env, stringFuncPrototype, "split", BuiltinsString::Split, FunctionLength::TWO);
|
||||
|
@ -913,10 +913,23 @@ JSTaggedValue BuiltinsRegExp::Replace(EcmaRuntimeCallInfo *argv)
|
||||
// v. Let n be n+1
|
||||
++index;
|
||||
}
|
||||
|
||||
// j. Let namedCaptures be ? Get(result, "groups").
|
||||
JSHandle<JSTaggedValue> groupsKey = globalConst->GetHandledGroupsString();
|
||||
JSTaggedValue named =
|
||||
FastRuntimeStub::FastGetPropertyByValue(thread, resultValues.GetTaggedValue(), groupsKey.GetTaggedValue());
|
||||
JSHandle<JSTaggedValue> namedCaptures(thread, named);
|
||||
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
|
||||
// m. If functionalReplace is true, then
|
||||
CString replacement;
|
||||
int emptyArrLength = 0;
|
||||
if (namedCaptures->IsUndefined()) {
|
||||
emptyArrLength = 3; // 3: «matched, pos, and string»
|
||||
} else {
|
||||
emptyArrLength = 4; // 4: «matched, pos, string, and groups»
|
||||
}
|
||||
JSHandle<TaggedArray> replacerArgs =
|
||||
factory->NewTaggedArray(3 + capturesList->GetLength()); // 3: «matched, pos, and string»
|
||||
factory->NewTaggedArray(emptyArrLength + capturesList->GetLength());
|
||||
if (functionalReplace) {
|
||||
// i. Let replacerArgs be «matched».
|
||||
replacerArgs->Set(thread, 0, getMatchString.GetTaggedValue());
|
||||
@ -929,6 +942,9 @@ JSTaggedValue BuiltinsRegExp::Replace(EcmaRuntimeCallInfo *argv)
|
||||
}
|
||||
replacerArgs->Set(thread, index + 1, JSTaggedValue(position));
|
||||
replacerArgs->Set(thread, index + 2, inputStr.GetTaggedValue()); // 2: position of string
|
||||
if (!namedCaptures->IsUndefined()) {
|
||||
replacerArgs->Set(thread, index + 3, namedCaptures.GetTaggedValue()); // 3: position of groups
|
||||
}
|
||||
// iv. Let replValue be Call(replaceValue, undefined, replacerArgs).
|
||||
const size_t argsLength = replacerArgs->GetLength();
|
||||
JSHandle<JSTaggedValue> undefined = globalConst->GetHandledUndefined();
|
||||
@ -944,8 +960,14 @@ JSTaggedValue BuiltinsRegExp::Replace(EcmaRuntimeCallInfo *argv)
|
||||
replacement = ConvertToString(*replacementString, StringConvertedUsage::LOGICOPERATION);
|
||||
} else {
|
||||
// n. Else,
|
||||
if (!namedCaptures->IsUndefined()) {
|
||||
JSHandle<JSObject> namedCapturesObj = JSTaggedValue::ToObject(thread, namedCaptures);
|
||||
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
|
||||
namedCaptures = JSHandle<JSTaggedValue>::Cast(namedCapturesObj);
|
||||
}
|
||||
JSHandle<JSTaggedValue> replacementHandle(
|
||||
thread, BuiltinsString::GetSubstitution(thread, matchString, srcString, position, capturesList,
|
||||
thread, BuiltinsString::GetSubstitution(thread, matchString, srcString,
|
||||
position, capturesList, namedCaptures,
|
||||
replaceValueHandle));
|
||||
replacement = ConvertToString(EcmaString::Cast(replacementHandle->GetTaggedObject()),
|
||||
StringConvertedUsage::LOGICOPERATION);
|
||||
@ -1350,7 +1372,6 @@ bool BuiltinsRegExp::GetFlagsInternal(JSThread *thread, const JSHandle<JSTaggedV
|
||||
uint8_t flags = static_cast<uint8_t>(regexpObj->GetOriginalFlags().GetInt());
|
||||
return flags & mask;
|
||||
}
|
||||
|
||||
// 21.2.5.2.2
|
||||
JSTaggedValue BuiltinsRegExp::RegExpBuiltinExec(JSThread *thread, const JSHandle<JSTaggedValue> ®exp,
|
||||
const JSHandle<JSTaggedValue> &inputStr, bool useCache)
|
||||
@ -1389,14 +1410,6 @@ JSTaggedValue BuiltinsRegExp::RegExpBuiltinExec(JSThread *thread, const JSHandle
|
||||
JSMutableHandle<JSTaggedValue> flags(thread, regexpObj->GetOriginalFlags());
|
||||
|
||||
JSHandle<RegExpExecResultCache> cacheTable(thread->GetEcmaVM()->GetRegExpCache());
|
||||
if (lastIndex == 0 && useCache) {
|
||||
JSTaggedValue cacheResult =
|
||||
cacheTable->FindCachedResult(thread, pattern, flags, inputStr, RegExpExecResultCache::EXEC_TYPE, regexp);
|
||||
if (cacheResult != JSTaggedValue::Undefined()) {
|
||||
return cacheResult;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t length = static_cast<EcmaString *>(inputStr->GetTaggedObject())->GetLength();
|
||||
uint8_t flagsBits = static_cast<uint8_t>(regexpObj->GetOriginalFlags().GetInt());
|
||||
JSHandle<JSTaggedValue> flagsValue(thread, FlagsBitsToString(thread, flagsBits));
|
||||
@ -1457,6 +1470,17 @@ JSTaggedValue BuiltinsRegExp::RegExpBuiltinExec(JSThread *thread, const JSHandle
|
||||
// 27. Perform CreateDataProperty(A, "0", matched_substr).
|
||||
JSHandle<JSTaggedValue> zeroValue(matchResult.captures_[0].second);
|
||||
JSObject::CreateDataProperty(thread, results, 0, zeroValue);
|
||||
ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
|
||||
|
||||
JSHandle<JSTaggedValue> groupName(thread, regexpObj->GetGroupName());
|
||||
JSMutableHandle<JSTaggedValue> groups(thread, JSTaggedValue::Undefined());
|
||||
if (!groupName->IsUndefined()) {
|
||||
JSHandle<JSTaggedValue> nullHandle(thread, JSTaggedValue::Null());
|
||||
JSHandle<JSObject> nullObj = factory->OrdinaryNewJSObjectCreate(nullHandle);
|
||||
groups.Update(nullObj.GetTaggedValue());
|
||||
}
|
||||
JSHandle<JSTaggedValue> groupsKey = globalConst->GetHandledGroupsString();
|
||||
JSObject::CreateDataProperty(thread, results, groupsKey, groups);
|
||||
// 28. For each integer i such that i > 0 and i <= n
|
||||
for (uint32_t i = 1; i < capturesSize; i++) {
|
||||
// a. Let capture_i be ith element of r's captures List
|
||||
@ -1468,6 +1492,14 @@ JSTaggedValue BuiltinsRegExp::RegExpBuiltinExec(JSThread *thread, const JSHandle
|
||||
}
|
||||
JSHandle<JSTaggedValue> iValue(thread, capturedValue);
|
||||
JSObject::CreateDataProperty(thread, results, i, iValue);
|
||||
if (!groupName->IsUndefined()) {
|
||||
JSHandle<JSObject> groupObject = JSHandle<JSObject>::Cast(groups);
|
||||
TaggedArray *groupArray = TaggedArray::Cast(regexpObj->GetGroupName().GetTaggedObject());
|
||||
if (groupArray->GetLength() > i - 1) {
|
||||
JSHandle<JSTaggedValue> skey(thread, groupArray->Get(i - 1));
|
||||
JSObject::CreateDataProperty(thread, groupObject, skey, iValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (lastIndex == 0 && useCache) {
|
||||
RegExpExecResultCache::AddResultInCache(thread, cacheTable, pattern, flags, inputStr,
|
||||
@ -1672,6 +1704,15 @@ JSTaggedValue BuiltinsRegExp::RegExpInitialize(JSThread *thread, const JSHandle<
|
||||
regexp->SetOriginalSource(thread, patternStrHandle.GetTaggedValue());
|
||||
// 12. Set the value of obj’s [[OriginalFlags]] internal slot to F.
|
||||
regexp->SetOriginalFlags(thread, JSTaggedValue(flagsBits));
|
||||
auto groupName = parser.GetGroupNames();
|
||||
if (!groupName.empty()) {
|
||||
JSHandle<TaggedArray> taggedArray = factory->NewTaggedArray(groupName.size());
|
||||
for (size_t i = 0; i < groupName.size(); ++i) {
|
||||
JSHandle<JSTaggedValue> flagsKey(factory->NewFromStdString(groupName[i].c_str()));
|
||||
taggedArray->Set(thread, i, flagsKey);
|
||||
}
|
||||
regexp->SetGroupName(thread, taggedArray);
|
||||
}
|
||||
// 13. Set obj’s [[RegExpMatcher]] internal slot.
|
||||
if (getCache.first == JSTaggedValue::Hole()) {
|
||||
auto bufferSize = parser.GetOriginBufferSize();
|
||||
|
@ -662,8 +662,8 @@ JSTaggedValue BuiltinsString::MatchAll(EcmaRuntimeCallInfo *argv)
|
||||
// b. If isRegExp is true, then
|
||||
if (isJSRegExp) {
|
||||
// i. Let flags be ? Get(searchValue, "flags").
|
||||
JSHandle<JSTaggedValue> flagsKey(factory->NewFromASCII("flags"));
|
||||
JSHandle<JSTaggedValue> flags = JSObject::GetProperty(thread, regexp, flagsKey).GetValue();
|
||||
JSHandle<JSTaggedValue> flagsString(globalConst->GetHandledFlagsString());
|
||||
JSHandle<JSTaggedValue> flags = JSObject::GetProperty(thread, regexp, flagsString).GetValue();
|
||||
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
|
||||
// ii. Perform ? RequireObjectCoercible(flags).
|
||||
JSTaggedValue::RequireObjectCoercible(thread, flags);
|
||||
@ -992,13 +992,13 @@ JSTaggedValue BuiltinsString::Replace(EcmaRuntimeCallInfo *argv)
|
||||
if (pos == -1) {
|
||||
return thisString.GetTaggedValue();
|
||||
}
|
||||
|
||||
JSHandle<JSTaggedValue> undefined = globalConst->GetHandledUndefined();
|
||||
JSMutableHandle<JSTaggedValue> replHandle(thread, factory->GetEmptyString().GetTaggedValue());
|
||||
// If functionalReplace is true, then
|
||||
if (replaceTag->IsCallable()) {
|
||||
// Let replValue be Call(replaceValue, undefined,«matched, pos, and string»).
|
||||
const size_t argsLength = 3; // 3: «matched, pos, and string»
|
||||
JSHandle<JSTaggedValue> undefined = globalConst->GetHandledUndefined();
|
||||
|
||||
EcmaRuntimeCallInfo info =
|
||||
EcmaInterpreter::NewRuntimeCallInfo(thread, replaceTag, undefined, undefined, argsLength);
|
||||
info.SetCallArg(searchString.GetTaggedValue(), JSTaggedValue(pos), thisString.GetTaggedValue());
|
||||
@ -1006,17 +1006,19 @@ JSTaggedValue BuiltinsString::Replace(EcmaRuntimeCallInfo *argv)
|
||||
replHandle.Update(replStrDeocodeValue);
|
||||
} else {
|
||||
// Let captures be an empty List.
|
||||
JSHandle<TaggedArray> capturesList = factory->NewTaggedArray(0);
|
||||
JSHandle<TaggedArray> capturesList = factory->EmptyArray();
|
||||
ASSERT_PRINT(replaceTag->IsString(), "replace must be string");
|
||||
JSHandle<EcmaString> replacement(thread, replaceTag->GetTaggedObject());
|
||||
// Let replStr be GetSubstitution(matched, string, pos, captures, replaceValue)
|
||||
replHandle.Update(GetSubstitution(thread, searchString, thisString, pos, capturesList, replacement));
|
||||
replHandle.Update(GetSubstitution(thread, searchString, thisString, pos, capturesList, undefined, replacement));
|
||||
}
|
||||
JSHandle<EcmaString> realReplaceStr = JSTaggedValue::ToString(thread, replHandle);
|
||||
// Let tailPos be pos + the number of code units in matched.
|
||||
int32_t tailPos = pos + static_cast<int32_t>(searchString->GetLength());
|
||||
// Let newString be the String formed by concatenating the first pos code units of string, replStr, and the trailing
|
||||
// substring of string starting at index tailPos. If pos is 0, the first element of the concatenation will be the
|
||||
// Let newString be the String formed by concatenating the first pos code units of string,
|
||||
// replStr, and the trailing
|
||||
// substring of string starting at index tailPos. If pos is 0,
|
||||
// the first element of the concatenation will be the
|
||||
// empty String.
|
||||
// Return newString.
|
||||
JSHandle<EcmaString> prefixString(thread, EcmaString::FastSubString(thisString, 0, pos, ecmaVm));
|
||||
@ -1057,9 +1059,169 @@ JSTaggedValue BuiltinsString::Replace(EcmaRuntimeCallInfo *argv)
|
||||
factory->NewFromUtf16LiteralNotCompress(uint16tData, stringBuilder.size()).GetTaggedValue();
|
||||
}
|
||||
|
||||
// String.prototype.replaceAll ( searchValue, replaceValue )
//
// Replaces every occurrence of searchValue in the this-string. When searchValue is neither
// undefined nor null and provides a @@replace method, the work is delegated to that method
// (a RegExp argument must carry the "g" flag, otherwise a TypeError is thrown). Otherwise
// searchValue is converted to a string and each match is substituted either with the result
// of calling replaceValue or with the GetSubstitution expansion of replaceValue.
// Returns the new string, or the exception marker when any step completes abruptly.
JSTaggedValue BuiltinsString::ReplaceAll(EcmaRuntimeCallInfo *argv)
{
    ASSERT(argv);
    JSThread *thread = argv->GetThread();
    BUILTINS_API_TRACE(thread, String, ReplaceAll);
    [[maybe_unused]] EcmaHandleScope handleScope(thread);
    // 1. Let O be ? RequireObjectCoercible(this value).
    JSHandle<JSTaggedValue> thisTag = JSTaggedValue::RequireObjectCoercible(thread, BuiltinsString::GetThis(argv));
    RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);

    auto ecmaVm = thread->GetEcmaVM();
    JSHandle<GlobalEnv> env = ecmaVm->GetGlobalEnv();
    const GlobalEnvConstants *globalConst = thread->GlobalConstants();
    JSHandle<JSTaggedValue> searchTag = BuiltinsString::GetCallArg(argv, 0);
    JSHandle<JSTaggedValue> replaceTag = BuiltinsString::GetCallArg(argv, 1);

    ObjectFactory *factory = ecmaVm->GetFactory();

    // 2. If searchValue is neither undefined nor null, then
    if (!searchTag->IsUndefined() && !searchTag->IsNull()) {
        // a. Let isRegExp be ? IsRegExp(searchValue).
        bool isJSRegExp = JSObject::IsRegExp(thread, searchTag);
        RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
        // b. If isRegExp is true, then
        if (isJSRegExp) {
            // i. Let flags be ? Get(searchValue, "flags").
            JSHandle<JSTaggedValue> flagsString(globalConst->GetHandledFlagsString());
            JSHandle<JSTaggedValue> flags = JSObject::GetProperty(thread, searchTag, flagsString).GetValue();
            RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
            // ii. Perform ? RequireObjectCoercible(flags).
            JSTaggedValue::RequireObjectCoercible(thread, flags);
            RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
            // iii. If ? ToString(flags) does not contain "g", throw a TypeError exception.
            JSHandle<EcmaString> flagString = JSTaggedValue::ToString(thread, flags);
            RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
            JSHandle<EcmaString> gString(globalConst->GetHandledGString());
            int32_t pos = flagString->IndexOf(*gString);
            if (pos == -1) {
                THROW_TYPE_ERROR_AND_RETURN(thread,
                                            "string.prototype.replaceAll called with a non-global RegExp argument",
                                            JSTaggedValue::Exception());
            }
        }
        // c. Let replacer be ? GetMethod(searchValue, @@replace).
        JSHandle<JSTaggedValue> replaceKey = env->GetReplaceSymbol();
        JSHandle<JSTaggedValue> replaceMethod = JSObject::GetMethod(thread, searchTag, replaceKey);
        RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
        // d. If replacer is not undefined, then
        if (!replaceMethod->IsUndefined()) {
            // i. Return ? Call(replacer, searchValue, «O, replaceValue»).
            const size_t argsLength = 2;  // 2: «O, replaceValue»
            JSHandle<JSTaggedValue> undefined = globalConst->GetHandledUndefined();
            EcmaRuntimeCallInfo info =
                EcmaInterpreter::NewRuntimeCallInfo(thread, replaceMethod, searchTag, undefined, argsLength);
            info.SetCallArg(thisTag.GetTaggedValue(), replaceTag.GetTaggedValue());
            return JSFunction::Call(&info);
        }
    }

    // 3. Let string be ? ToString(O).
    JSHandle<EcmaString> thisString = JSTaggedValue::ToString(thread, thisTag);
    RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
    // 4. Let searchString be ? ToString(searchValue).
    JSHandle<EcmaString> searchString = JSTaggedValue::ToString(thread, searchTag);
    RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
    // 5. Let functionalReplace be IsCallable(replaceValue).
    // 6. If functionalReplace is false, then
    if (!replaceTag->IsCallable()) {
        // a. Set replaceValue to ? ToString(replaceValue).
        replaceTag = JSHandle<JSTaggedValue>(JSTaggedValue::ToString(thread, replaceTag));
        RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
    }

    // 7. Let searchLength be the length of searchString.
    // 8. Let advanceBy be max(1, searchLength).
    int32_t searchLength = searchString->GetLength();
    int32_t advanceBy = std::max(1, searchLength);
    // 9. Let matchPositions be a new empty List.
    // The result is accumulated directly instead of collecting the match positions first.
    std::u16string stringBuilder;
    bool canBeCompress = true;
    // Appends the code units of str to the result; any UTF-16 backed piece forces an
    // uncompressed result string.
    auto appendToBuilder = [&stringBuilder, &canBeCompress](const JSHandle<EcmaString> &str) {
        if (str->IsUtf16()) {
            const uint16_t *data = str->GetDataUtf16();
            stringBuilder += base::StringHelper::Utf16ToU16String(data, str->GetLength());
            canBeCompress = false;
        } else {
            const uint8_t *data = str->GetDataUtf8();
            stringBuilder += base::StringHelper::Utf8ToU16String(data, str->GetLength());
        }
    };
    // 10. Let position be ! StringIndexOf(string, searchString, 0).
    int32_t pos = thisString->IndexOf(*searchString);
    int32_t endOfLastMatch = 0;
    JSHandle<JSTaggedValue> undefined = globalConst->GetHandledUndefined();
    JSMutableHandle<JSTaggedValue> replHandle(thread, factory->GetEmptyString().GetTaggedValue());
    while (pos != -1) {
        // If functionalReplace is true, then
        if (replaceTag->IsCallable()) {
            // Let replValue be ? Call(replaceValue, undefined,«matched, pos, and string»).
            const size_t argsLength = 3;  // 3: «matched, pos, and string»
            EcmaRuntimeCallInfo info =
                EcmaInterpreter::NewRuntimeCallInfo(thread, replaceTag, undefined, undefined, argsLength);
            info.SetCallArg(searchString.GetTaggedValue(), JSTaggedValue(pos), thisString.GetTaggedValue());
            JSTaggedValue replStrDeocodeValue = JSFunction::Call(&info);
            // An exception thrown by the replacer must stop the replacement immediately.
            RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
            replHandle.Update(replStrDeocodeValue);
        } else {
            // Let captures be an empty List.
            JSHandle<TaggedArray> capturesList = factory->EmptyArray();
            ASSERT_PRINT(replaceTag->IsString(), "replace must be string");
            JSHandle<EcmaString> replacement(thread, replaceTag->GetTaggedObject());
            // Let replStr be GetSubstitution(matched, string, pos, captures, undefined, replaceValue)
            replHandle.Update(GetSubstitution(thread, searchString, thisString, pos,
                                              capturesList, undefined, replacement));
        }
        // Converting the replacer's result may run user code (toString / valueOf) and throw.
        JSHandle<EcmaString> realReplaceStr = JSTaggedValue::ToString(thread, replHandle);
        RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
        // Append the untouched text between the previous match and this one, then the replacement.
        JSHandle<EcmaString> prefixString(thread,
                                          EcmaString::FastSubString(thisString, endOfLastMatch,
                                                                    pos - endOfLastMatch, ecmaVm));
        appendToBuilder(prefixString);
        appendToBuilder(realReplaceStr);
        endOfLastMatch = pos + searchLength;
        // Advance by at least one code unit so that an empty search string still terminates.
        pos = thisString->IndexOf(*searchString, pos + advanceBy);
    }

    // Append whatever follows the last match.
    if (endOfLastMatch < static_cast<int32_t>(thisString->GetLength())) {
        JSHandle<EcmaString> suffixString(thread,
                                          EcmaString::FastSubString(thisString, endOfLastMatch,
                                                                    thisString->GetLength() - endOfLastMatch, ecmaVm));
        appendToBuilder(suffixString);
    }

    auto *char16tData = const_cast<char16_t *>(stringBuilder.c_str());
    auto *uint16tData = reinterpret_cast<uint16_t *>(char16tData);
    return canBeCompress ?
           factory->NewFromUtf16LiteralCompress(uint16tData, stringBuilder.length()).GetTaggedValue() :
           factory->NewFromUtf16LiteralNotCompress(uint16tData, stringBuilder.length()).GetTaggedValue();
}
|
||||
|
||||
JSTaggedValue BuiltinsString::GetSubstitution(JSThread *thread, const JSHandle<EcmaString> &matched,
|
||||
const JSHandle<EcmaString> &srcString, int position,
|
||||
const JSHandle<TaggedArray> &captureList,
|
||||
const JSHandle<JSTaggedValue> &namedCaptures,
|
||||
const JSHandle<EcmaString> &replacement)
|
||||
{
|
||||
BUILTINS_API_TRACE(thread, String, GetSubstitution);
|
||||
@ -1073,7 +1235,6 @@ JSTaggedValue BuiltinsString::GetSubstitution(JSThread *thread, const JSHandle<E
|
||||
if (nextDollarIndex < 0) {
|
||||
return replacement.GetTaggedValue();
|
||||
}
|
||||
|
||||
std::u16string stringBuilder;
|
||||
bool canBeCompress = true;
|
||||
if (nextDollarIndex > 0) {
|
||||
@ -1099,6 +1260,7 @@ JSTaggedValue BuiltinsString::GetSubstitution(JSThread *thread, const JSHandle<E
|
||||
}
|
||||
int continueFromIndex = -1;
|
||||
uint16_t peek = replacement->At(peekIndex);
|
||||
int32_t p = 0;
|
||||
switch (peek) {
|
||||
case '$': // $$
|
||||
stringBuilder += '$';
|
||||
@ -1193,6 +1355,42 @@ JSTaggedValue BuiltinsString::GetSubstitution(JSThread *thread, const JSHandle<E
|
||||
continueFromIndex = peekIndex + advance;
|
||||
break;
|
||||
}
|
||||
case '<': {
|
||||
if (namedCaptures->IsUndefined()) {
|
||||
stringBuilder += '$';
|
||||
continueFromIndex = peekIndex;
|
||||
break;
|
||||
}
|
||||
JSHandle<EcmaString> greaterSymString = factory->NewFromASCII(">");
|
||||
int pos = replacement->IndexOf(*greaterSymString, peekIndex);
|
||||
if (pos == -1) {
|
||||
stringBuilder += '$';
|
||||
continueFromIndex = peekIndex;
|
||||
break;
|
||||
}
|
||||
JSHandle<EcmaString> groupName(thread,
|
||||
EcmaString::FastSubString(replacement,
|
||||
peekIndex + 1, pos - peekIndex - 1, ecmaVm));
|
||||
JSHandle<JSTaggedValue> names(groupName);
|
||||
JSHandle<JSTaggedValue> capture = JSObject::GetProperty(thread, namedCaptures, names).GetValue();
|
||||
if (capture->IsUndefined()) {
|
||||
continueFromIndex = pos + 1;
|
||||
p = pos;
|
||||
break;
|
||||
}
|
||||
JSHandle<EcmaString> captureName(capture);
|
||||
if (captureName->IsUtf16()) {
|
||||
const uint16_t *data = captureName->GetDataUtf16();
|
||||
stringBuilder += base::StringHelper::Utf16ToU16String(data, captureName->GetLength());
|
||||
canBeCompress = false;
|
||||
} else {
|
||||
const uint8_t *data = captureName->GetDataUtf8();
|
||||
stringBuilder += base::StringHelper::Utf8ToU16String(data, captureName->GetLength());
|
||||
}
|
||||
continueFromIndex = pos + 1;
|
||||
p = pos;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
stringBuilder += '$';
|
||||
continueFromIndex = peekIndex;
|
||||
|
@ -43,6 +43,7 @@ public:
|
||||
static JSTaggedValue GetSubstitution(JSThread *thread, const JSHandle<EcmaString> &matched,
|
||||
const JSHandle<EcmaString> &srcString, int position,
|
||||
const JSHandle<TaggedArray> &captureList,
|
||||
const JSHandle<JSTaggedValue> &namedCaptures,
|
||||
const JSHandle<EcmaString> &replacement);
|
||||
// 21.1.3.1
|
||||
static JSTaggedValue CharAt(EcmaRuntimeCallInfo *argv);
|
||||
@ -78,6 +79,7 @@ public:
|
||||
// 21.1.3.14
|
||||
static JSTaggedValue Replace(EcmaRuntimeCallInfo *argv);
|
||||
// 21.1.3.14.1 Runtime Semantics: GetSubstitution() is declared earlier in this class.
// String.prototype.replaceAll ( searchValue, replaceValue )
|
||||
static JSTaggedValue ReplaceAll(EcmaRuntimeCallInfo *argv);
|
||||
// 21.1.3.15
|
||||
static JSTaggedValue Search(EcmaRuntimeCallInfo *argv);
|
||||
// 21.1.3.16
|
||||
|
@ -1932,13 +1932,16 @@ void JSRegExp::Dump(std::ostream &os) const
|
||||
{
|
||||
os << "\n";
|
||||
os << " - ByteCodeBuffer: ";
|
||||
GetByteCodeBuffer().Dump(os);
|
||||
GetByteCodeBuffer().D();
|
||||
os << "\n";
|
||||
os << " - OriginalSource: ";
|
||||
GetOriginalSource().Dump(os);
|
||||
GetOriginalSource().D();
|
||||
os << "\n";
|
||||
os << " - OriginalFlags: ";
|
||||
GetOriginalFlags().Dump(os);
|
||||
GetOriginalFlags().D();
|
||||
os << "\n";
|
||||
os << " - GroupName: ";
|
||||
GetGroupName().D();
|
||||
os << "\n";
|
||||
os << " - Length: " << GetLength();
|
||||
os << "\n";
|
||||
@ -4086,7 +4089,7 @@ void JSRegExp::DumpForSnapshot(std::vector<std::pair<CString, JSTaggedValue>> &v
|
||||
{
|
||||
vec.push_back(std::make_pair(CString("originalSource"), GetOriginalSource()));
|
||||
vec.push_back(std::make_pair(CString("originalFlags"), GetOriginalFlags()));
|
||||
|
||||
vec.push_back(std::make_pair(CString("groupName"), GetGroupName()));
|
||||
JSObject::DumpForSnapshot(vec);
|
||||
}
|
||||
|
||||
|
@ -179,12 +179,13 @@ int32_t EcmaString::IndexOf(const EcmaString *rhs, int32_t pos) const
|
||||
const EcmaString *lhs = this;
|
||||
int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
|
||||
int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());
|
||||
if (rhsCount == 0) {
|
||||
return pos;
|
||||
|
||||
if (pos > lhsCount) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (pos >= lhsCount) {
|
||||
return -1;
|
||||
if (rhsCount == 0) {
|
||||
return pos;
|
||||
}
|
||||
|
||||
if (pos < 0) {
|
||||
|
@ -407,6 +407,7 @@ void GlobalEnvConstants::InitGlobalConstant(JSThread *thread)
|
||||
SetConstant(ConstantIndex::FRACTION_STRING_INDEX, factory->NewFromASCIINonMovable("fraction"));
|
||||
SetConstant(ConstantIndex::DECIMAL_STRING_INDEX, factory->NewFromASCIINonMovable("decimal"));
|
||||
SetConstant(ConstantIndex::GROUP_STRING_INDEX, factory->NewFromASCIINonMovable("group"));
|
||||
SetConstant(ConstantIndex::GROUPS_STRING_INDEX, factory->NewFromASCIINonMovable("groups"));
|
||||
SetConstant(ConstantIndex::CURRENCY_STRING_INDEX, factory->NewFromASCIINonMovable("currency"));
|
||||
SetConstant(ConstantIndex::CURRENCY_SIGN_STRING_INDEX, factory->NewFromASCIINonMovable("currencySign"));
|
||||
SetConstant(ConstantIndex::CURRENCY_DISPLAY_STRING_INDEX, factory->NewFromASCIINonMovable("currencyDisplay"));
|
||||
|
@ -280,6 +280,7 @@ class JSThread;
|
||||
V(JSTaggedValue, FractionString, FRACTION_STRING_INDEX, fraction) \
|
||||
V(JSTaggedValue, DecimalString, DECIMAL_STRING_INDEX, decimal) \
|
||||
V(JSTaggedValue, GroupString, GROUP_STRING_INDEX, group) \
|
||||
V(JSTaggedValue, GroupsString, GROUPS_STRING_INDEX, groups) \
|
||||
V(JSTaggedValue, CurrencyString, CURRENCY_STRING_INDEX, currency) \
|
||||
V(JSTaggedValue, CurrencySignString, CURRENCY_SIGN_STRING_INDEX, currencySign) \
|
||||
V(JSTaggedValue, CurrencyDisplayString, CURRENCY_DISPLAY_STRING_INDEX, currencyDisplay) \
|
||||
|
@ -29,7 +29,8 @@ public:
|
||||
static constexpr size_t REGEXP_BYTE_CODE_OFFSET = JSObject::SIZE;
|
||||
ACCESSORS(ByteCodeBuffer, REGEXP_BYTE_CODE_OFFSET, ORIGINAL_SOURCE_OFFSET)
|
||||
ACCESSORS(OriginalSource, ORIGINAL_SOURCE_OFFSET, ORIGINAL_FLAGS_OFFSET)
|
||||
ACCESSORS(OriginalFlags, ORIGINAL_FLAGS_OFFSET, LENGTH_OFFSET)
|
||||
ACCESSORS(OriginalFlags, ORIGINAL_FLAGS_OFFSET, GROUP_NAME_OFFSET)
|
||||
ACCESSORS(GroupName, GROUP_NAME_OFFSET, LENGTH_OFFSET)
|
||||
ACCESSORS_PRIMITIVE_FIELD(Length, uint32_t, LENGTH_OFFSET, LAST_OFFSET)
|
||||
DEFINE_ALIGN_SIZE(LAST_OFFSET);
|
||||
|
||||
|
@ -1055,6 +1055,7 @@ void ObjectFactory::InitializeJSObject(const JSHandle<JSObject> &obj, const JSHa
|
||||
JSRegExp::Cast(*obj)->SetByteCodeBuffer(thread_, JSTaggedValue::Undefined());
|
||||
JSRegExp::Cast(*obj)->SetOriginalSource(thread_, JSTaggedValue::Undefined());
|
||||
JSRegExp::Cast(*obj)->SetOriginalFlags(thread_, JSTaggedValue(0));
|
||||
JSRegExp::Cast(*obj)->SetGroupName(thread_, JSTaggedValue::Undefined());
|
||||
JSRegExp::Cast(*obj)->SetLength(0);
|
||||
break;
|
||||
case JSType::JS_PRIMITIVE_REF:
|
||||
|
@ -344,7 +344,6 @@ public:
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
inline uint32_t HighestValue() const
|
||||
{
|
||||
if (!rangeSet_.empty()) {
|
||||
@ -352,7 +351,6 @@ public:
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
RangeSet(RangeSet const &) = default;
|
||||
RangeSet &operator=(RangeSet const &) = default;
|
||||
RangeSet(RangeSet &&) = default;
|
||||
|
@ -21,10 +21,15 @@
|
||||
#include "libpandabase/utils/utils.h"
|
||||
#include "securec.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
#include "third_party/icu/icu4c/source/common/unicode/uchar.h"
|
||||
#define _NO_DEBUG_
|
||||
|
||||
namespace panda::ecmascript {
|
||||
static constexpr uint32_t CACHE_SIZE = 128;
|
||||
static constexpr uint32_t ID_START_TABLE_ASCII[4] = {
|
||||
/* $ A-Z _ a-z */
|
||||
0x00000000, 0x00000010, 0x87FFFFFE, 0x07FFFFFE
|
||||
};
|
||||
static RangeSet g_rangeD(0x30, 0x39); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
|
||||
// NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
|
||||
static RangeSet g_rangeS({
|
||||
@ -539,6 +544,7 @@ bool RegExpParser::ParseAssertionCapture(int *captureIndex, bool isBackward)
|
||||
return false;
|
||||
}
|
||||
groupNames_.EmitStr(name.c_str());
|
||||
newGroupNames_.push_back(name);
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
|
||||
PrintF("group name %s", name.c_str());
|
||||
Advance();
|
||||
@ -758,24 +764,42 @@ void RegExpParser::ParseQuantifier(size_t atomBcStart, int captureStart, int cap
|
||||
bool RegExpParser::ParseGroupSpecifier(const uint8_t **pp, CString &name)
|
||||
{
|
||||
const uint8_t *p = *pp;
|
||||
int c = *p;
|
||||
while (c != '>') {
|
||||
if (c < (INT8_MAX + 1)) {
|
||||
if (name.empty()) {
|
||||
if (!g_regexpIdentifyStart.IsContain(c)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (!g_regexpIdentifyContinue.IsContain(c)) {
|
||||
return false;
|
||||
}
|
||||
uint32_t c ;
|
||||
char buffer[CACHE_SIZE] = {0};
|
||||
char *q = buffer;
|
||||
while (true) {
|
||||
c = *p;
|
||||
if (c == '\\') {
|
||||
p++;
|
||||
if (*p != 'u') {
|
||||
return false;
|
||||
}
|
||||
name += static_cast<char>(c);
|
||||
if (!ParseUnicodeEscape(&c)) {
|
||||
return false;
|
||||
}
|
||||
} else if (c == '>') {
|
||||
break;
|
||||
} else if (c > CACHE_SIZE) {
|
||||
c = base::StringHelper::UnicodeFromUtf8(p, UTF8_CHAR_LEN_MAX, &p);
|
||||
} else {
|
||||
p++;
|
||||
}
|
||||
c = *++p; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
}
|
||||
p++; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
if (q == buffer) {
|
||||
if (!IsIdentFirst(c)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (!u_isIDPart(c)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (q != nullptr) {
|
||||
*q++ = c;
|
||||
}
|
||||
} // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
p++;
|
||||
*pp = p;
|
||||
name = buffer;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -784,6 +808,7 @@ int RegExpParser::ParseCaptureCount(const char *groupName)
|
||||
const uint8_t *p;
|
||||
int captureIndex = 1;
|
||||
CString name;
|
||||
hasNamedCaptures_ = 0;
|
||||
for (p = base_; p < end_; p++) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
switch (*p) {
|
||||
case '(': {
|
||||
@ -793,6 +818,7 @@ int RegExpParser::ParseCaptureCount(const char *groupName)
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
p[CAPTURE_CONUT_ADVANCE] != '=') {
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
hasNamedCaptures_ = 1;
|
||||
p += CAPTURE_CONUT_ADVANCE;
|
||||
if (groupName != nullptr) {
|
||||
if (ParseGroupSpecifier(&p, name)) {
|
||||
@ -836,6 +862,7 @@ int RegExpParser::ParseAtomEscape(bool isBackward)
|
||||
int result = -1;
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
|
||||
PrintF("Parse AtomEscape------\n");
|
||||
PrevOpCode prevOp;
|
||||
switch (c0_) {
|
||||
case KEY_EOF:
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
|
||||
@ -870,50 +897,108 @@ int RegExpParser::ParseAtomEscape(bool isBackward)
|
||||
case 'd': {
|
||||
// [0-9]
|
||||
RangeOpCode rangeOp;
|
||||
if (isBackward) {
|
||||
prevOp.EmitOpCode(&buffer_, 0);
|
||||
}
|
||||
rangeOp.InsertOpCode(&buffer_, g_rangeD);
|
||||
Advance();
|
||||
goto parseLookBehind;
|
||||
} break;
|
||||
case 'D': {
|
||||
// [^0-9]
|
||||
RangeSet atomRange(g_rangeD);
|
||||
atomRange.Invert(IsUtf16());
|
||||
Range32OpCode rangeOp;
|
||||
if (isBackward) {
|
||||
prevOp.EmitOpCode(&buffer_, 0);
|
||||
}
|
||||
rangeOp.InsertOpCode(&buffer_, atomRange);
|
||||
Advance();
|
||||
goto parseLookBehind;
|
||||
} break;
|
||||
case 's': {
|
||||
// [\f\n\r\t\v]
|
||||
RangeOpCode rangeOp;
|
||||
if (isBackward) {
|
||||
prevOp.EmitOpCode(&buffer_, 0);
|
||||
}
|
||||
rangeOp.InsertOpCode(&buffer_, g_rangeS);
|
||||
Advance();
|
||||
goto parseLookBehind;
|
||||
} break;
|
||||
case 'S': {
|
||||
RangeSet atomRange(g_rangeS);
|
||||
atomRange.Invert(IsUtf16());
|
||||
Range32OpCode rangeOp;
|
||||
atomRange.Invert(IsUtf16());
|
||||
if (isBackward) {
|
||||
prevOp.EmitOpCode(&buffer_, 0);
|
||||
}
|
||||
rangeOp.InsertOpCode(&buffer_, atomRange);
|
||||
Advance();
|
||||
goto parseLookBehind;
|
||||
} break;
|
||||
case 'w': {
|
||||
// [A-Za-z0-9]
|
||||
RangeOpCode rangeOp;
|
||||
if (isBackward) {
|
||||
prevOp.EmitOpCode(&buffer_, 0);
|
||||
}
|
||||
rangeOp.InsertOpCode(&buffer_, g_rangeW);
|
||||
Advance();
|
||||
goto parseLookBehind;
|
||||
} break;
|
||||
case 'W': {
|
||||
// [^A-Za-z0-9]
|
||||
RangeSet atomRange(g_rangeW);
|
||||
atomRange.Invert(IsUtf16());
|
||||
Range32OpCode rangeOp;
|
||||
if (isBackward) {
|
||||
prevOp.EmitOpCode(&buffer_, 0);
|
||||
}
|
||||
rangeOp.InsertOpCode(&buffer_, atomRange);
|
||||
Advance();
|
||||
goto parseLookBehind;
|
||||
} break;
|
||||
// P{UnicodePropertyValueExpression}
|
||||
// p{UnicodePropertyValueExpression}
|
||||
case 'P':
|
||||
case 'p':
|
||||
// [+N]kGroupName[?U]
|
||||
case 'k':
|
||||
case 'k': {
|
||||
Advance();
|
||||
if (c0_ != '<') {
|
||||
if (!IsUtf16() || HasNamedCaptures()) {
|
||||
ParseError("expecting group name.");
|
||||
break;
|
||||
}
|
||||
}
|
||||
Advance();
|
||||
Prev();
|
||||
CString name;
|
||||
auto **pp = const_cast<const uint8_t **>(&pc_);
|
||||
if (!ParseGroupSpecifier(pp, name)) {
|
||||
ParseError("GroupName Syntax error.");
|
||||
break;
|
||||
}
|
||||
int postion = FindGroupName(name);
|
||||
if (postion < 0) {
|
||||
postion = ParseCaptureCount(name.c_str());
|
||||
if (postion < 0 && (!IsUtf16() || HasNamedCaptures())) {
|
||||
ParseError("group name not defined");
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (isBackward) {
|
||||
BackwardBackReferenceOpCode backReferenceOp;
|
||||
backReferenceOp.EmitOpCode(&buffer_, postion);
|
||||
} else {
|
||||
BackReferenceOpCode backReferenceOp;
|
||||
backReferenceOp.EmitOpCode(&buffer_, postion);
|
||||
}
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
|
||||
Advance();
|
||||
} break;
|
||||
parseLookBehind: {
|
||||
if (isBackward) {
|
||||
prevOp.EmitOpCode(&buffer_, 0);
|
||||
}
|
||||
Advance();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
result = ParseCharacterEscape();
|
||||
break;
|
||||
@ -921,6 +1006,22 @@ int RegExpParser::ParseAtomEscape(bool isBackward)
|
||||
return result;
|
||||
}
|
||||
|
||||
int RegExpParser::RecountCaptures()
|
||||
{
|
||||
if (totalCaptureCount_ < 0) {
|
||||
const char *name = reinterpret_cast<const char*>(groupNames_.buf_);
|
||||
totalCaptureCount_ = ParseCaptureCount(name);
|
||||
}
|
||||
return totalCaptureCount_;
|
||||
}
|
||||
bool RegExpParser::HasNamedCaptures()
|
||||
{
|
||||
if (hasNamedCaptures_ < 0) {
|
||||
RecountCaptures();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
int RegExpParser::ParseCharacterEscape()
|
||||
{
|
||||
// CharacterEscape[U]::
|
||||
@ -1304,4 +1405,13 @@ void RegExpParser::ParseError(const char *errorMessage)
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
} // namespace panda::ecmascript
|
||||
|
||||
int RegExpParser::IsIdentFirst(uint32_t c)
|
||||
{
|
||||
if (c < CACHE_SIZE) {
|
||||
return (ID_START_TABLE_ASCII[c >> 5] >> (c & 31)) & 1; // 5: Shift five bits 31: and operation binary of 31
|
||||
} else {
|
||||
return u_isIDStart(c);
|
||||
}
|
||||
}
|
||||
} // namespace panda::ecmascript
|
@ -28,6 +28,7 @@
|
||||
#include "unicode/utf16.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/udata.h"
|
||||
|
||||
namespace panda::ecmascript {
|
||||
class RegExpParser {
|
||||
@ -51,6 +52,7 @@ public:
|
||||
static constexpr uint32_t UNICODE_HEX_VALUE = 4;
|
||||
static constexpr uint32_t UNICODE_HEX_ADVANCE = 2;
|
||||
static constexpr uint32_t CAPTURE_CONUT_ADVANCE = 3;
|
||||
static constexpr uint32_t UTF8_CHAR_LEN_MAX = 6;
|
||||
|
||||
explicit RegExpParser(Chunk *chunk)
|
||||
: base_(nullptr),
|
||||
@ -105,7 +107,21 @@ public:
|
||||
bool ParseUnlimitedLengthHexNumber(uint32_t maxValue, uint32_t *value);
|
||||
bool ParseUnicodeEscape(uint32_t *value);
|
||||
bool ParserIntervalQuantifier(int *pmin, int *pmax);
|
||||
bool HasNamedCaptures();
|
||||
int ParseEscape(const uint8_t **pp, int isUtf16);
|
||||
int RecountCaptures();
|
||||
int IsIdentFirst(uint32_t c);
|
||||
|
||||
// Returns, by value (i.e. as a copy), the group names collected in newGroupNames_.
// Names are appended to that vector by ParseAssertionCapture().
inline std::vector<CString> GetGroupNames() const
|
||||
{
|
||||
return newGroupNames_;
|
||||
}
|
||||
|
||||
// Returns the size_ field of the groupNames_ chunk, into which
// ParseAssertionCapture() emits each group name via EmitStr().
// NOTE(review): presumably this is the number of bytes emitted so far - confirm against DynChunk.
inline size_t GetGroupNamesSize() const
|
||||
{
|
||||
return groupNames_.size_ ;
|
||||
}
|
||||
|
||||
inline bool IsError() const
|
||||
{
|
||||
return isError_;
|
||||
@ -227,8 +243,11 @@ private:
|
||||
int stackCount_;
|
||||
bool isError_;
|
||||
char errorMsg_[TMP_BUF_SIZE] = {0}; // NOLINTNEXTLINE(modernize-avoid-c-arrays)
|
||||
int hasNamedCaptures_ = -1;
|
||||
int totalCaptureCount_ = -1;
|
||||
DynChunk buffer_;
|
||||
DynChunk groupNames_;
|
||||
std::vector<CString> newGroupNames_;
|
||||
};
|
||||
} // namespace panda::ecmascript
|
||||
#endif // ECMASCRIPT_REGEXP_PARSER_H
|
||||
|
@ -505,6 +505,7 @@ namespace panda::ecmascript {
|
||||
V(String, PadEnd) \
|
||||
V(String, Repeat) \
|
||||
V(String, Replace) \
|
||||
V(String, ReplaceAll) \
|
||||
V(String, Search) \
|
||||
V(String, Slice) \
|
||||
V(String, Split) \
|
||||
|
@ -472,6 +472,7 @@ static uintptr_t g_nativeTable[] = {
|
||||
reinterpret_cast<uintptr_t>(BuiltinsString::PadStart),
|
||||
reinterpret_cast<uintptr_t>(BuiltinsString::Repeat),
|
||||
reinterpret_cast<uintptr_t>(BuiltinsString::Replace),
|
||||
reinterpret_cast<uintptr_t>(BuiltinsString::ReplaceAll),
|
||||
reinterpret_cast<uintptr_t>(BuiltinsString::Search),
|
||||
reinterpret_cast<uintptr_t>(BuiltinsString::Slice),
|
||||
reinterpret_cast<uintptr_t>(BuiltinsString::Split),
|
||||
|
@ -275,6 +275,7 @@ static JSHandle<JSRegExp> NewJSRegExp(JSThread *thread, ObjectFactory *factory,
|
||||
JSHandle<JSRegExp> jSRegExp = JSHandle<JSRegExp>::Cast(factory->NewJSObject(jSRegExpClass));
|
||||
jSRegExp->SetByteCodeBuffer(thread, JSTaggedValue::Undefined());
|
||||
jSRegExp->SetOriginalSource(thread, JSTaggedValue::Undefined());
|
||||
jSRegExp->SetGroupName(thread, JSTaggedValue::Undefined());
|
||||
jSRegExp->SetOriginalFlags(thread, JSTaggedValue(0));
|
||||
jSRegExp->SetLength(0);
|
||||
return jSRegExp;
|
||||
@ -500,7 +501,7 @@ HWTEST_F_L0(EcmaDumpTest, HeapProfileDump)
|
||||
break;
|
||||
}
|
||||
case JSType::JS_REG_EXP: {
|
||||
CHECK_DUMP_FIELDS(JSObject::SIZE, JSRegExp::SIZE, 4U)
|
||||
CHECK_DUMP_FIELDS(JSObject::SIZE, JSRegExp::SIZE, 5U)
|
||||
NEW_OBJECT_AND_DUMP(JSRegExp, JS_REG_EXP)
|
||||
break;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user