!1636 string.replaceAll and regexp groupname

Merge pull request !1636 from zhangyouyou/master
This commit is contained in:
openharmony_ci 2022-06-29 08:33:54 +00:00 committed by Gitee
commit 0618a06957
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
18 changed files with 492 additions and 59 deletions

View File

@ -39,7 +39,15 @@ static constexpr uint16_t SPACE_OR_LINE_TERMINAL[] = {
0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x0020, 0x00A0, 0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004,
0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000, 0xFEFF,
};
// Lead-byte boundaries consulted by StringHelper::UnicodeFromUtf8.
// Entry [0] is the first value that is not returned as a single-byte character;
// entries [2k - 1] and [2k] are the inclusive lead-byte bounds for k trailing bytes.
static constexpr int UICODE_FROM_UTF8[] = {
    0x80,        // [0]      values below this are returned unchanged
    0xc0, 0xdf,  // [1, 2]   one trailing byte
    0xe0, 0xef,  // [3, 4]   two trailing bytes
    0xf0, 0xf7,  // [5, 6]   three trailing bytes
    0xf8, 0xfb,  // [7, 8]   four trailing bytes
    0xfc, 0xfd,  // [9, 10]  five trailing bytes
};
// Smallest value StringHelper::FromUtf8 accepts for a sequence with (index + 1) trailing bytes.
static constexpr int UTF8_MIN_CODE[] = {
    0x80,
    0x800,
    0x10000,
    0x200000,
    0x4000000,
};
// Mask StringHelper::FromUtf8 applies to the lead byte of a sequence with (index + 1) trailing bytes.
static constexpr char UTF8_FIRST_CODE[] = {
    0x1f,
    0x0f,
    0x07,
    0x03,
    0x01,
};
class StringHelper {
public:
static std::string ToStdString(EcmaString *string);
@ -175,7 +183,51 @@ public:
static EcmaString *Repeat(JSThread *thread, const std::u16string &thisStr, int32_t repeatLen, bool canBeCompress);
static EcmaString *Trim(JSThread *thread, const std::u16string &thisStr);
// Decodes the UTF-8 sequence that starts at p.
// p:      first byte of the sequence; it is read unconditionally.
// maxLen: number of bytes the caller allows this call to consume, lead byte included.
// pp:     receives the address just past the decoded sequence; it is written only on success.
// Returns the decoded value, or -1 when the lead byte matches no range in UICODE_FROM_UTF8,
// when the sequence would need more than maxLen bytes, or when FromUtf8 rejects the trailing bytes.
static int UnicodeFromUtf8(const uint8_t *p, int maxLen, const uint8_t **pp)
{
    const int lead = p[0];
    const uint8_t *rest = p + 1;
    // Values below UICODE_FROM_UTF8[0] stand for themselves.
    if (lead < UICODE_FROM_UTF8[0]) {
        *pp = rest;
        return lead;
    }
    // UICODE_FROM_UTF8[2k - 1] .. UICODE_FROM_UTF8[2k] is the inclusive lead-byte range that
    // announces k trailing bytes (k = 1: 0000 0080 - 0000 07FF ... k = 5: 0400 0000 - 7FFF FFFF).
    constexpr int maxTrailing = 5; // 5: longest sequence form listed in the table
    int trailing = 0;
    for (int k = 1; k <= maxTrailing; ++k) {
        const int low = UICODE_FROM_UTF8[2 * k - 1];  // 2: two table entries per sequence length
        const int high = UICODE_FROM_UTF8[2 * k];     // 2: two table entries per sequence length
        if (lead >= low && lead <= high) {
            trailing = k;
            break;
        }
    }
    if (trailing == 0) {
        return -1;
    }
    // The lead byte already used one of the maxLen bytes.
    if (trailing > maxLen - 1) {
        return -1;
    }
    return FromUtf8(lead, trailing, rest, pp);
}
// Finishes decoding a multi-byte UTF-8 sequence whose lead byte has already been read.
// c:  the lead byte.
// l:  number of trailing bytes announced by the lead byte; must be in [1, 5].
// p:  first trailing byte.
// pp: receives the address just past the last trailing byte; it is written only on success.
// Returns the decoded value, or -1 when l is out of range, when a trailing byte is outside
// [UTF8_2B_SECOND, UTF8_2B_FIRST), or when the decoded value is smaller than UTF8_MIN_CODE
// allows for a sequence of this length.
static int FromUtf8(int c, int l, const uint8_t *p, const uint8_t **pp)
{
    // Both lookup tables are indexed with l - 1; refuse lengths they do not cover instead of
    // reading outside them.
    constexpr int maxTrailing = static_cast<int>(sizeof(UTF8_MIN_CODE) / sizeof(UTF8_MIN_CODE[0]));
    if (l < 1 || l > maxTrailing) {
        return -1;
    }
    // Keep only the payload bits of the lead byte.
    c &= UTF8_FIRST_CODE[l - 1];
    for (int i = 0; i < l; i++) {
        const int b = *p++;
        if (b < utf_helper::UTF8_2B_SECOND || b >= utf_helper::UTF8_2B_FIRST) {
            return -1;
        }
        c = (c << 6) | (b & utf_helper::UTF8_2B_THIRD); // 6: payload bits carried by each trailing byte
    }
    // A value below the minimum for this length was encoded with more bytes than needed.
    if (c < UTF8_MIN_CODE[l - 1]) {
        return -1;
    }
    *pp = p;
    return c;
}
static inline std::u16string Append(const std::u16string &str1, const std::u16string &str2)
{

View File

@ -40,6 +40,7 @@ static constexpr uint8_t UTF8_1B_MAX = 0x7f;
// NOTE(review): presumably the largest code point a two-byte sequence can encode — confirm against its users.
static constexpr uint16_t UTF8_2B_MAX = 0x7ff;
// StringHelper::FromUtf8 uses this as the exclusive upper bound of a trailing byte.
// NOTE(review): by its name it is also the lead-byte tag of a two-byte sequence — confirm.
static constexpr uint8_t UTF8_2B_FIRST = 0xc0;
// StringHelper::FromUtf8 uses this as the inclusive lower bound of a trailing byte.
static constexpr uint8_t UTF8_2B_SECOND = 0x80;
// Mask StringHelper::FromUtf8 applies to a trailing byte to keep its low six bits.
static constexpr uint8_t UTF8_2B_THIRD = 0x3f;
// NOTE(review): by their names, the lead-byte and trailing-byte tags of a three-byte sequence — confirm.
static constexpr uint8_t UTF8_3B_FIRST = 0xe0;
static constexpr uint8_t UTF8_3B_SECOND = 0x80;

View File

@ -1583,6 +1583,7 @@ void Builtins::InitializeString(const JSHandle<GlobalEnv> &env, const JSHandle<J
SetFunction(env, stringFuncPrototype, "padStart", BuiltinsString::PadStart, FunctionLength::ONE);
SetFunction(env, stringFuncPrototype, "padEnd", BuiltinsString::PadEnd, FunctionLength::ONE);
SetFunction(env, stringFuncPrototype, "replace", BuiltinsString::Replace, FunctionLength::TWO);
SetFunction(env, stringFuncPrototype, "replaceAll", BuiltinsString::ReplaceAll, FunctionLength::TWO);
SetFunction(env, stringFuncPrototype, "search", BuiltinsString::Search, FunctionLength::ONE);
SetFunction(env, stringFuncPrototype, "slice", BuiltinsString::Slice, FunctionLength::TWO);
SetFunction(env, stringFuncPrototype, "split", BuiltinsString::Split, FunctionLength::TWO);

View File

@ -913,10 +913,23 @@ JSTaggedValue BuiltinsRegExp::Replace(EcmaRuntimeCallInfo *argv)
// v. Let n be n+1
++index;
}
// j. Let namedCaptures be ? Get(result, "groups").
JSHandle<JSTaggedValue> groupsKey = globalConst->GetHandledGroupsString();
JSTaggedValue named =
FastRuntimeStub::FastGetPropertyByValue(thread, resultValues.GetTaggedValue(), groupsKey.GetTaggedValue());
JSHandle<JSTaggedValue> namedCaptures(thread, named);
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
// m. If functionalReplace is true, then
CString replacement;
int emptyArrLength = 0;
if (namedCaptures->IsUndefined()) {
emptyArrLength = 3; // 3: «matched, pos, and string»
} else {
emptyArrLength = 4; // 4: «matched, pos, string, and groups»
}
JSHandle<TaggedArray> replacerArgs =
factory->NewTaggedArray(3 + capturesList->GetLength()); // 3: «matched, pos, and string»
factory->NewTaggedArray(emptyArrLength + capturesList->GetLength());
if (functionalReplace) {
// i. Let replacerArgs be «matched».
replacerArgs->Set(thread, 0, getMatchString.GetTaggedValue());
@ -929,6 +942,9 @@ JSTaggedValue BuiltinsRegExp::Replace(EcmaRuntimeCallInfo *argv)
}
replacerArgs->Set(thread, index + 1, JSTaggedValue(position));
replacerArgs->Set(thread, index + 2, inputStr.GetTaggedValue()); // 2: position of string
if (!namedCaptures->IsUndefined()) {
replacerArgs->Set(thread, index + 3, namedCaptures.GetTaggedValue()); // 3: position of groups
}
// iv. Let replValue be Call(replaceValue, undefined, replacerArgs).
const size_t argsLength = replacerArgs->GetLength();
JSHandle<JSTaggedValue> undefined = globalConst->GetHandledUndefined();
@ -944,8 +960,14 @@ JSTaggedValue BuiltinsRegExp::Replace(EcmaRuntimeCallInfo *argv)
replacement = ConvertToString(*replacementString, StringConvertedUsage::LOGICOPERATION);
} else {
// n. Else,
if (!namedCaptures->IsUndefined()) {
JSHandle<JSObject> namedCapturesObj = JSTaggedValue::ToObject(thread, namedCaptures);
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
namedCaptures = JSHandle<JSTaggedValue>::Cast(namedCapturesObj);
}
JSHandle<JSTaggedValue> replacementHandle(
thread, BuiltinsString::GetSubstitution(thread, matchString, srcString, position, capturesList,
thread, BuiltinsString::GetSubstitution(thread, matchString, srcString,
position, capturesList, namedCaptures,
replaceValueHandle));
replacement = ConvertToString(EcmaString::Cast(replacementHandle->GetTaggedObject()),
StringConvertedUsage::LOGICOPERATION);
@ -1350,7 +1372,6 @@ bool BuiltinsRegExp::GetFlagsInternal(JSThread *thread, const JSHandle<JSTaggedV
uint8_t flags = static_cast<uint8_t>(regexpObj->GetOriginalFlags().GetInt());
return flags & mask;
}
// 21.2.5.2.2
JSTaggedValue BuiltinsRegExp::RegExpBuiltinExec(JSThread *thread, const JSHandle<JSTaggedValue> &regexp,
const JSHandle<JSTaggedValue> &inputStr, bool useCache)
@ -1389,14 +1410,6 @@ JSTaggedValue BuiltinsRegExp::RegExpBuiltinExec(JSThread *thread, const JSHandle
JSMutableHandle<JSTaggedValue> flags(thread, regexpObj->GetOriginalFlags());
JSHandle<RegExpExecResultCache> cacheTable(thread->GetEcmaVM()->GetRegExpCache());
if (lastIndex == 0 && useCache) {
JSTaggedValue cacheResult =
cacheTable->FindCachedResult(thread, pattern, flags, inputStr, RegExpExecResultCache::EXEC_TYPE, regexp);
if (cacheResult != JSTaggedValue::Undefined()) {
return cacheResult;
}
}
uint32_t length = static_cast<EcmaString *>(inputStr->GetTaggedObject())->GetLength();
uint8_t flagsBits = static_cast<uint8_t>(regexpObj->GetOriginalFlags().GetInt());
JSHandle<JSTaggedValue> flagsValue(thread, FlagsBitsToString(thread, flagsBits));
@ -1457,6 +1470,17 @@ JSTaggedValue BuiltinsRegExp::RegExpBuiltinExec(JSThread *thread, const JSHandle
// 27. Perform CreateDataProperty(A, "0", matched_substr).
JSHandle<JSTaggedValue> zeroValue(matchResult.captures_[0].second);
JSObject::CreateDataProperty(thread, results, 0, zeroValue);
ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
JSHandle<JSTaggedValue> groupName(thread, regexpObj->GetGroupName());
JSMutableHandle<JSTaggedValue> groups(thread, JSTaggedValue::Undefined());
if (!groupName->IsUndefined()) {
JSHandle<JSTaggedValue> nullHandle(thread, JSTaggedValue::Null());
JSHandle<JSObject> nullObj = factory->OrdinaryNewJSObjectCreate(nullHandle);
groups.Update(nullObj.GetTaggedValue());
}
JSHandle<JSTaggedValue> groupsKey = globalConst->GetHandledGroupsString();
JSObject::CreateDataProperty(thread, results, groupsKey, groups);
// 28. For each integer i such that i > 0 and i <= n
for (uint32_t i = 1; i < capturesSize; i++) {
// a. Let capture_i be ith element of r's captures List
@ -1468,6 +1492,14 @@ JSTaggedValue BuiltinsRegExp::RegExpBuiltinExec(JSThread *thread, const JSHandle
}
JSHandle<JSTaggedValue> iValue(thread, capturedValue);
JSObject::CreateDataProperty(thread, results, i, iValue);
if (!groupName->IsUndefined()) {
JSHandle<JSObject> groupObject = JSHandle<JSObject>::Cast(groups);
TaggedArray *groupArray = TaggedArray::Cast(regexpObj->GetGroupName().GetTaggedObject());
if (groupArray->GetLength() > i - 1) {
JSHandle<JSTaggedValue> skey(thread, groupArray->Get(i - 1));
JSObject::CreateDataProperty(thread, groupObject, skey, iValue);
}
}
}
if (lastIndex == 0 && useCache) {
RegExpExecResultCache::AddResultInCache(thread, cacheTable, pattern, flags, inputStr,
@ -1672,6 +1704,15 @@ JSTaggedValue BuiltinsRegExp::RegExpInitialize(JSThread *thread, const JSHandle<
regexp->SetOriginalSource(thread, patternStrHandle.GetTaggedValue());
// 12. Set the value of objs [[OriginalFlags]] internal slot to F.
regexp->SetOriginalFlags(thread, JSTaggedValue(flagsBits));
auto groupName = parser.GetGroupNames();
if (!groupName.empty()) {
JSHandle<TaggedArray> taggedArray = factory->NewTaggedArray(groupName.size());
for (size_t i = 0; i < groupName.size(); ++i) {
JSHandle<JSTaggedValue> flagsKey(factory->NewFromStdString(groupName[i].c_str()));
taggedArray->Set(thread, i, flagsKey);
}
regexp->SetGroupName(thread, taggedArray);
}
// 13. Set objs [[RegExpMatcher]] internal slot.
if (getCache.first == JSTaggedValue::Hole()) {
auto bufferSize = parser.GetOriginBufferSize();

View File

@ -662,8 +662,8 @@ JSTaggedValue BuiltinsString::MatchAll(EcmaRuntimeCallInfo *argv)
// b. If isRegExp is true, then
if (isJSRegExp) {
// i. Let flags be ? Get(searchValue, "flags").
JSHandle<JSTaggedValue> flagsKey(factory->NewFromASCII("flags"));
JSHandle<JSTaggedValue> flags = JSObject::GetProperty(thread, regexp, flagsKey).GetValue();
JSHandle<JSTaggedValue> flagsString(globalConst->GetHandledFlagsString());
JSHandle<JSTaggedValue> flags = JSObject::GetProperty(thread, regexp, flagsString).GetValue();
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
// ii. Perform ? RequireObjectCoercible(flags).
JSTaggedValue::RequireObjectCoercible(thread, flags);
@ -992,13 +992,13 @@ JSTaggedValue BuiltinsString::Replace(EcmaRuntimeCallInfo *argv)
if (pos == -1) {
return thisString.GetTaggedValue();
}
JSHandle<JSTaggedValue> undefined = globalConst->GetHandledUndefined();
JSMutableHandle<JSTaggedValue> replHandle(thread, factory->GetEmptyString().GetTaggedValue());
// If functionalReplace is true, then
if (replaceTag->IsCallable()) {
// Let replValue be Call(replaceValue, undefined,«matched, pos, and string»).
const size_t argsLength = 3; // 3: «matched, pos, and string»
JSHandle<JSTaggedValue> undefined = globalConst->GetHandledUndefined();
EcmaRuntimeCallInfo info =
EcmaInterpreter::NewRuntimeCallInfo(thread, replaceTag, undefined, undefined, argsLength);
info.SetCallArg(searchString.GetTaggedValue(), JSTaggedValue(pos), thisString.GetTaggedValue());
@ -1006,17 +1006,19 @@ JSTaggedValue BuiltinsString::Replace(EcmaRuntimeCallInfo *argv)
replHandle.Update(replStrDeocodeValue);
} else {
// Let captures be an empty List.
JSHandle<TaggedArray> capturesList = factory->NewTaggedArray(0);
JSHandle<TaggedArray> capturesList = factory->EmptyArray();
ASSERT_PRINT(replaceTag->IsString(), "replace must be string");
JSHandle<EcmaString> replacement(thread, replaceTag->GetTaggedObject());
// Let replStr be GetSubstitution(matched, string, pos, captures, replaceValue)
replHandle.Update(GetSubstitution(thread, searchString, thisString, pos, capturesList, replacement));
replHandle.Update(GetSubstitution(thread, searchString, thisString, pos, capturesList, undefined, replacement));
}
JSHandle<EcmaString> realReplaceStr = JSTaggedValue::ToString(thread, replHandle);
// Let tailPos be pos + the number of code units in matched.
int32_t tailPos = pos + static_cast<int32_t>(searchString->GetLength());
// Let newString be the String formed by concatenating the first pos code units of string, replStr, and the trailing
// substring of string starting at index tailPos. If pos is 0, the first element of the concatenation will be the
// Let newString be the String formed by concatenating the first pos code units of string,
// replStr, and the trailing
// substring of string starting at index tailPos. If pos is 0,
// the first element of the concatenation will be the
// empty String.
// Return newString.
JSHandle<EcmaString> prefixString(thread, EcmaString::FastSubString(thisString, 0, pos, ecmaVm));
@ -1057,9 +1059,169 @@ JSTaggedValue BuiltinsString::Replace(EcmaRuntimeCallInfo *argv)
factory->NewFromUtf16LiteralNotCompress(uint16tData, stringBuilder.size()).GetTaggedValue();
}
// String.prototype.replaceAll ( searchValue, replaceValue )
// argv: call info; `this` is the receiver, argument 0 is searchValue, argument 1 is replaceValue.
// When searchValue is neither undefined nor null and exposes a @@replace method, the call is
// delegated to that method (a RegExp searchValue must carry the "g" flag, otherwise a TypeError
// is thrown). Otherwise every occurrence of ToString(searchValue) in ToString(this) is replaced,
// either by the result of calling replaceValue or by the GetSubstitution expansion of
// ToString(replaceValue).
// Returns the resulting string, or JSTaggedValue::Exception() when any step completes abruptly.
JSTaggedValue BuiltinsString::ReplaceAll(EcmaRuntimeCallInfo *argv)
{
    ASSERT(argv);
    JSThread *thread = argv->GetThread();
    BUILTINS_API_TRACE(thread, String, ReplaceAll);
    [[maybe_unused]] EcmaHandleScope handleScope(thread);
    // 1. Let O be ? RequireObjectCoercible(this value).
    JSHandle<JSTaggedValue> thisTag = JSTaggedValue::RequireObjectCoercible(thread, BuiltinsString::GetThis(argv));
    RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);

    auto ecmaVm = thread->GetEcmaVM();
    JSHandle<GlobalEnv> env = ecmaVm->GetGlobalEnv();
    const GlobalEnvConstants *globalConst = thread->GlobalConstants();
    JSHandle<JSTaggedValue> searchTag = BuiltinsString::GetCallArg(argv, 0);
    JSHandle<JSTaggedValue> replaceTag = BuiltinsString::GetCallArg(argv, 1);
    ObjectFactory *factory = ecmaVm->GetFactory();
    JSHandle<JSTaggedValue> undefined = globalConst->GetHandledUndefined();

    // 2. If searchValue is neither undefined nor null, then
    if (!searchTag->IsUndefined() && !searchTag->IsNull()) {
        // a. Let isRegExp be ? IsRegExp(searchValue).
        bool isJSRegExp = JSObject::IsRegExp(thread, searchTag);
        RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
        // b. If isRegExp is true, then
        if (isJSRegExp) {
            // i. Let flags be ? Get(searchValue, "flags").
            JSHandle<JSTaggedValue> flagsString(globalConst->GetHandledFlagsString());
            JSHandle<JSTaggedValue> flags = JSObject::GetProperty(thread, searchTag, flagsString).GetValue();
            RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
            // ii. Perform ? RequireObjectCoercible(flags).
            JSTaggedValue::RequireObjectCoercible(thread, flags);
            RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
            // iii. If ? ToString(flags) does not contain "g", throw a TypeError exception.
            JSHandle<EcmaString> flagString = JSTaggedValue::ToString(thread, flags);
            RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
            JSHandle<EcmaString> gString(globalConst->GetHandledGString());
            int32_t pos = flagString->IndexOf(*gString);
            if (pos == -1) {
                THROW_TYPE_ERROR_AND_RETURN(thread,
                                            "string.prototype.replaceAll called with a non-global RegExp argument",
                                            JSTaggedValue::Exception());
            }
        }
        // c. Let replacer be ? GetMethod(searchValue, @@replace).
        JSHandle<JSTaggedValue> replaceKey = env->GetReplaceSymbol();
        JSHandle<JSTaggedValue> replaceMethod = JSObject::GetMethod(thread, searchTag, replaceKey);
        RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
        // d. If replacer is not undefined, then
        if (!replaceMethod->IsUndefined()) {
            // i. Return ? Call(replacer, searchValue, «O, replaceValue»).
            const size_t argsLength = 2; // 2: «O, replaceValue»
            EcmaRuntimeCallInfo info =
                EcmaInterpreter::NewRuntimeCallInfo(thread, replaceMethod, searchTag, undefined, argsLength);
            info.SetCallArg(thisTag.GetTaggedValue(), replaceTag.GetTaggedValue());
            return JSFunction::Call(&info);
        }
    }

    // 3. Let string be ? ToString(O).
    JSHandle<EcmaString> thisString = JSTaggedValue::ToString(thread, thisTag);
    RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
    // 4. Let searchString be ? ToString(searchValue).
    JSHandle<EcmaString> searchString = JSTaggedValue::ToString(thread, searchTag);
    RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
    // 5. Let functionalReplace be IsCallable(replaceValue).
    const bool functionalReplace = replaceTag->IsCallable();
    // 6. If functionalReplace is false, then
    if (!functionalReplace) {
        // a. Set replaceValue to ? ToString(replaceValue).
        replaceTag = JSHandle<JSTaggedValue>(JSTaggedValue::ToString(thread, replaceTag));
        RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
    }
    // 7. Let searchLength be the length of searchString.
    // 8. Let advanceBy be max(1, searchLength).
    int32_t searchLength = static_cast<int32_t>(searchString->GetLength());
    int32_t advanceBy = std::max(1, searchLength);

    // The result is accumulated as UTF-16; canBeCompress stays true only while every appended
    // piece was stored as UTF-8.
    std::u16string stringBuilder;
    bool canBeCompress = true;
    auto appendToResult = [&stringBuilder, &canBeCompress](const JSHandle<EcmaString> &piece) {
        if (piece->IsUtf16()) {
            const uint16_t *data = piece->GetDataUtf16();
            stringBuilder += base::StringHelper::Utf16ToU16String(data, piece->GetLength());
            canBeCompress = false;
        } else {
            const uint8_t *data = piece->GetDataUtf8();
            stringBuilder += base::StringHelper::Utf8ToU16String(data, piece->GetLength());
        }
    };

    // A plain string search has no captures; one shared empty list serves every match.
    JSHandle<TaggedArray> capturesList = factory->EmptyArray();
    JSMutableHandle<JSTaggedValue> replHandle(thread, factory->GetEmptyString().GetTaggedValue());
    int32_t endOfLastMatch = 0;
    // 10. Let position be ! StringIndexOf(string, searchString, 0).
    int32_t pos = thisString->IndexOf(*searchString);
    while (pos != -1) {
        if (functionalReplace) {
            // Let replValue be ? Call(replaceValue, undefined, «searchString, position, string»).
            const size_t argsLength = 3; // 3: «matched, pos, and string»
            EcmaRuntimeCallInfo info =
                EcmaInterpreter::NewRuntimeCallInfo(thread, replaceTag, undefined, undefined, argsLength);
            info.SetCallArg(searchString.GetTaggedValue(), JSTaggedValue(pos), thisString.GetTaggedValue());
            JSTaggedValue replValue = JSFunction::Call(&info);
            // The callback is user code: stop as soon as it throws.
            RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
            replHandle.Update(replValue);
        } else {
            ASSERT_PRINT(replaceTag->IsString(), "replace must be string");
            JSHandle<EcmaString> replacement(thread, replaceTag->GetTaggedObject());
            // Let replStr be GetSubstitution(matched, string, pos, captures, namedCaptures, replaceValue)
            replHandle.Update(GetSubstitution(thread, searchString, thisString, pos,
                                              capturesList, undefined, replacement));
        }
        // Let replacement be ? ToString(replValue); the conversion may throw.
        JSHandle<EcmaString> realReplaceStr = JSTaggedValue::ToString(thread, replHandle);
        RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
        // Copy the untouched text between the previous match and this one, then the replacement.
        JSHandle<EcmaString> prefixString(thread,
                                          EcmaString::FastSubString(thisString, endOfLastMatch,
                                                                    pos - endOfLastMatch, ecmaVm));
        appendToResult(prefixString);
        appendToResult(realReplaceStr);
        endOfLastMatch = pos + searchLength;
        // advanceBy is at least 1, so an empty searchString still makes progress.
        pos = thisString->IndexOf(*searchString, pos + advanceBy);
    }
    // Append whatever follows the last match.
    if (endOfLastMatch < static_cast<int32_t>(thisString->GetLength())) {
        JSHandle<EcmaString> suffixString(thread,
                                          EcmaString::FastSubString(thisString, endOfLastMatch,
                                                                    thisString->GetLength() - endOfLastMatch,
                                                                    ecmaVm));
        appendToResult(suffixString);
    }
    auto *char16tData = const_cast<char16_t *>(stringBuilder.c_str());
    auto *uint16tData = reinterpret_cast<uint16_t *>(char16tData);
    return canBeCompress ?
        factory->NewFromUtf16LiteralCompress(uint16tData, stringBuilder.length()).GetTaggedValue() :
        factory->NewFromUtf16LiteralNotCompress(uint16tData, stringBuilder.length()).GetTaggedValue();
}
JSTaggedValue BuiltinsString::GetSubstitution(JSThread *thread, const JSHandle<EcmaString> &matched,
const JSHandle<EcmaString> &srcString, int position,
const JSHandle<TaggedArray> &captureList,
const JSHandle<JSTaggedValue> &namedCaptures,
const JSHandle<EcmaString> &replacement)
{
BUILTINS_API_TRACE(thread, String, GetSubstitution);
@ -1073,7 +1235,6 @@ JSTaggedValue BuiltinsString::GetSubstitution(JSThread *thread, const JSHandle<E
if (nextDollarIndex < 0) {
return replacement.GetTaggedValue();
}
std::u16string stringBuilder;
bool canBeCompress = true;
if (nextDollarIndex > 0) {
@ -1099,6 +1260,7 @@ JSTaggedValue BuiltinsString::GetSubstitution(JSThread *thread, const JSHandle<E
}
int continueFromIndex = -1;
uint16_t peek = replacement->At(peekIndex);
int32_t p = 0;
switch (peek) {
case '$': // $$
stringBuilder += '$';
@ -1193,6 +1355,42 @@ JSTaggedValue BuiltinsString::GetSubstitution(JSThread *thread, const JSHandle<E
continueFromIndex = peekIndex + advance;
break;
}
case '<': {
if (namedCaptures->IsUndefined()) {
stringBuilder += '$';
continueFromIndex = peekIndex;
break;
}
JSHandle<EcmaString> greaterSymString = factory->NewFromASCII(">");
int pos = replacement->IndexOf(*greaterSymString, peekIndex);
if (pos == -1) {
stringBuilder += '$';
continueFromIndex = peekIndex;
break;
}
JSHandle<EcmaString> groupName(thread,
EcmaString::FastSubString(replacement,
peekIndex + 1, pos - peekIndex - 1, ecmaVm));
JSHandle<JSTaggedValue> names(groupName);
JSHandle<JSTaggedValue> capture = JSObject::GetProperty(thread, namedCaptures, names).GetValue();
if (capture->IsUndefined()) {
continueFromIndex = pos + 1;
p = pos;
break;
}
JSHandle<EcmaString> captureName(capture);
if (captureName->IsUtf16()) {
const uint16_t *data = captureName->GetDataUtf16();
stringBuilder += base::StringHelper::Utf16ToU16String(data, captureName->GetLength());
canBeCompress = false;
} else {
const uint8_t *data = captureName->GetDataUtf8();
stringBuilder += base::StringHelper::Utf8ToU16String(data, captureName->GetLength());
}
continueFromIndex = pos + 1;
p = pos;
break;
}
default:
stringBuilder += '$';
continueFromIndex = peekIndex;

View File

@ -43,6 +43,7 @@ public:
static JSTaggedValue GetSubstitution(JSThread *thread, const JSHandle<EcmaString> &matched,
const JSHandle<EcmaString> &srcString, int position,
const JSHandle<TaggedArray> &captureList,
const JSHandle<JSTaggedValue> &namedCaptures,
const JSHandle<EcmaString> &replacement);
// 21.1.3.1
static JSTaggedValue CharAt(EcmaRuntimeCallInfo *argv);
@ -78,6 +79,7 @@ public:
// 21.1.3.14
static JSTaggedValue Replace(EcmaRuntimeCallInfo *argv);
// 21.1.3.14.1 Runtime Semantics: GetSubstitution() — declared earlier in this class
// String.prototype.replaceAll ( searchValue, replaceValue )
static JSTaggedValue ReplaceAll(EcmaRuntimeCallInfo *argv);
// 21.1.3.15
static JSTaggedValue Search(EcmaRuntimeCallInfo *argv);
// 21.1.3.16

View File

@ -1932,13 +1932,16 @@ void JSRegExp::Dump(std::ostream &os) const
{
os << "\n";
os << " - ByteCodeBuffer: ";
GetByteCodeBuffer().Dump(os);
GetByteCodeBuffer().D();
os << "\n";
os << " - OriginalSource: ";
GetOriginalSource().Dump(os);
GetOriginalSource().D();
os << "\n";
os << " - OriginalFlags: ";
GetOriginalFlags().Dump(os);
GetOriginalFlags().D();
os << "\n";
os << " - GroupName: ";
GetGroupName().D();
os << "\n";
os << " - Length: " << GetLength();
os << "\n";
@ -4086,7 +4089,7 @@ void JSRegExp::DumpForSnapshot(std::vector<std::pair<CString, JSTaggedValue>> &v
{
vec.push_back(std::make_pair(CString("originalSource"), GetOriginalSource()));
vec.push_back(std::make_pair(CString("originalFlags"), GetOriginalFlags()));
vec.push_back(std::make_pair(CString("groupName"), GetGroupName()));
JSObject::DumpForSnapshot(vec);
}

View File

@ -179,12 +179,13 @@ int32_t EcmaString::IndexOf(const EcmaString *rhs, int32_t pos) const
const EcmaString *lhs = this;
int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());
if (rhsCount == 0) {
return pos;
if (pos > lhsCount) {
return -1;
}
if (pos >= lhsCount) {
return -1;
if (rhsCount == 0) {
return pos;
}
if (pos < 0) {

View File

@ -407,6 +407,7 @@ void GlobalEnvConstants::InitGlobalConstant(JSThread *thread)
SetConstant(ConstantIndex::FRACTION_STRING_INDEX, factory->NewFromASCIINonMovable("fraction"));
SetConstant(ConstantIndex::DECIMAL_STRING_INDEX, factory->NewFromASCIINonMovable("decimal"));
SetConstant(ConstantIndex::GROUP_STRING_INDEX, factory->NewFromASCIINonMovable("group"));
SetConstant(ConstantIndex::GROUPS_STRING_INDEX, factory->NewFromASCIINonMovable("groups"));
SetConstant(ConstantIndex::CURRENCY_STRING_INDEX, factory->NewFromASCIINonMovable("currency"));
SetConstant(ConstantIndex::CURRENCY_SIGN_STRING_INDEX, factory->NewFromASCIINonMovable("currencySign"));
SetConstant(ConstantIndex::CURRENCY_DISPLAY_STRING_INDEX, factory->NewFromASCIINonMovable("currencyDisplay"));

View File

@ -280,6 +280,7 @@ class JSThread;
V(JSTaggedValue, FractionString, FRACTION_STRING_INDEX, fraction) \
V(JSTaggedValue, DecimalString, DECIMAL_STRING_INDEX, decimal) \
V(JSTaggedValue, GroupString, GROUP_STRING_INDEX, group) \
V(JSTaggedValue, GroupsString, GROUPS_STRING_INDEX, groups) \
V(JSTaggedValue, CurrencyString, CURRENCY_STRING_INDEX, currency) \
V(JSTaggedValue, CurrencySignString, CURRENCY_SIGN_STRING_INDEX, currencySign) \
V(JSTaggedValue, CurrencyDisplayString, CURRENCY_DISPLAY_STRING_INDEX, currencyDisplay) \

View File

@ -29,7 +29,8 @@ public:
static constexpr size_t REGEXP_BYTE_CODE_OFFSET = JSObject::SIZE;
ACCESSORS(ByteCodeBuffer, REGEXP_BYTE_CODE_OFFSET, ORIGINAL_SOURCE_OFFSET)
ACCESSORS(OriginalSource, ORIGINAL_SOURCE_OFFSET, ORIGINAL_FLAGS_OFFSET)
ACCESSORS(OriginalFlags, ORIGINAL_FLAGS_OFFSET, LENGTH_OFFSET)
ACCESSORS(OriginalFlags, ORIGINAL_FLAGS_OFFSET, GROUP_NAME_OFFSET)
ACCESSORS(GroupName, GROUP_NAME_OFFSET, LENGTH_OFFSET)
ACCESSORS_PRIMITIVE_FIELD(Length, uint32_t, LENGTH_OFFSET, LAST_OFFSET)
DEFINE_ALIGN_SIZE(LAST_OFFSET);

View File

@ -1055,6 +1055,7 @@ void ObjectFactory::InitializeJSObject(const JSHandle<JSObject> &obj, const JSHa
JSRegExp::Cast(*obj)->SetByteCodeBuffer(thread_, JSTaggedValue::Undefined());
JSRegExp::Cast(*obj)->SetOriginalSource(thread_, JSTaggedValue::Undefined());
JSRegExp::Cast(*obj)->SetOriginalFlags(thread_, JSTaggedValue(0));
JSRegExp::Cast(*obj)->SetGroupName(thread_, JSTaggedValue::Undefined());
JSRegExp::Cast(*obj)->SetLength(0);
break;
case JSType::JS_PRIMITIVE_REF:

View File

@ -344,7 +344,6 @@ public:
}
return false;
}
inline uint32_t HighestValue() const
{
if (!rangeSet_.empty()) {
@ -352,7 +351,6 @@ public:
}
return 0;
}
RangeSet(RangeSet const &) = default;
RangeSet &operator=(RangeSet const &) = default;
RangeSet(RangeSet &&) = default;

View File

@ -21,10 +21,15 @@
#include "libpandabase/utils/utils.h"
#include "securec.h"
#include "unicode/uniset.h"
#include "third_party/icu/icu4c/source/common/unicode/uchar.h"
#define _NO_DEBUG_
namespace panda::ecmascript {
// Shared limit: size of the group-name scratch buffer in ParseGroupSpecifier and the first
// code point that IsIdentFirst no longer looks up in ID_START_TABLE_ASCII.
static constexpr uint32_t CACHE_SIZE = 128;
// Bitmap of the code points below CACHE_SIZE that may start a group name.
// Word (c >> 5) covers 32 consecutive code points; bit (c & 31) within it is set for: $ A-Z _ a-z
static constexpr uint32_t ID_START_TABLE_ASCII[4] = {
    0x00000000,  // 0x00 - 0x1F: none
    0x00000010,  // 0x20 - 0x3F: $
    0x87FFFFFE,  // 0x40 - 0x5F: A-Z _
    0x07FFFFFE,  // 0x60 - 0x7F: a-z
};
static RangeSet g_rangeD(0x30, 0x39); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
// NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static RangeSet g_rangeS({
@ -539,6 +544,7 @@ bool RegExpParser::ParseAssertionCapture(int *captureIndex, bool isBackward)
return false;
}
groupNames_.EmitStr(name.c_str());
newGroupNames_.push_back(name);
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
PrintF("group name %s", name.c_str());
Advance();
@ -758,24 +764,42 @@ void RegExpParser::ParseQuantifier(size_t atomBcStart, int captureStart, int cap
bool RegExpParser::ParseGroupSpecifier(const uint8_t **pp, CString &name)
{
const uint8_t *p = *pp;
int c = *p;
while (c != '>') {
if (c < (INT8_MAX + 1)) {
if (name.empty()) {
if (!g_regexpIdentifyStart.IsContain(c)) {
return false;
}
} else {
if (!g_regexpIdentifyContinue.IsContain(c)) {
return false;
}
uint32_t c ;
char buffer[CACHE_SIZE] = {0};
char *q = buffer;
while (true) {
c = *p;
if (c == '\\') {
p++;
if (*p != 'u') {
return false;
}
name += static_cast<char>(c);
if (!ParseUnicodeEscape(&c)) {
return false;
}
} else if (c == '>') {
break;
} else if (c > CACHE_SIZE) {
c = base::StringHelper::UnicodeFromUtf8(p, UTF8_CHAR_LEN_MAX, &p);
} else {
p++;
}
c = *++p; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
}
p++; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
if (q == buffer) {
if (!IsIdentFirst(c)) {
return false;
}
} else {
if (!u_isIDPart(c)) {
return false;
}
}
if (q != nullptr) {
*q++ = c;
}
} // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
p++;
*pp = p;
name = buffer;
return true;
}
@ -784,6 +808,7 @@ int RegExpParser::ParseCaptureCount(const char *groupName)
const uint8_t *p;
int captureIndex = 1;
CString name;
hasNamedCaptures_ = 0;
for (p = base_; p < end_; p++) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
switch (*p) {
case '(': {
@ -793,6 +818,7 @@ int RegExpParser::ParseCaptureCount(const char *groupName)
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
p[CAPTURE_CONUT_ADVANCE] != '=') {
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
hasNamedCaptures_ = 1;
p += CAPTURE_CONUT_ADVANCE;
if (groupName != nullptr) {
if (ParseGroupSpecifier(&p, name)) {
@ -836,6 +862,7 @@ int RegExpParser::ParseAtomEscape(bool isBackward)
int result = -1;
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
PrintF("Parse AtomEscape------\n");
PrevOpCode prevOp;
switch (c0_) {
case KEY_EOF:
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
@ -870,50 +897,108 @@ int RegExpParser::ParseAtomEscape(bool isBackward)
case 'd': {
// [0-9]
RangeOpCode rangeOp;
if (isBackward) {
prevOp.EmitOpCode(&buffer_, 0);
}
rangeOp.InsertOpCode(&buffer_, g_rangeD);
Advance();
goto parseLookBehind;
} break;
case 'D': {
// [^0-9]
RangeSet atomRange(g_rangeD);
atomRange.Invert(IsUtf16());
Range32OpCode rangeOp;
if (isBackward) {
prevOp.EmitOpCode(&buffer_, 0);
}
rangeOp.InsertOpCode(&buffer_, atomRange);
Advance();
goto parseLookBehind;
} break;
case 's': {
// [\f\n\r\t\v]
RangeOpCode rangeOp;
if (isBackward) {
prevOp.EmitOpCode(&buffer_, 0);
}
rangeOp.InsertOpCode(&buffer_, g_rangeS);
Advance();
goto parseLookBehind;
} break;
case 'S': {
RangeSet atomRange(g_rangeS);
atomRange.Invert(IsUtf16());
Range32OpCode rangeOp;
atomRange.Invert(IsUtf16());
if (isBackward) {
prevOp.EmitOpCode(&buffer_, 0);
}
rangeOp.InsertOpCode(&buffer_, atomRange);
Advance();
goto parseLookBehind;
} break;
case 'w': {
// [A-Za-z0-9]
RangeOpCode rangeOp;
if (isBackward) {
prevOp.EmitOpCode(&buffer_, 0);
}
rangeOp.InsertOpCode(&buffer_, g_rangeW);
Advance();
goto parseLookBehind;
} break;
case 'W': {
// [^A-Za-z0-9]
RangeSet atomRange(g_rangeW);
atomRange.Invert(IsUtf16());
Range32OpCode rangeOp;
if (isBackward) {
prevOp.EmitOpCode(&buffer_, 0);
}
rangeOp.InsertOpCode(&buffer_, atomRange);
Advance();
goto parseLookBehind;
} break;
// P{UnicodePropertyValueExpression}
// p{UnicodePropertyValueExpression}
case 'P':
case 'p':
// [+N]kGroupName[?U]
case 'k':
case 'k': {
Advance();
if (c0_ != '<') {
if (!IsUtf16() || HasNamedCaptures()) {
ParseError("expecting group name.");
break;
}
}
Advance();
Prev();
CString name;
auto **pp = const_cast<const uint8_t **>(&pc_);
if (!ParseGroupSpecifier(pp, name)) {
ParseError("GroupName Syntax error.");
break;
}
int postion = FindGroupName(name);
if (postion < 0) {
postion = ParseCaptureCount(name.c_str());
if (postion < 0 && (!IsUtf16() || HasNamedCaptures())) {
ParseError("group name not defined");
break;
}
}
if (isBackward) {
BackwardBackReferenceOpCode backReferenceOp;
backReferenceOp.EmitOpCode(&buffer_, postion);
} else {
BackReferenceOpCode backReferenceOp;
backReferenceOp.EmitOpCode(&buffer_, postion);
}
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
Advance();
} break;
parseLookBehind: {
if (isBackward) {
prevOp.EmitOpCode(&buffer_, 0);
}
Advance();
break;
}
default:
result = ParseCharacterEscape();
break;
@ -921,6 +1006,22 @@ int RegExpParser::ParseAtomEscape(bool isBackward)
return result;
}
int RegExpParser::RecountCaptures()
{
if (totalCaptureCount_ < 0) {
const char *name = reinterpret_cast<const char*>(groupNames_.buf_);
totalCaptureCount_ = ParseCaptureCount(name);
}
return totalCaptureCount_;
}
// Reports whether the pattern contains named capture groups.
// hasNamedCaptures_ is a tri-state cache: a negative value means "not computed
// yet", in which case RecountCaptures() is run before the flag is read.
// NOTE(review): assumes the scan triggered by RecountCaptures() stores its
// finding in hasNamedCaptures_ - confirm in ParseCaptureCount.
bool RegExpParser::HasNamedCaptures()
{
    if (hasNamedCaptures_ < 0) {
        RecountCaptures();
    }
    // Return the cached flag instead of a constant. If the flag is still unset
    // (negative) after the recount, the result remains false.
    return hasNamedCaptures_ > 0;
}
int RegExpParser::ParseCharacterEscape()
{
// CharacterEscape[U]::
@ -1304,4 +1405,13 @@ void RegExpParser::ParseError(const char *errorMessage)
UNREACHABLE();
}
}
} // namespace panda::ecmascript
// Tests whether code point c may start an identifier.
// Code points below CACHE_SIZE are answered from the ID_START_TABLE_ASCII bitmap;
// everything else is delegated to u_isIDStart().
int RegExpParser::IsIdentFirst(uint32_t c)
{
    if (c >= CACHE_SIZE) {
        return u_isIDStart(c);
    }
    const uint32_t wordIndex = c >> 5;  // 5: each table entry covers 32 code points
    const uint32_t bitIndex = c & 31;   // 31: mask selecting the bit inside that entry
    return (ID_START_TABLE_ASCII[wordIndex] >> bitIndex) & 1;
}
} // namespace panda::ecmascript

View File

@ -28,6 +28,7 @@
#include "unicode/utf16.h"
#include "unicode/utf8.h"
#include "unicode/utypes.h"
#include "unicode/udata.h"
namespace panda::ecmascript {
class RegExpParser {
@ -51,6 +52,7 @@ public:
static constexpr uint32_t UNICODE_HEX_VALUE = 4;
static constexpr uint32_t UNICODE_HEX_ADVANCE = 2;
static constexpr uint32_t CAPTURE_CONUT_ADVANCE = 3;
static constexpr uint32_t UTF8_CHAR_LEN_MAX = 6;
explicit RegExpParser(Chunk *chunk)
: base_(nullptr),
@ -105,7 +107,21 @@ public:
bool ParseUnlimitedLengthHexNumber(uint32_t maxValue, uint32_t *value);
bool ParseUnicodeEscape(uint32_t *value);
bool ParserIntervalQuantifier(int *pmin, int *pmax);
bool HasNamedCaptures();
int ParseEscape(const uint8_t **pp, int isUtf16);
int RecountCaptures();
int IsIdentFirst(uint32_t c);
// Returns a copy of newGroupNames_.
// (Defined in-class, so it is implicitly inline.)
std::vector<CString> GetGroupNames() const
{
    return newGroupNames_;
}
// Returns the size_ field of the groupNames_ buffer.
// (Defined in-class, so it is implicitly inline.)
size_t GetGroupNamesSize() const
{
    return groupNames_.size_;
}
inline bool IsError() const
{
return isError_;
@ -227,8 +243,11 @@ private:
int stackCount_;
bool isError_;
char errorMsg_[TMP_BUF_SIZE] = {0}; // NOLINTNEXTLINE(modernize-avoid-c-arrays)
int hasNamedCaptures_ = -1;
int totalCaptureCount_ = -1;
DynChunk buffer_;
DynChunk groupNames_;
std::vector<CString> newGroupNames_;
};
} // namespace panda::ecmascript
#endif // ECMASCRIPT_REGEXP_PARSER_H

View File

@ -505,6 +505,7 @@ namespace panda::ecmascript {
V(String, PadEnd) \
V(String, Repeat) \
V(String, Replace) \
V(String, ReplaceAll) \
V(String, Search) \
V(String, Slice) \
V(String, Split) \

View File

@ -472,6 +472,7 @@ static uintptr_t g_nativeTable[] = {
reinterpret_cast<uintptr_t>(BuiltinsString::PadStart),
reinterpret_cast<uintptr_t>(BuiltinsString::Repeat),
reinterpret_cast<uintptr_t>(BuiltinsString::Replace),
reinterpret_cast<uintptr_t>(BuiltinsString::ReplaceAll),
reinterpret_cast<uintptr_t>(BuiltinsString::Search),
reinterpret_cast<uintptr_t>(BuiltinsString::Slice),
reinterpret_cast<uintptr_t>(BuiltinsString::Split),

View File

@ -275,6 +275,7 @@ static JSHandle<JSRegExp> NewJSRegExp(JSThread *thread, ObjectFactory *factory,
JSHandle<JSRegExp> jSRegExp = JSHandle<JSRegExp>::Cast(factory->NewJSObject(jSRegExpClass));
jSRegExp->SetByteCodeBuffer(thread, JSTaggedValue::Undefined());
jSRegExp->SetOriginalSource(thread, JSTaggedValue::Undefined());
jSRegExp->SetGroupName(thread, JSTaggedValue::Undefined());
jSRegExp->SetOriginalFlags(thread, JSTaggedValue(0));
jSRegExp->SetLength(0);
return jSRegExp;
@ -500,7 +501,7 @@ HWTEST_F_L0(EcmaDumpTest, HeapProfileDump)
break;
}
case JSType::JS_REG_EXP: {
CHECK_DUMP_FIELDS(JSObject::SIZE, JSRegExp::SIZE, 4U)
CHECK_DUMP_FIELDS(JSObject::SIZE, JSRegExp::SIZE, 5U)
NEW_OBJECT_AND_DUMP(JSRegExp, JS_REG_EXP)
break;
}