!10129 regexp 优化replaceInternal接口

Merge pull request !10129 from jiangmengyang/regex
This commit is contained in:
openharmony_ci 2024-11-08 02:35:58 +00:00 committed by Gitee
commit 3bc375ca33
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
6 changed files with 144 additions and 50 deletions

View File

@ -1153,8 +1153,9 @@ JSTaggedValue BuiltinsRegExp::ReplaceInternal(JSThread *thread,
bool isUtf8 = true;
uint32_t resultStrLength = 0;
uint32_t resultArrayLength = (static_cast<uint32_t>(resultsIndex) + 1) * 2;
JSHandle<TaggedArray> resultArray = factory->NewTaggedArray(resultArrayLength);
CVector<JSHandle<JSTaggedValue>> resultArray(resultArrayLength, globalConst->GetHandledHole());
std::vector<uint64_t> resultLengthArray(resultArrayLength);
CVector<JSHandle<JSTaggedValue>> capturesList;
// 15. Let nextSourcePosition be 0.
uint32_t nextSourcePosition = 0;
JSMutableHandle<JSTaggedValue> getMatchString(thread, JSTaggedValue::Undefined());
@ -1202,14 +1203,12 @@ JSTaggedValue BuiltinsRegExp::ReplaceInternal(JSThread *thread,
}
// i. Let position be max(min(position, lengthS), 0).
position = std::max<uint32_t>(std::min<uint32_t>(position, length), 0);
// j. Let n be 1.
uint32_t index = 1;
// k. Let captures be an empty List.
JSHandle<TaggedArray> capturesList = factory->NewTaggedArray(ncaptures);
// l. Repeat while n ≤ nCaptures
while (index <= ncaptures) {
// j. Let captures be an empty List.
capturesList.resize(ncaptures);
// l. Repeat while n < nCaptures
for (uint32_t index = 0; index < ncaptures; index++) {
// i. Let capN be Get(result, ToString(n)).
capN.Update(ObjectFastOperator::FastGetPropertyByIndex(thread, resultValues.GetTaggedValue(), index));
capN.Update(ObjectFastOperator::FastGetPropertyByIndex(thread, resultValues.GetTaggedValue(), index + 1));
// ii. ReturnIfAbrupt(capN).
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
// iii. If capN is not undefined, then
@ -1218,16 +1217,12 @@ JSTaggedValue BuiltinsRegExp::ReplaceInternal(JSThread *thread,
JSHandle<EcmaString> capNStr = JSTaggedValue::ToString(thread, capN);
// 2. ReturnIfAbrupt(capN).
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
JSHandle<JSTaggedValue> capnStr = JSHandle<JSTaggedValue>::Cast(capNStr);
capturesList->Set(thread, index - 1, capnStr);
capturesList[index] = JSHandle<JSTaggedValue>(thread, capNStr.GetTaggedValue());
} else {
// iv. Append capN as the last element of captures.
capturesList->Set(thread, index - 1, capN);
capturesList[index] = JSHandle<JSTaggedValue>(thread, capN.GetTaggedValue());
}
// v. Let n be n+1
++index;
}
// j. Let namedCaptures be ? Get(result, "groups").
JSTaggedValue named = GetExecResultGroups(thread, resultValues, isFastPath);
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
@ -1240,29 +1235,26 @@ JSTaggedValue BuiltinsRegExp::ReplaceInternal(JSThread *thread,
} else {
emptyArrLength = 4; // 4: «matched, pos, string, and groups»
}
JSHandle<TaggedArray> replacerArgs =
factory->NewTaggedArray(emptyArrLength + capturesList->GetLength());
if (functionalReplace) {
// i. Let replacerArgs be «matched».
replacerArgs->Set(thread, 0, getMatchString.GetTaggedValue());
// ii. Append in list order the elements of captures to the end of the List replacerArgs.
// iii. Append position and S as the last two elements of replacerArgs.
index = 0;
while (index < capturesList->GetLength()) {
replacerArgs->Set(thread, index + 1, capturesList->Get(index));
++index;
}
replacerArgs->Set(thread, index + 1, JSTaggedValue(position));
replacerArgs->Set(thread, index + 2, inputStr.GetTaggedValue()); // 2: position of string
if (!namedCaptures->IsUndefined()) {
replacerArgs->Set(thread, index + 3, namedCaptures.GetTaggedValue()); // 3: position of groups
}
// iv. Let replValue be Call(replaceValue, undefined, replacerArgs).
const uint32_t argsLength = replacerArgs->GetLength();
// Let replValue be Call(replaceValue, undefined, replacerArgs).
EcmaRuntimeCallInfo *info =
EcmaInterpreter::NewRuntimeCallInfo(thread, inputReplaceValue, undefined, undefined, argsLength);
EcmaInterpreter::NewRuntimeCallInfo(thread,
inputReplaceValue, undefined, undefined, emptyArrLength + ncaptures);
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
info->SetCallArg(argsLength, replacerArgs);
// i. Let replacerArgs be «matched».
info->SetCallArg(0, getMatchString.GetTaggedValue());
// ii. Append in list order the elements of captures to the end of the List replacerArgs.
for (uint32_t index = 0; index < ncaptures; index++) {
info->SetCallArg(index + 1, capturesList[index].GetTaggedValue());
}
// iii. Append position and S as the last two elements of replacerArgs.
info->SetCallArg(ncaptures + EXEC_RESULT_INDEX_OFFSET, JSTaggedValue(position));
info->SetCallArg(ncaptures + EXEC_RESULT_INPUT_OFFSET, inputStr.GetTaggedValue());
if (!namedCaptures->IsUndefined()) {
// iv. position of groups
info->SetCallArg(ncaptures + EXEC_RESULT_GROUPS_OFFSET, namedCaptures.GetTaggedValue());
}
JSTaggedValue replaceResult = JSFunction::Call(info);
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
JSHandle<JSTaggedValue> replValue(thread, replaceResult);
@ -1272,32 +1264,37 @@ JSTaggedValue BuiltinsRegExp::ReplaceInternal(JSThread *thread,
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
} else {
// n. Else,
JSHandle<TaggedArray> capturesArray = factory->NewTaggedArray(ncaptures);
if (!namedCaptures->IsUndefined()) {
JSHandle<JSObject> namedCapturesObj = JSTaggedValue::ToObject(thread, namedCaptures);
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
namedCaptures = JSHandle<JSTaggedValue>::Cast(namedCapturesObj);
}
for (uint32_t index = 0; index < ncaptures; index++) {
capturesArray->Set(thread, index, capturesList[index]);
}
replacementString.Update(BuiltinsString::GetSubstitution(thread, matchString, srcString,
position, capturesList, namedCaptures, replaceValueHandle));
position, capturesArray, namedCaptures, replaceValueHandle));
}
// p. If position ≥ nextSourcePosition, then
if (position >= nextSourcePosition) {
ASSERT(REPLACE_RESULT_VAL * i + 1 < resultArray.size());
// ii. Let accumulatedResult be the String formed by concatenating the code units of the current value
// of accumulatedResult with the substring of S consisting of the code units from nextSourcePosition
// (inclusive) up to position (exclusive) and with the code units of replacement.
// store undefined in resultArray
resultArray->Set(thread, REPLACE_RESULT_VAL * i, JSTaggedValue::Undefined());
resultArray[REPLACE_RESULT_VAL * i] = globalConst->GetHandledUndefined();
uint64_t bits = 0;
bits |= ReplaceLengthField::Encode(position - nextSourcePosition);
bits |= ReplacePositionField::Encode(nextSourcePosition);
// store position and length bits in resultLengthArray
resultLengthArray[REPLACE_RESULT_VAL * i] = bits;
resultStrLength += (position - nextSourcePosition);
auto subString = EcmaStringAccessor::FastSubString(
isUtf8 &= EcmaStringAccessor::SubStringIsUtf8(
thread->GetEcmaVM(), srcString, nextSourcePosition, position - nextSourcePosition);
isUtf8 &= EcmaStringAccessor(subString).IsUtf8();
// store replacement string in resultArray
resultArray->Set(thread, REPLACE_RESULT_VAL * i + 1, replacementString.GetTaggedValue());
resultArray[REPLACE_RESULT_VAL * i + 1] =
JSHandle<JSTaggedValue>(thread, replacementString.GetTaggedValue());
uint32_t replacementLength = EcmaStringAccessor(replacementString).GetLength();
// store length of replacement string in resultLengthArray
resultLengthArray[REPLACE_RESULT_VAL * i + 1] = static_cast<uint64_t>(replacementLength);
@ -1311,13 +1308,12 @@ JSTaggedValue BuiltinsRegExp::ReplaceInternal(JSThread *thread,
// 17. If nextSourcePosition ≥ lengthS, return accumulatedResult.
if (nextSourcePosition < length) {
// store undefined in resultArray
resultArray->Set(thread, REPLACE_RESULT_VAL * resultsIndex, JSTaggedValue::Undefined());
resultArray[REPLACE_RESULT_VAL * resultsIndex] = globalConst->GetHandledUndefined();
uint64_t bits = 0;
bits |= ReplaceLengthField::Encode(length - nextSourcePosition);
bits |= ReplacePositionField::Encode(nextSourcePosition);
auto subStringEnd = EcmaStringAccessor::FastSubString(
isUtf8 &= EcmaStringAccessor::SubStringIsUtf8(
thread->GetEcmaVM(), srcString, nextSourcePosition, length - nextSourcePosition);
isUtf8 &= EcmaStringAccessor(subStringEnd).IsUtf8();
// store position and length bits in resultLengthArray
resultLengthArray[REPLACE_RESULT_VAL * resultsIndex] = bits;
resultStrLength += (length - nextSourcePosition);
@ -2891,7 +2887,8 @@ JSTaggedValue BuiltinsRegExp::GetExecResultGroups(JSThread *thread, const JSHand
}
JSHandle<EcmaString> BuiltinsRegExp::CreateStringFromResultArray(JSThread *thread,
const JSHandle<TaggedArray> resultArray, const std::vector<uint64_t> &resultLengthArray,
const CVector<JSHandle<JSTaggedValue>> &resultArray,
const std::vector<uint64_t> &resultLengthArray,
JSHandle<EcmaString> srcString, uint32_t resultStrLength, bool isUtf8)
{
JSHandle<EcmaString> result = JSHandle<EcmaString>(thread,
@ -2902,9 +2899,9 @@ JSHandle<EcmaString> BuiltinsRegExp::CreateStringFromResultArray(JSThread *threa
}
FlatStringInfo resultInfo = FlatStringInfo(*result, 0, resultStrLength);
uint32_t nextPos = 0;
uint32_t resultArrayLength = resultArray->GetLength();
for (int i = 0; i < static_cast<int>(resultArrayLength); i++) {
JSTaggedValue substrValue = resultArray->Get(thread, i);
uint32_t resultArrayLength = resultArray.size();
for (uint32_t i = 0; i < resultArrayLength; i++) {
JSTaggedValue substrValue = resultArray[i].GetTaggedValue();
if (substrValue.IsHole()) {
continue;
}

View File

@ -196,7 +196,8 @@ private:
JSHandle<EcmaString> inputString, int32_t lastIndex);
static JSTaggedValue RegExpSplitFast(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
JSHandle<JSTaggedValue> string, uint32_t limit, bool useCache);
static JSHandle<EcmaString> CreateStringFromResultArray(JSThread *thread, const JSHandle<TaggedArray> resultArray,
static JSHandle<EcmaString> CreateStringFromResultArray(JSThread *thread,
const CVector<JSHandle<JSTaggedValue>> &resultArray,
const std::vector<uint64_t> &resultLengthArray, JSHandle<EcmaString> srcString,
uint32_t resultStrLength, bool isUtf8);
};

View File

@ -185,6 +185,20 @@ EcmaString *EcmaString::GetSubString(const EcmaVM *vm,
return FastSubString(vm, src, start, length);
}
bool EcmaString::SubStringIsUtf8(const EcmaVM *vm,
const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
{
ASSERT((start + length) <= src->GetLength());
if (length == 0) {
return true;
}
if (src->IsUtf8()) {
return true;
}
FlatStringInfo srcFlat = FlattenAllString(vm, src);
return CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
}
void EcmaString::WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length)
{
ASSERT(IsLineString() && !IsConstantString());

View File

@ -143,6 +143,8 @@ private:
uint32_t length, bool compressed);
static EcmaString *FastSubString(const EcmaVM *vm,
const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
// Reports whether the range [start, start + length) of src can be treated as UTF-8;
// takes the same (vm, src, start, length) arguments as FastSubString but returns a flag
// instead of a new string.
static bool SubStringIsUtf8(const EcmaVM *vm,
const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
static EcmaString *GetSlicedString(const EcmaVM *vm,
const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
static EcmaString *GetSubString(const EcmaVM *vm,
@ -1139,7 +1141,11 @@ public:
{
return EcmaString::FastSubString(vm, src, start, length);
}
// Public entry point that forwards its arguments unchanged to EcmaString::SubStringIsUtf8
// and returns that call's result.
static bool SubStringIsUtf8(const EcmaVM *vm,
const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
{
return EcmaString::SubStringIsUtf8(vm, src, start, length);
}
// get
static EcmaString *GetSubString(const EcmaVM *vm,
const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)

View File

@ -192,3 +192,14 @@ SyntaxError
SyntaxError
\\\[ ["\\["]
\\\[ ["\\["]
ab.defgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgz
ab.defgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgz
abCdefgxyzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgz
abxyzdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgz
abxCyzdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgz
abxCyzdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgz
abCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgz
abCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgz
utf83这要替换"!
3这要替换"!
abcdefg3higk"!

View File

@ -813,3 +813,68 @@ print(res8.flags);
print(e.name);
}
}
// Regression block for RegExp.prototype[Symbol.replace]: string replacements, "$1" substitutions
// and a function replacer, followed by three String.prototype.replace cases aimed at SubStringIsUtf8.
{
// Define `global` on RegExp.prototype as a data property whose value is false.
// NOTE(review): presumably this makes the replace path read every regex below as non-global,
// including the /g literals — confirm against the expected-output file.
Object.defineProperty(RegExp.prototype, "global", {
value: false
})
let str1;
let result;
// Same character class with and without the g flag, and with and without a capture group.
const re1 = /[Cz]/;
const re2 = /[Cz]/g;
const re3 = /([Cz])/;
const re4 = /([Cz])/g;
// Builds the subject string: "abCdefgz" doubled three times, i.e. eight copies.
function createHaystack() {
let s = "abCdefgz";
for (let i = 0; i < 3; i++) s += s;
return s;
}
str1 = createHaystack();
// Each helper calls re[Symbol.replace] directly on str1 and stores the outcome in `result`.
// Replacement is a one-character string.
function String1Replace(re) {
result = re[Symbol.replace](str1, ".");
}
// Replacement is a plain multi-character string.
function String2Replace(re) {
result = re[Symbol.replace](str1, "xyz");
}
// Replacement contains the "$1" capture reference.
function String3Replace(re) {
result = re[Symbol.replace](str1, "x$1yz");
}
// Replacement is a function: the built-in String is used as the replacer callback.
function Function1Replace(re) {
result = re[Symbol.replace](str1, String);
}
// The calls below reuse the same regex objects, so their order is part of the test.
String1Replace(re1);
print(result);
String1Replace(re2);
print(result);
String2Replace(re2);
print(result);
String2Replace(re1);
print(result);
String3Replace(re3);
print(result);
String3Replace(re4);
print(result);
Function1Replace(re3);
print(result);
Function1Replace(re4);
print(result);
// SubStringIsUtf8, CanBeCompressed branch: the subject contains non-ASCII characters and the
// match "3c" is preceded by the ASCII text "utf8".
str1 = 'utf83c这要替换"!';
let regexp = /([0-9])([a-z])/g
let newStr1 = str1.replace(regexp, "$1" );
print(newStr1);
// SubStringIsUtf8, length == 0 branch: the match "3c" sits at index 0, so no text precedes it.
str1 = '3c这要替换"!';
regexp = /([0-9])([a-z])/g
newStr1 = str1.replace(regexp, "$1" );
print(newStr1);
// SubStringIsUtf8, source-is-UTF-8 branch: the subject consists of ASCII characters only.
str1 = 'abcdefg3chigk"!';
regexp = /([0-9])([a-z])/g
newStr1 = str1.replace(regexp, "$1" );
print(newStr1);
}