diff --git a/ecmascript/builtins/builtins_regexp.cpp b/ecmascript/builtins/builtins_regexp.cpp index c6a44bd389..032e4b8098 100644 --- a/ecmascript/builtins/builtins_regexp.cpp +++ b/ecmascript/builtins/builtins_regexp.cpp @@ -1153,8 +1153,9 @@ JSTaggedValue BuiltinsRegExp::ReplaceInternal(JSThread *thread, bool isUtf8 = true; uint32_t resultStrLength = 0; uint32_t resultArrayLength = (static_cast(resultsIndex) + 1) * 2; - JSHandle resultArray = factory->NewTaggedArray(resultArrayLength); + CVector> resultArray(resultArrayLength, globalConst->GetHandledHole()); std::vector resultLengthArray(resultArrayLength); + CVector> capturesList; // 15. Let nextSourcePosition be 0. uint32_t nextSourcePosition = 0; JSMutableHandle getMatchString(thread, JSTaggedValue::Undefined()); @@ -1202,14 +1203,12 @@ JSTaggedValue BuiltinsRegExp::ReplaceInternal(JSThread *thread, } // i. Let position be max(min(position, lengthS), 0). position = std::max(std::min(position, length), 0); - // j. Let n be 1. - uint32_t index = 1; - // k. Let captures be an empty List. - JSHandle capturesList = factory->NewTaggedArray(ncaptures); - // l. Repeat while n ≤ nCaptures - while (index <= ncaptures) { + // j. Let captures be an empty List. + capturesList.resize(ncaptures); + // l. Repeat while n < nCaptures + for (uint32_t index = 0; index < ncaptures; index++) { // i. Let capN be Get(result, ToString(n)). - capN.Update(ObjectFastOperator::FastGetPropertyByIndex(thread, resultValues.GetTaggedValue(), index)); + capN.Update(ObjectFastOperator::FastGetPropertyByIndex(thread, resultValues.GetTaggedValue(), index + 1)); // ii. ReturnIfAbrupt(capN). RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); // iii. If capN is not undefined, then @@ -1218,16 +1217,12 @@ JSTaggedValue BuiltinsRegExp::ReplaceInternal(JSThread *thread, JSHandle capNStr = JSTaggedValue::ToString(thread, capN); // 2. ReturnIfAbrupt(capN). 
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - JSHandle capnStr = JSHandle::Cast(capNStr); - capturesList->Set(thread, index - 1, capnStr); + capturesList[index] = JSHandle(thread, capNStr.GetTaggedValue()); } else { // iv. Append capN as the last element of captures. - capturesList->Set(thread, index - 1, capN); + capturesList[index] = JSHandle(thread, capN.GetTaggedValue()); } - // v. Let n be n+1 - ++index; } - // j. Let namedCaptures be ? Get(result, "groups"). JSTaggedValue named = GetExecResultGroups(thread, resultValues, isFastPath); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); @@ -1240,29 +1235,26 @@ JSTaggedValue BuiltinsRegExp::ReplaceInternal(JSThread *thread, } else { emptyArrLength = 4; // 4: «matched, pos, string, and groups» } - JSHandle replacerArgs = - factory->NewTaggedArray(emptyArrLength + capturesList->GetLength()); if (functionalReplace) { - // i. Let replacerArgs be «matched». - replacerArgs->Set(thread, 0, getMatchString.GetTaggedValue()); - // ii. Append in list order the elements of captures to the end of the List replacerArgs. - // iii. Append position and S as the last two elements of replacerArgs. - index = 0; - while (index < capturesList->GetLength()) { - replacerArgs->Set(thread, index + 1, capturesList->Get(index)); - ++index; - } - replacerArgs->Set(thread, index + 1, JSTaggedValue(position)); - replacerArgs->Set(thread, index + 2, inputStr.GetTaggedValue()); // 2: position of string - if (!namedCaptures->IsUndefined()) { - replacerArgs->Set(thread, index + 3, namedCaptures.GetTaggedValue()); // 3: position of groups - } - // iv. Let replValue be Call(replaceValue, undefined, replacerArgs). - const uint32_t argsLength = replacerArgs->GetLength(); + // Let replValue be Call(replaceValue, undefined, replacerArgs). 
EcmaRuntimeCallInfo *info = - EcmaInterpreter::NewRuntimeCallInfo(thread, inputReplaceValue, undefined, undefined, argsLength); + EcmaInterpreter::NewRuntimeCallInfo(thread, + inputReplaceValue, undefined, undefined, emptyArrLength + ncaptures); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); - info->SetCallArg(argsLength, replacerArgs); + + // i. Let replacerArgs be «matched». + info->SetCallArg(0, getMatchString.GetTaggedValue()); + // ii. Append in list order the elements of captures to the end of the List replacerArgs. + for (uint32_t index = 0; index < ncaptures; index++) { + info->SetCallArg(index + 1, capturesList[index].GetTaggedValue()); + } + // iii. Append position and S as the last two elements of replacerArgs. + info->SetCallArg(ncaptures + EXEC_RESULT_INDEX_OFFSET, JSTaggedValue(position)); + info->SetCallArg(ncaptures + EXEC_RESULT_INPUT_OFFSET, inputStr.GetTaggedValue()); + if (!namedCaptures->IsUndefined()) { + // iv. position of groups + info->SetCallArg(ncaptures + EXEC_RESULT_GROUPS_OFFSET, namedCaptures.GetTaggedValue()); + } JSTaggedValue replaceResult = JSFunction::Call(info); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); JSHandle replValue(thread, replaceResult); @@ -1272,32 +1264,37 @@ JSTaggedValue BuiltinsRegExp::ReplaceInternal(JSThread *thread, RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); } else { // n. Else, + JSHandle capturesArray = factory->NewTaggedArray(ncaptures); if (!namedCaptures->IsUndefined()) { JSHandle namedCapturesObj = JSTaggedValue::ToObject(thread, namedCaptures); RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); namedCaptures = JSHandle::Cast(namedCapturesObj); } + for (uint32_t index = 0; index < ncaptures; index++) { + capturesArray->Set(thread, index, capturesList[index]); + } replacementString.Update(BuiltinsString::GetSubstitution(thread, matchString, srcString, - position, capturesList, namedCaptures, replaceValueHandle)); + position, capturesArray, namedCaptures, replaceValueHandle)); } // p. 
If position ≥ nextSourcePosition, then if (position >= nextSourcePosition) { + ASSERT(REPLACE_RESULT_VAL * i + 1 < resultArray.size()); // ii. Let accumulatedResult be the String formed by concatenating the code units of the current value // of accumulatedResult with the substring of S consisting of the code units from nextSourcePosition // (inclusive) up to position (exclusive) and with the code units of replacement. // store undefined in resultArray - resultArray->Set(thread, REPLACE_RESULT_VAL * i, JSTaggedValue::Undefined()); + resultArray[REPLACE_RESULT_VAL * i] = globalConst->GetHandledUndefined(); uint64_t bits = 0; bits |= ReplaceLengthField::Encode(position - nextSourcePosition); bits |= ReplacePositionField::Encode(nextSourcePosition); // store position and length bits in resultLengthArray resultLengthArray[REPLACE_RESULT_VAL * i] = bits; resultStrLength += (position - nextSourcePosition); - auto subString = EcmaStringAccessor::FastSubString( + isUtf8 &= EcmaStringAccessor::SubStringIsUtf8( thread->GetEcmaVM(), srcString, nextSourcePosition, position - nextSourcePosition); - isUtf8 &= EcmaStringAccessor(subString).IsUtf8(); // store replacement string in resultArray - resultArray->Set(thread, REPLACE_RESULT_VAL * i + 1, replacementString.GetTaggedValue()); + resultArray[REPLACE_RESULT_VAL * i + 1] = + JSHandle(thread, replacementString.GetTaggedValue()); uint32_t replacementLength = EcmaStringAccessor(replacementString).GetLength(); // store length of replacement string in resultLengthArray resultLengthArray[REPLACE_RESULT_VAL * i + 1] = static_cast(replacementLength); @@ -1311,13 +1308,12 @@ JSTaggedValue BuiltinsRegExp::ReplaceInternal(JSThread *thread, // 17. If nextSourcePosition ≥ lengthS, return accumulatedResult. 
if (nextSourcePosition < length) { // store undefined in resultArray - resultArray->Set(thread, REPLACE_RESULT_VAL * resultsIndex, JSTaggedValue::Undefined()); + resultArray[REPLACE_RESULT_VAL * resultsIndex] = globalConst->GetHandledUndefined(); uint64_t bits = 0; bits |= ReplaceLengthField::Encode(length - nextSourcePosition); bits |= ReplacePositionField::Encode(nextSourcePosition); - auto subStringEnd = EcmaStringAccessor::FastSubString( + isUtf8 &= EcmaStringAccessor::SubStringIsUtf8( thread->GetEcmaVM(), srcString, nextSourcePosition, length - nextSourcePosition); - isUtf8 &= EcmaStringAccessor(subStringEnd).IsUtf8(); // store position and length bits in resultLengthArray resultLengthArray[REPLACE_RESULT_VAL * resultsIndex] = bits; resultStrLength += (length - nextSourcePosition); @@ -2891,7 +2887,8 @@ JSTaggedValue BuiltinsRegExp::GetExecResultGroups(JSThread *thread, const JSHand } JSHandle BuiltinsRegExp::CreateStringFromResultArray(JSThread *thread, - const JSHandle resultArray, const std::vector &resultLengthArray, + const CVector> &resultArray, + const std::vector &resultLengthArray, JSHandle srcString, uint32_t resultStrLength, bool isUtf8) { JSHandle result = JSHandle(thread, @@ -2902,9 +2899,9 @@ JSHandle BuiltinsRegExp::CreateStringFromResultArray(JSThread *threa } FlatStringInfo resultInfo = FlatStringInfo(*result, 0, resultStrLength); uint32_t nextPos = 0; - uint32_t resultArrayLength = resultArray->GetLength(); - for (int i = 0; i < static_cast(resultArrayLength); i++) { - JSTaggedValue substrValue = resultArray->Get(thread, i); + uint32_t resultArrayLength = resultArray.size(); + for (uint32_t i = 0; i < resultArrayLength; i++) { + JSTaggedValue substrValue = resultArray[i].GetTaggedValue(); if (substrValue.IsHole()) { continue; } diff --git a/ecmascript/builtins/builtins_regexp.h b/ecmascript/builtins/builtins_regexp.h index 56440f4b8b..24008c3b32 100644 --- a/ecmascript/builtins/builtins_regexp.h +++ b/ecmascript/builtins/builtins_regexp.h @@ 
-196,7 +196,8 @@ private:
         JSHandle inputString, int32_t lastIndex);
     static JSTaggedValue RegExpSplitFast(JSThread *thread, const JSHandle regexp,
                                          JSHandle string, uint32_t limit, bool useCache);
-    static JSHandle CreateStringFromResultArray(JSThread *thread, const JSHandle resultArray,
+    static JSHandle CreateStringFromResultArray(JSThread *thread,
+        const CVector> &resultArray,
         const std::vector &resultLengthArray, JSHandle srcString,
         uint32_t resultStrLength, bool isUtf8);
 };
diff --git a/ecmascript/ecma_string.cpp b/ecmascript/ecma_string.cpp
index 9109cab901..fdad437d51 100755
--- a/ecmascript/ecma_string.cpp
+++ b/ecmascript/ecma_string.cpp
@@ -185,6 +185,21 @@ EcmaString *EcmaString::GetSubString(const EcmaVM *vm,
     return FastSubString(vm, src, start, length);
 }
 
+// Tells whether the substring [start, start + length) of src is UTF-8 without building the substring:
+// true for an empty range or a UTF-8 source, otherwise the result of CanBeCompressed on that range.
+bool EcmaString::SubStringIsUtf8(const EcmaVM *vm,
+    const JSHandle &src, uint32_t start, uint32_t length)
+{
+    // Written without "start + length" so the bounds check cannot wrap around in uint32_t.
+    ASSERT(start <= src->GetLength() && length <= src->GetLength() - start);
+    if (length == 0 || src->IsUtf8()) {
+        return true;
+    }
+    // UTF-16 source: flatten it, then inspect only the requested range of its UTF-16 data.
+    FlatStringInfo srcFlat = FlattenAllString(vm, src);
+    return CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
+}
+
 void EcmaString::WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length)
 {
     ASSERT(IsLineString() && !IsConstantString());
diff --git a/ecmascript/ecma_string.h b/ecmascript/ecma_string.h
index d486a52c7a..1fc72c52f0 100755
--- a/ecmascript/ecma_string.h
+++ b/ecmascript/ecma_string.h
@@ -143,6 +143,8 @@ private:
         uint32_t length, bool compressed);
     static EcmaString *FastSubString(const EcmaVM *vm,
         const JSHandle &src, uint32_t start, uint32_t length);
+    static bool SubStringIsUtf8(const EcmaVM *vm,
+        const JSHandle &src, uint32_t start, uint32_t length);
     static EcmaString *GetSlicedString(const EcmaVM *vm,
         const JSHandle &src, uint32_t start, uint32_t length);
     static EcmaString *GetSubString(const EcmaVM *vm,
@@ -1139,7 +1141,14 @@ public:
     {
         return EcmaString::FastSubString(vm, src, start, length);
     }
 
+    // Forwards to EcmaString::SubStringIsUtf8 for the range [start, start + length) of src.
+    static bool SubStringIsUtf8(const EcmaVM *vm,
+        const JSHandle &src, uint32_t start, uint32_t length)
+    {
+        return EcmaString::SubStringIsUtf8(vm, src, start, length);
+    }
+
     // get
     static EcmaString *GetSubString(const EcmaVM *vm,
         const JSHandle &src, uint32_t start, uint32_t length)
diff --git a/test/moduletest/regexp/expect_output.txt b/test/moduletest/regexp/expect_output.txt
index 726ecf71d5..e3a4738bb6 100644
--- a/test/moduletest/regexp/expect_output.txt
+++ b/test/moduletest/regexp/expect_output.txt
@@ -191,4 +191,15 @@ SyntaxError
 SyntaxError
 SyntaxError
 \\\[ ["\\["]
-\\\[ ["\\["]
\ No newline at end of file
+\\\[ ["\\["]
+ab.defgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgz
+ab.defgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgz
+abCdefgxyzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgz
+abxyzdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgz
+abxCyzdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgz
+abxCyzdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgz
+abCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgz
+abCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgzabCdefgz
+utf83这要替换"!
+3这要替换"!
+abcdefg3higk"!
diff --git a/test/moduletest/regexp/regexp.js b/test/moduletest/regexp/regexp.js
index 201257e21b..b03cdd97d8 100644
--- a/test/moduletest/regexp/regexp.js
+++ b/test/moduletest/regexp/regexp.js
@@ -812,4 +812,52 @@ print(res8.flags);
     } catch (e) {
         print(e.name);
     }
-}
\ No newline at end of file
+}
+
+{
+    // Redefine RegExp.prototype.global as a data property whose value is false.
+    Object.defineProperty(RegExp.prototype, "global", {
+        value: false
+    });
+
+    const plainRe = /[Cz]/;
+    const globalRe = /[Cz]/g;
+    const captureRe = /([Cz])/;
+    const globalCaptureRe = /([Cz])/g;
+
+    // Build the subject by doubling "abCdefgz" three times (eight copies in total).
+    let haystack = "abCdefgz";
+    for (let round = 0; round < 3; round++) {
+        haystack += haystack;
+    }
+
+    // [regexp, replaceValue] pairs, run in this exact order: the regexp objects are shared
+    // between cases, so reordering could change the printed results.
+    const replaceCases = [
+        [plainRe, "."],
+        [globalRe, "."],
+        [globalRe, "xyz"],
+        [plainRe, "xyz"],
+        [captureRe, "x$1yz"],
+        [globalCaptureRe, "x$1yz"],
+        [captureRe, String],
+        [globalCaptureRe, String],
+    ];
+    for (const [re, replaceValue] of replaceCases) {
+        print(re[Symbol.replace](haystack, replaceValue));
+    }
+
+    // Subjects aimed at the three SubStringIsUtf8 paths, in order:
+    //   1. non-ASCII subject with an ASCII prefix before the match (CanBeCompressed scan),
+    //   2. non-ASCII subject matched at index 0 (zero-length prefix),
+    //   3. pure ASCII subject (source is already UTF-8).
+    const subjects = [
+        'utf83c这要替换"!',
+        '3c这要替换"!',
+        'abcdefg3chigk"!',
+    ];
+    for (const subject of subjects) {
+        // The literal is evaluated once per iteration, so every subject gets its own regexp object.
+        print(subject.replace(/([0-9])([a-z])/g, "$1"));
+    }
+}