mirror of
https://gitee.com/openharmony/arkcompiler_ets_runtime
synced 2024-10-06 23:54:03 +00:00
optimize regexp replace internal string concat
Issue: https://gitee.com/open_harmony/dashboard?issue_id=I9GE8E Signed-off-by: chenjx-huawei <chenjingxiang1@huawei.com> Change-Id: I94e2be5fc98d98e7346f54845359ed4d667e6c6e
This commit is contained in:
parent
e9edd355d5
commit
d5086b5c96
@ -1082,7 +1082,11 @@ JSTaggedValue BuiltinsRegExp::ReplaceInternal(JSThread *thread,
|
||||
}
|
||||
}
|
||||
// 14. Let accumulatedResult be the empty String value.
|
||||
JSMutableHandle<EcmaString> accumulatedResult(thread, factory->GetEmptyString());
|
||||
bool isUtf8 = EcmaStringAccessor(srcString).IsUtf8();
|
||||
uint32_t resultStrLength = 0;
|
||||
uint32_t resultArrayLength = (resultsIndex + 1) * 2;
|
||||
JSHandle<TaggedArray> resultArray = factory->NewTaggedArray(resultArrayLength);
|
||||
std::vector<uint64_t> resultLengthArray(resultArrayLength);
|
||||
// 15. Let nextSourcePosition be 0.
|
||||
uint32_t nextSourcePosition = 0;
|
||||
JSMutableHandle<JSTaggedValue> getMatchString(thread, JSTaggedValue::Undefined());
|
||||
@ -1214,39 +1218,49 @@ JSTaggedValue BuiltinsRegExp::ReplaceInternal(JSThread *thread,
|
||||
// ii. Let accumulatedResult be the String formed by concatenating the code units of the current value
|
||||
// of accumulatedResult with the substring of S consisting of the code units from nextSourcePosition
|
||||
// (inclusive) up to position (exclusive) and with the code units of replacement.
|
||||
auto substr = EcmaStringAccessor::FastSubString(thread->GetEcmaVM(),
|
||||
JSHandle<EcmaString>::Cast(inputStr), nextSourcePosition, position - nextSourcePosition);
|
||||
accumulatedResult.Update(JSHandle<EcmaString>(thread, EcmaStringAccessor::Concat(thread->GetEcmaVM(),
|
||||
accumulatedResult, JSHandle<EcmaString>(thread, substr))));
|
||||
accumulatedResult.Update(JSHandle<EcmaString>(thread, EcmaStringAccessor::Concat(thread->GetEcmaVM(),
|
||||
accumulatedResult, replacementString)));
|
||||
// store undefined in resultArray
|
||||
resultArray->Set(thread, REPLACE_RESULT_VAL * i, JSTaggedValue::Undefined());
|
||||
uint64_t bits = 0;
|
||||
bits |= ReplaceLengthField::Encode(position - nextSourcePosition);
|
||||
bits |= ReplacePositionField::Encode(nextSourcePosition);
|
||||
// store position and length bits in resultLengthArray
|
||||
resultLengthArray[REPLACE_RESULT_VAL * i] = bits;
|
||||
resultStrLength += (position - nextSourcePosition);
|
||||
// store replacement string in resultArray
|
||||
resultArray->Set(thread, REPLACE_RESULT_VAL * i + 1, replacementString.GetTaggedValue());
|
||||
uint32_t replacementLength = EcmaStringAccessor(replacementString).GetLength();
|
||||
// store length of replacement string in resultLengthArray
|
||||
resultLengthArray[REPLACE_RESULT_VAL * i + 1] = static_cast<uint64_t>(replacementLength);
|
||||
resultStrLength += replacementLength;
|
||||
isUtf8 &= EcmaStringAccessor(replacementString).IsUtf8();
|
||||
// iii. Let nextSourcePosition be position + matchLength.
|
||||
nextSourcePosition = position + matchLength;
|
||||
}
|
||||
}
|
||||
|
||||
// 17. If nextSourcePosition ≥ lengthS, return accumulatedResult.
|
||||
if (nextSourcePosition >= length) {
|
||||
if (useCache) {
|
||||
RegExpExecResultCache::AddResultInCache(thread, cacheTable, thisObj, string,
|
||||
JSHandle<JSTaggedValue>(accumulatedResult),
|
||||
RegExpExecResultCache::REPLACE_TYPE, 0, nextIndexHandle->GetInt(),
|
||||
inputReplaceValue.GetTaggedValue());
|
||||
}
|
||||
return accumulatedResult.GetTaggedValue();
|
||||
if (nextSourcePosition < length) {
|
||||
// store undefined in resultArray
|
||||
resultArray->Set(thread, REPLACE_RESULT_VAL * resultsIndex, JSTaggedValue::Undefined());
|
||||
uint64_t bits = 0;
|
||||
bits |= ReplaceLengthField::Encode(length - nextSourcePosition);
|
||||
bits |= ReplacePositionField::Encode(nextSourcePosition);
|
||||
// store position and length bits in resultLengthArray
|
||||
resultLengthArray[REPLACE_RESULT_VAL * resultsIndex] = bits;
|
||||
resultStrLength += (length - nextSourcePosition);
|
||||
}
|
||||
|
||||
JSHandle<EcmaString> result =
|
||||
CreateStringFromResultArray(thread, resultArray, resultLengthArray, srcString, resultStrLength, isUtf8);
|
||||
// 18. Return the String formed by concatenating the code units of accumulatedResult with the substring of S
|
||||
// consisting of the code units from nextSourcePosition (inclusive) up through the final code unit of S(inclusive).
|
||||
auto substr = EcmaStringAccessor::FastSubString(thread->GetEcmaVM(),
|
||||
JSHandle<EcmaString>::Cast(inputStr), nextSourcePosition, length - nextSourcePosition);
|
||||
accumulatedResult.Update(JSHandle<EcmaString>(thread, EcmaStringAccessor::Concat(thread->GetEcmaVM(),
|
||||
accumulatedResult, JSHandle<EcmaString>(thread, substr))));
|
||||
if (useCache) {
|
||||
RegExpExecResultCache::AddResultInCache(thread, cacheTable, thisObj, string,
|
||||
JSHandle<JSTaggedValue>(accumulatedResult),
|
||||
JSHandle<JSTaggedValue>(result),
|
||||
RegExpExecResultCache::REPLACE_TYPE, 0, nextIndexHandle->GetInt(),
|
||||
inputReplaceValue.GetTaggedValue());
|
||||
}
|
||||
return accumulatedResult.GetTaggedValue();
|
||||
return result.GetTaggedValue();
|
||||
}
|
||||
|
||||
// 21.2.5.9
|
||||
@ -2774,4 +2788,49 @@ JSTaggedValue BuiltinsRegExp::GetExecResultGroups(JSThread *thread, const JSHand
|
||||
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
|
||||
return groups;
|
||||
}
|
||||
|
||||
// Assembles the final replace result in one pass into a single pre-sized line string.
//
// resultArray is walked in index order; each slot describes one piece of the output:
//   - Hole:      slot is skipped.
//   - Undefined: the piece is a slice of srcString. resultLengthArray[i] packs the slice length
//                (ReplaceLengthField) and its start position inside srcString (ReplacePositionField).
//   - otherwise: the slot is cast to EcmaString and treated as a replacement string;
//                resultLengthArray[i] holds the number of code units to copy from it.
//
// resultStrLength is the length of the line string that gets allocated, and isUtf8 selects whether the
// pieces are written through the 8-bit or the 16-bit buffer of that string.
// Returns a handle to the filled string.
JSHandle<EcmaString> BuiltinsRegExp::CreateStringFromResultArray(JSThread *thread,
    const JSHandle<TaggedArray> resultArray, const std::vector<uint64_t> &resultLengthArray,
    JSHandle<EcmaString> srcString, uint32_t resultStrLength, bool isUtf8)
{
    JSHandle<EcmaString> result = JSHandle<EcmaString>(thread,
        EcmaStringAccessor::CreateLineString(thread->GetEcmaVM(), resultStrLength, isUtf8));
    FlatStringInfo flatStrInfo = EcmaStringAccessor::FlattenAllString(thread->GetEcmaVM(), srcString);
    if (EcmaStringAccessor(srcString).IsTreeString()) { // use flattenedString as srcString
        srcString = JSHandle<EcmaString>(thread, flatStrInfo.GetString());
    }
    // FlatStringInfo stores a raw EcmaString pointer, so it is created only after the flatten step above.
    // NOTE(review): FlattenAllString receives the VM and presumably may allocate for a tree string; a raw
    // pointer to `result` taken before that call could be left stale by a moving GC - confirm.
    FlatStringInfo resultInfo = FlatStringInfo(*result, 0, resultStrLength);
    uint32_t nextPos = 0;
    uint32_t resultArrayLength = resultArray->GetLength();
    for (uint32_t i = 0; i < resultArrayLength; i++) {
        JSTaggedValue substrValue = resultArray->Get(thread, i);
        if (substrValue.IsHole()) {
            continue;
        }
        // Point the writable window of the result at the next free code unit.
        resultInfo.SetStartIndex(nextPos);
        if (substrValue.IsUndefined()) {
            // Piece taken from the source string: decode (length, position) from the packed bits.
            uint64_t bits = resultLengthArray[i];
            uint32_t subLength = ReplaceLengthField::Decode(bits);
            uint32_t subPosition = ReplacePositionField::Decode(bits);
            if (isUtf8) {
                EcmaStringAccessor::WriteToFlatWithPos<uint8_t>(*srcString, resultInfo.GetDataUtf8Writable(),
                                                                subLength, subPosition);
            } else {
                EcmaStringAccessor::WriteToFlatWithPos<uint16_t>(*srcString, resultInfo.GetDataUtf16Writable(),
                                                                 subLength, subPosition);
            }
            nextPos += subLength;
        } else {
            // Piece is a replacement string stored directly in the array.
            EcmaString *replacementStr = EcmaString::Cast(substrValue.GetTaggedObject());
            uint32_t replaceLength = static_cast<uint32_t>(resultLengthArray[i]);
            if (isUtf8) {
                EcmaStringAccessor::WriteToFlat(replacementStr, resultInfo.GetDataUtf8Writable(), replaceLength);
            } else {
                EcmaStringAccessor::WriteToFlat(replacementStr, resultInfo.GetDataUtf16Writable(), replaceLength);
            }
            nextPos += replaceLength;
        }
    }
    return result;
}
|
||||
} // namespace panda::ecmascript::builtins
|
||||
|
@ -129,6 +129,12 @@ private:
|
||||
static constexpr uint32_t EXEC_RESULT_INPUT_OFFSET = 2;
|
||||
static constexpr uint32_t EXEC_RESULT_GROUPS_OFFSET = 3;
|
||||
|
||||
// Stride used by ReplaceInternal when indexing its result arrays: for match i, slot
// REPLACE_RESULT_VAL * i describes the untouched source slice before the match and slot
// REPLACE_RESULT_VAL * i + 1 holds the replacement string.
static constexpr uint32_t REPLACE_RESULT_VAL = 2;
|
||||
// Widths of the two fields packed into one uint64_t entry of the length array for a source slice.
static constexpr unsigned REPLACE_LENGTH_BITS = 30;
|
||||
static constexpr unsigned REPLACE_POSITION_BITS = 30;
|
||||
// Slice length, starting at bit 0.
using ReplaceLengthField = BitField<uint32_t, 0, REPLACE_LENGTH_BITS>; // 30
|
||||
// Slice start position; declared as the field following ReplaceLengthField.
using ReplacePositionField = ReplaceLengthField::NextField<uint32_t, REPLACE_POSITION_BITS>; // 60
|
||||
|
||||
static bool Matcher(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
|
||||
const uint8_t *buffer, size_t length, int32_t lastindex, bool isUtf16);
|
||||
|
||||
@ -160,6 +166,9 @@ private:
|
||||
static JSTaggedValue RegExpSplitFast(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
|
||||
JSHandle<JSTaggedValue> string, uint32_t limit, bool useCache);
|
||||
static bool GetOringinalFlag(JSThread *thread, const JSHandle<JSTaggedValue> regexp, uint32_t flag);
|
||||
static JSHandle<EcmaString> CreateStringFromResultArray(JSThread *thread, const JSHandle<TaggedArray> resultArray,
|
||||
const std::vector<uint64_t> &resultLengthArray, JSHandle<EcmaString> srcString,
|
||||
uint32_t resultStrLength, bool isUtf8);
|
||||
};
|
||||
|
||||
class RegExpExecResultCache : public TaggedArray {
|
||||
|
@ -1665,7 +1665,7 @@ JSTaggedValue BuiltinsString::CreateArrayFromString(JSThread *thread, EcmaVM *ec
|
||||
{
|
||||
bool isUtf8 = EcmaStringAccessor(thisString).IsUtf8();
|
||||
bool canBeCompressed = false;
|
||||
if (EcmaStringAccessor(thisString).IsLineString() || EcmaStringAccessor(thisString).IsConstantString()) {
|
||||
if (EcmaStringAccessor(thisString).IsLineOrConstantString()) {
|
||||
canBeCompressed = EcmaStringAccessor::CanBeCompressed(*thisString);
|
||||
}
|
||||
bool isOneByte = isUtf8 & canBeCompressed;
|
||||
|
@ -423,6 +423,55 @@ void EcmaString::WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength)
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Char>
|
||||
void EcmaString::WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos)
|
||||
{
|
||||
DISALLOW_GARBAGE_COLLECTION;
|
||||
[[ maybe_unused ]] uint32_t maxLength = src->GetLength();
|
||||
if (length == 0) {
|
||||
return;
|
||||
}
|
||||
while (true) {
|
||||
ASSERT(length + pos <= maxLength && length > 0);
|
||||
ASSERT(length <= src->GetLength());
|
||||
ASSERT(pos >= 0);
|
||||
switch (src->GetStringType()) {
|
||||
case JSType::LINE_STRING: {
|
||||
if (src->IsUtf8()) {
|
||||
CopyChars(buf, src->GetDataUtf8() + pos, length);
|
||||
} else {
|
||||
CopyChars(buf, src->GetDataUtf16() + pos, length);
|
||||
}
|
||||
return;
|
||||
}
|
||||
case JSType::CONSTANT_STRING: {
|
||||
ASSERT(src->IsUtf8());
|
||||
CopyChars(buf, src->GetDataUtf8() + pos, length);
|
||||
return;
|
||||
}
|
||||
case JSType::TREE_STRING: {
|
||||
TreeEcmaString *treeSrc = TreeEcmaString::Cast(src);
|
||||
EcmaString *first = EcmaString::Cast(treeSrc->GetFirst());
|
||||
ASSERT(first->IsLineString());
|
||||
src = first;
|
||||
continue;
|
||||
}
|
||||
case JSType::SLICED_STRING: {
|
||||
EcmaString *parent = EcmaString::Cast(SlicedString::Cast(src)->GetParent());
|
||||
if (src->IsUtf8()) {
|
||||
CopyChars(buf, parent->GetDataUtf8() + SlicedString::Cast(src)->GetStartIndex() + pos, length);
|
||||
} else {
|
||||
CopyChars(buf, parent->GetDataUtf16() + SlicedString::Cast(src)->GetStartIndex() + pos, length);
|
||||
}
|
||||
return;
|
||||
}
|
||||
default:
|
||||
LOG_ECMA(FATAL) << "this branch is unreachable";
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline const uint8_t *FlatStringInfo::GetDataUtf8() const
|
||||
{
|
||||
return string_->GetDataUtf8() + startIndex_;
|
||||
@ -438,6 +487,11 @@ inline uint8_t *FlatStringInfo::GetDataUtf8Writable() const
|
||||
return string_->GetDataUtf8Writable() + startIndex_;
|
||||
}
|
||||
|
||||
inline uint16_t *FlatStringInfo::GetDataUtf16Writable() const
|
||||
{
|
||||
return string_->GetDataUtf16Writable() + startIndex_;
|
||||
}
|
||||
|
||||
inline const uint8_t *EcmaStringAccessor::GetDataUtf8()
|
||||
{
|
||||
return string_->GetDataUtf8();
|
||||
|
@ -711,6 +711,9 @@ private:
|
||||
template <typename Char>
|
||||
static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength);
|
||||
|
||||
template <typename Char>
|
||||
static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos);
|
||||
|
||||
static const uint8_t *PUBLIC_API GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf);
|
||||
|
||||
static const uint16_t *PUBLIC_API GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf);
|
||||
@ -1024,6 +1027,11 @@ public:
|
||||
return startIndex_;
|
||||
}
|
||||
|
||||
void SetStartIndex(uint32_t index)
|
||||
{
|
||||
startIndex_ = index;
|
||||
}
|
||||
|
||||
uint32_t GetLength() const
|
||||
{
|
||||
return length_;
|
||||
@ -1032,6 +1040,7 @@ public:
|
||||
const uint8_t *GetDataUtf8() const;
|
||||
const uint16_t *GetDataUtf16() const;
|
||||
uint8_t *GetDataUtf8Writable() const;
|
||||
uint16_t *GetDataUtf16Writable() const;
|
||||
std::u16string ToU16String(uint32_t len = 0);
|
||||
private:
|
||||
EcmaString *string_ {nullptr};
|
||||
@ -1248,6 +1257,18 @@ public:
|
||||
return string_->CopyDataUtf16(buf, maxLength);
|
||||
}
|
||||
|
||||
template <typename Char>
|
||||
static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos)
|
||||
{
|
||||
src->WriteToFlatWithPos(src, buf, length, pos);
|
||||
}
|
||||
|
||||
template <typename Char>
|
||||
static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength)
|
||||
{
|
||||
src->WriteToFlat(src, buf, maxLength);
|
||||
}
|
||||
|
||||
// require dst is LineString
|
||||
// not change src data structure.
|
||||
// if src is not flat, this func has low efficiency.
|
||||
|
@ -12,3 +12,17 @@
|
||||
# limitations under the License.
|
||||
|
||||
[1.1]
|
||||
这是一段lineString,X1这是替换的字符串A2
|
||||
这是一段lineString,X1这是替换的字符串X2
|
||||
这是一段treeString,X1这是替换的字符串B2
|
||||
这是一段treeString,X1这是替换的字符串X2
|
||||
这是一段slicedString,X1这是要替换的字符串C2
|
||||
这是一段slicedString,X1这是要替换的字符串X2
|
||||
aaaaxxxxxxxyyyyyyyyybbbbxxxxxxxyyyyyyyyyccccxxxxxxxyyyyyyyyyaaaabbbbcccc
|
||||
aaaaxxxxxxxxxxxxxxbbbbxxxxxxxxxxxxxxccccxxxxxxxxxxxxxxaaaabbbbcccc
|
||||
aaaaxxxxxxxxxxxxxxyyyyyyyybbbbxxxxxxxxxxxxxxyyyyyyyyccccxxxxxxxxxxxxxxyyyyyyyyaaaabbbbcccc
|
||||
aaaaxxxxxxxxxxxxxxybbbbxxxxxxxxxxxxxxyccccxxxxxxxxxxxxxxyaaaabbbbcccc
|
||||
aaaa哈哈哈哈哈哈哈嘻嘻嘻嘻嘻嘻嘻嘻嘻bbbb哈哈哈哈哈哈哈嘻嘻嘻嘻嘻嘻嘻嘻嘻cccc哈哈哈哈哈哈哈嘻嘻嘻嘻嘻嘻嘻嘻嘻aaaabbbbcccc
|
||||
aaaa哈哈哈哈哈哈哈哈哈哈哈哈哈哈bbbb哈哈哈哈哈哈哈哈哈哈哈哈哈哈cccc哈哈哈哈哈哈哈哈哈哈哈哈哈哈aaaabbbbcccc
|
||||
aaaa哈哈哈哈哈哈哈哈哈嘻嘻嘻嘻嘻嘻嘻bbbb哈哈哈哈哈哈哈哈哈嘻嘻嘻嘻嘻嘻嘻cccc哈哈哈哈哈哈哈哈哈嘻嘻嘻嘻嘻嘻嘻aaaabbbbcccc
|
||||
aaaa哈哈哈哈哈哈哈哈哈嘻bbbb哈哈哈哈哈哈哈哈哈嘻cccc哈哈哈哈哈哈哈哈哈嘻aaaabbbbcccc
|
||||
|
@ -29,4 +29,75 @@ r.exec = function() {
|
||||
return coercibleValue;
|
||||
};
|
||||
let a = r[Symbol.replace]('', '[$<length>]');
|
||||
print(a)
|
||||
print(a)
|
||||
|
||||
let lineString1 = "这是一段lineString,A1这是替换的字符串A2"
|
||||
let treeString1 = "这是一段treeString,".concat("B1这是替换的字符串B2")
|
||||
let slicedString = "这是一段slicedString,C1这是要替换的字符串C2,xxxxxxxx".slice(0, 30);
|
||||
|
||||
var re1 = /[ABC]/;
|
||||
var re2 = /[ABC]/g;
|
||||
|
||||
var res1 = lineString1.replace(re1, "X");
|
||||
var res2 = lineString1.replace(re2, "X");
|
||||
var res3 = treeString1.replace(re1, "X");
|
||||
var res4 = treeString1.replace(re2, "X");
|
||||
var res5 = slicedString.replace(re1, "X");
|
||||
var res6 = slicedString.replace(re2, "X");
|
||||
|
||||
print(res1)
|
||||
print(res2)
|
||||
print(res3)
|
||||
print(res4)
|
||||
print(res5)
|
||||
print(res6)
|
||||
|
||||
let lineString2 = "aaaaAbbbbBccccCaaaabbbbcccc"
|
||||
|
||||
// Replacer callback: joins a 7-char and a 9-char ASCII piece with concat() at call time
// (presumably so the engine hands back a concatenated rather than a literal string).
function func1() {
    const head = "xxxxxxx";
    const tail = "yyyyyyyyy";
    return head.concat(tail);
}
|
||||
var res = lineString2.replace(re2, func1);
|
||||
print(res)
|
||||
|
||||
// Replacer callback: joins two identical 7-char ASCII pieces with concat() at call time.
function func2() {
    const head = "xxxxxxx";
    const tail = "xxxxxxx";
    return head.concat(tail);
}
|
||||
res = lineString2.replace(re2, func2);
|
||||
print(res)
|
||||
|
||||
// Replacer callback: joins a 14-char and an 8-char ASCII piece with concat() at call time.
function func3() {
    const head = "xxxxxxxxxxxxxx";
    const tail = "yyyyyyyy";
    return head.concat(tail);
}
|
||||
res = lineString2.replace(re2, func3);
|
||||
print(res)
|
||||
|
||||
// Replacer callback: joins a 14-char ASCII piece and a single character with concat() at call time.
function func4() {
    const head = "xxxxxxxxxxxxxx";
    const tail = "y";
    return head.concat(tail);
}
|
||||
res = lineString2.replace(re2, func4);
|
||||
print(res)
|
||||
|
||||
// Replacer callback: joins a 7-char and a 9-char non-ASCII piece with concat() at call time.
function func5() {
    const head = "哈哈哈哈哈哈哈";
    const tail = "嘻嘻嘻嘻嘻嘻嘻嘻嘻";
    return head.concat(tail);
}
|
||||
res = lineString2.replace(re2, func5);
|
||||
print(res)
|
||||
|
||||
// Replacer callback: joins two identical 7-char non-ASCII pieces with concat() at call time.
function func6() {
    const head = "哈哈哈哈哈哈哈";
    const tail = "哈哈哈哈哈哈哈";
    return head.concat(tail);
}
|
||||
res = lineString2.replace(re2, func6);
|
||||
print(res)
|
||||
|
||||
// Replacer callback: joins a 9-char and a 7-char non-ASCII piece with concat() at call time.
function func7() {
    const head = "哈哈哈哈哈哈哈哈哈";
    const tail = "嘻嘻嘻嘻嘻嘻嘻";
    return head.concat(tail);
}
|
||||
res = lineString2.replace(re2, func7);
|
||||
print(res)
|
||||
|
||||
// Replacer callback: joins a 9-char non-ASCII piece and a single character with concat() at call time.
function func8() {
    const head = "哈哈哈哈哈哈哈哈哈";
    const tail = "嘻";
    return head.concat(tail);
}
|
||||
res = lineString2.replace(re2, func8);
|
||||
print(res)
|
@ -76,4 +76,8 @@ print(res15[0] == res16[0]);
|
||||
var a = "12345678910"
|
||||
var b = "12345678910"
|
||||
var c = a.concat(b);
|
||||
c.split("")
|
||||
c.split("")
|
||||
|
||||
// Test split string is sliced string
|
||||
var d = a.slice(4)
|
||||
d.split("")
|
Loading…
Reference in New Issue
Block a user