optimize string regexp related interface

Issue: https://gitee.com/openharmony/arkcompiler_ets_runtime/issues/I9BBXA?from=project-issue

Signed-off-by: chenjx-huawei <chenjingxiang1@huawei.com>
Change-Id: If9dfed652e9338338cae87962815ff1b7744085b
This commit is contained in:
chenjx-huawei 2024-03-25 14:52:04 +08:00
parent d0896b61f3
commit 59c437599e
11 changed files with 181 additions and 17 deletions

View File

@ -1310,10 +1310,9 @@ JSTaggedValue BuiltinsRegExp::RegExpSearchFast(JSThread *thread,
const JSHandle<JSTaggedValue> string)
{
JSHandle<RegExpExecResultCache> cacheTable(thread->GetCurrentEcmaContext()->GetRegExpCache());
uint32_t lastIndexInput = static_cast<uint32_t>(GetLastIndex(thread, regexp, true));
JSTaggedValue cacheResult = cacheTable->FindCachedResult(thread, string,
RegExpExecResultCache::SEARCH_TYPE, regexp,
JSTaggedValue(lastIndexInput));
JSTaggedValue(0));
if (!cacheResult.IsUndefined()) {
return cacheResult;
}
@ -1570,10 +1569,9 @@ JSTaggedValue BuiltinsRegExp::RegExpSplitFast(JSThread *thread, const JSHandle<J
}
JSHandle<RegExpExecResultCache> cacheTable(thread->GetCurrentEcmaContext()->GetRegExpCache());
if (useCache) {
uint32_t lastIndexInput = static_cast<uint32_t>(GetLastIndex(thread, regexp, true));
JSTaggedValue cacheResult = cacheTable->FindCachedResult(thread, jsString,
RegExpExecResultCache::SPLIT_TYPE, regexp,
JSTaggedValue(lastIndexInput));
JSTaggedValue(0));
if (!cacheResult.IsUndefined()) {
return cacheResult;
}
@ -2759,7 +2757,7 @@ JSTaggedValue BuiltinsRegExp::GetExecResultIndex(JSThread *thread, const JSHandl
}
JSHandle<JSTaggedValue> resultIndex = thread->GlobalConstants()->GetHandledIndexString();
JSTaggedValue index = ObjectFastOperator::FastGetPropertyByValue(
thread, execResults.GetTaggedValue(), resultIndex.GetTaggedValue());
thread, execResults.GetTaggedValue(), resultIndex.GetTaggedValue());
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
return index;
}
@ -2772,7 +2770,7 @@ JSTaggedValue BuiltinsRegExp::GetExecResultGroups(JSThread *thread, const JSHand
}
JSHandle<JSTaggedValue> groupKey = thread->GlobalConstants()->GetHandledGroupsString();
JSTaggedValue groups = ObjectFastOperator::FastGetPropertyByValue(
thread, execResults.GetTaggedValue(), groupKey.GetTaggedValue());
thread, execResults.GetTaggedValue(), groupKey.GetTaggedValue());
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
return groups;
}

View File

@ -1665,16 +1665,38 @@ JSTaggedValue BuiltinsString::Split(EcmaRuntimeCallInfo *argv)
// Builds the result array for splitting thisString into one-character substrings.
// NOTE(review): this span is rendered from a diff, so removed and added lines appear side by side
// (resultArray and elementString are each declared twice). The summary below follows the
// TaggedArray/cache based path and should be confirmed against the applied file.
//  - When lim == UINT32_MAX - 1, the StringSplitResultCache is consulted first, keyed by thisString and
//    an empty separator string; on a hit a JSArray is created from the array returned by
//    FindCachedResult and returned immediately.
//  - Otherwise min(thisLength, lim) substrings of length 1 are stored into a TaggedArray, a JSArray is
//    created from that TaggedArray, and (again only when lim == UINT32_MAX - 1) the TaggedArray is
//    stored in the cache before the JSArray is returned.
JSTaggedValue BuiltinsString::CreateArrayFromString(JSThread *thread, EcmaVM *ecmaVm,
const JSHandle<EcmaString> &thisString, uint32_t thisLength, uint32_t lim)
{
bool isUtf8 = EcmaStringAccessor(thisString).IsUtf8();
bool canBeCompressed = EcmaStringAccessor::CanBeCompressed(*thisString);
// Bitwise & on two bools: both operands are already evaluated above, the result is true only if both are.
bool isOneByte = isUtf8 & canBeCompressed;
// The empty string is used as the separator part of the cache key.
JSHandle<EcmaString> seperatorString = thread->GetEcmaVM()->GetFactory()->GetEmptyString();
// Cache lookup is attempted only for this specific lim value.
if (lim == UINT32_MAX - 1) {
JSHandle<StringSplitResultCache> cacheTable(thread->GetCurrentEcmaContext()->GetStringSplitResultCache());
JSTaggedValue cacheResult = StringSplitResultCache::FindCachedResult(thread, cacheTable, thisString,
seperatorString, isOneByte);
if (cacheResult != JSTaggedValue::Undefined()) {
JSHandle<JSTaggedValue> resultArray(JSArray::CreateArrayFromList(thread,
JSHandle<TaggedArray>(thread, cacheResult)));
return resultArray.GetTaggedValue();
}
}
// Number of one-character elements to produce.
uint32_t actualLength = std::min(thisLength, lim);
JSHandle<JSObject> resultArray(JSArray::ArrayCreate(thread, JSTaggedNumber(actualLength)));
RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
JSHandle<TaggedArray> array = factory->NewTaggedArray(actualLength);
for (uint32_t i = 0; i < actualLength; ++i) {
EcmaString *elementString = EcmaStringAccessor::FastSubString(ecmaVm, thisString, i, 1);
JSHandle<JSTaggedValue> elementTag(thread, elementString);
EcmaString *elementString = EcmaStringAccessor::GetSubString(ecmaVm, thisString, i, 1);
// Perform CreateDataProperty(A, "0", S), CreateDataProperty's fast path
JSObject::CreateDataProperty(thread, resultArray, i, elementTag);
// NOTE(review): Set<false> presumably stores without a write barrier - TODO confirm against TaggedArray::Set.
if (isOneByte) {
array->Set<false>(thread, i, JSTaggedValue(elementString));
} else {
array->Set(thread, i, JSTaggedValue(elementString));
}
ASSERT_PRINT(!thread->HasPendingException(), "CreateDataProperty can't throw exception");
}
JSHandle<JSArray> resultArray = JSArray::CreateArrayFromList(thread, array);
// Store the element array in the cache under the same condition used for the lookup above.
if (lim == UINT32_MAX - 1) {
JSHandle<StringSplitResultCache> cacheTable(thread->GetCurrentEcmaContext()->GetStringSplitResultCache());
StringSplitResultCache::SetCachedResult(thread, cacheTable, thisString, seperatorString, array);
}
return resultArray.GetTaggedValue();
}
@ -2323,7 +2345,7 @@ JSTaggedValue StringSplitResultCache::CreateCacheTable(const JSThread *thread)
JSTaggedValue StringSplitResultCache::FindCachedResult(const JSThread *thread,
const JSHandle<StringSplitResultCache> &cache, const JSHandle<EcmaString> &thisString,
const JSHandle<EcmaString> &pattern)
const JSHandle<EcmaString> &pattern, bool isOneByte)
{
uint32_t hash = EcmaStringAccessor(thisString).GetHashcode();
uint32_t entry = hash & (CACHE_SIZE - 1);
@ -2341,8 +2363,12 @@ JSTaggedValue StringSplitResultCache::FindCachedResult(const JSThread *thread,
JSHandle<TaggedArray> cacheArray(thread, cache->Get(index + ARRAY_INDEX));
uint32_t arrayLength = cacheArray->GetLength();
ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
JSHandle<TaggedArray> copyArray = factory->NewAndCopyTaggedArray(cacheArray,
arrayLength, arrayLength);
JSHandle<TaggedArray> copyArray;
if (isOneByte) {
copyArray = factory->NewAndCopyTaggedArraySkipBarrier(cacheArray, arrayLength, arrayLength);
} else {
copyArray = factory->NewAndCopyTaggedArray(cacheArray, arrayLength, arrayLength);
}
return copyArray.GetTaggedValue();
}
return JSTaggedValue::Undefined();

View File

@ -270,7 +270,7 @@ private:
static JSTaggedValue Pad(EcmaRuntimeCallInfo *argv, bool isStart);
static int32_t ConvertDoubleToInt(double d);
static JSTaggedValue CreateArrayFromString(JSThread *thread, EcmaVM *ecmaVm,
const JSHandle<EcmaString> &thisString, uint32_t thisLength, uint32_t lim);
const JSHandle<EcmaString> &thisString, uint32_t thisLength, uint32_t lim = UINT32_MAX - 1);
static JSTaggedValue CreateArrayBySplitString(JSThread *thread, EcmaVM *ecmaVm,
const JSHandle<EcmaString> &thisString, const JSHandle<EcmaString> &seperatorString,
uint32_t thisLength, uint32_t seperatorLength, uint32_t lim);
@ -298,7 +298,7 @@ public:
}
static JSTaggedValue CreateCacheTable(const JSThread *thread);
static JSTaggedValue FindCachedResult(const JSThread *thread, const JSHandle<StringSplitResultCache> &cache,
const JSHandle<EcmaString> &string, const JSHandle<EcmaString> &pattern);
const JSHandle<EcmaString> &string, const JSHandle<EcmaString> &pattern, bool isOneByte = false);
static void SetCachedResult(const JSThread *thread, const JSHandle<StringSplitResultCache> &cache,
const JSHandle<EcmaString> &string, const JSHandle<EcmaString> &pattern,
const JSHandle<TaggedArray> &result);

View File

@ -169,6 +169,9 @@ inline EcmaString *EcmaString::CreateLineStringWithSpaceType(const EcmaVM *vm, s
case MemSpaceType::SHARED_NON_MOVABLE:
string = vm->GetFactory()->AllocNonMovableLineStringObject(size);
break;
case MemSpaceType::SHARED_READ_ONLY_SPACE:
string = vm->GetFactory()->AllocReadOnlyLineStringObject(size);
break;
default:
LOG_ECMA(FATAL) << "this branch is unreachable";
UNREACHABLE();

View File

@ -175,6 +175,24 @@ EcmaString *EcmaStringTable::CreateAndInternStringNonMovable(EcmaVM *vm, const u
return str;
}
/*
 * Creates (or reuses) an interned string whose storage is requested from
 * MemSpaceType::SHARED_READ_ONLY_SPACE. Intended only for global constant strings
 * created from read-only space.
 *
 * The table mutex is held for the whole call. If the table already contains a matching
 * string, that string is returned as-is; otherwise a new string is created, given the
 * hashcode computed during the lookup, and inserted into the table.
 * This function only inserts into the string table and provides no string-table validity check.
 */
EcmaString *EcmaStringTable::CreateAndInternStringReadOnly(EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len)
{
    RuntimeLockHolder locker(vm->GetJSThread(), mutex_);
    // lookup.first: existing table entry (nullptr when absent); lookup.second: hashcode from the lookup.
    const std::pair<EcmaString *, uint32_t> lookup = GetStringThreadUnsafe(utf8Data, utf8Len, true);
    EcmaString *interned = lookup.first;
    if (interned == nullptr) {
        interned = EcmaStringAccessor::CreateFromUtf8(vm, utf8Data, utf8Len, true,
                                                      MemSpaceType::SHARED_READ_ONLY_SPACE);
        interned->SetMixHashcode(lookup.second);
        InternStringThreadUnsafe(interned);
    }
    return interned;
}
EcmaString *EcmaStringTable::GetOrInternString(EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len,
bool canBeCompress)
{
@ -378,7 +396,7 @@ JSTaggedValue SingleCharTable::CreateSingleCharTable(JSThread *thread)
ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
for (uint32_t i = 1; i < MAX_ONEBYTE_CHARCODE; ++i) {
std::string tmp(1, i + 0X00); // 1: size
table->Set(thread, i, factory->NewFromASCIINonMovable(tmp).GetTaggedValue());
table->Set(thread, i, factory->NewFromASCIIReadOnly(tmp).GetTaggedValue());
}
return table.GetTaggedValue();
}

View File

@ -43,6 +43,7 @@ public:
const JSHandle<EcmaString> &secondString);
EcmaString *GetOrInternString(EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress);
EcmaString *CreateAndInternStringNonMovable(EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len);
EcmaString *CreateAndInternStringReadOnly(EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len);
EcmaString *GetOrInternString(EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len, bool canBeCompress);
EcmaString *GetOrInternString(EcmaVM *vm, EcmaString *string);
EcmaString *GetOrInternCompressedSubString(EcmaVM *vm, const JSHandle<EcmaString> &string,

View File

@ -61,6 +61,13 @@ EcmaString *ObjectFactory::AllocOldSpaceLineStringObject(size_t size)
thread_, JSHClass::Cast(thread_->GlobalConstants()->GetLineStringClass().GetTaggedObject()), size));
}
// Allocates a line-string object of `size` bytes through the shared heap's
// AllocateReadOnlyOrHugeObject, using the line-string hclass taken from the global constants.
EcmaString *ObjectFactory::AllocReadOnlyLineStringObject(size_t size)
{
    NewSObjectHook();
    auto *lineStringClass = JSHClass::Cast(thread_->GlobalConstants()->GetLineStringClass().GetTaggedObject());
    auto *rawObject = sHeap_->AllocateReadOnlyOrHugeObject(thread_, lineStringClass, size);
    return reinterpret_cast<EcmaString *>(rawObject);
}
EcmaString *ObjectFactory::AllocSlicedStringObject(MemSpaceType type)
{
ASSERT(IsSMemSpace(type));

View File

@ -2462,6 +2462,24 @@ JSHandle<TaggedArray> ObjectFactory::NewAndCopyTaggedArray(JSHandle<TaggedArray>
return dstElements;
}
// Allocates a TaggedArray of newLength elements and fills it from srcElements.
//  - Elements [0, oldLength) are copied from srcElements starting at offset k, using Set<false>.
//    NOTE(review): Set<false> presumably stores without a write barrier, as the function name
//    suggests; callers would then have to guarantee that is safe for the copied values - TODO confirm.
//  - Elements [oldLength, newLength) are filled with JSTaggedValue::Hole() using the default Set.
// Arrays shorter than LENGTH_THRESHOLD are requested from SEMI_SPACE, all others from OLD_SPACE.
// An empty array (newLength == 0) is returned without touching any element.
JSHandle<TaggedArray> ObjectFactory::NewAndCopyTaggedArraySkipBarrier(JSHandle<TaggedArray> &srcElements,
                                                                      uint32_t newLength, uint32_t oldLength,
                                                                      uint32_t k)
{
    ASSERT(oldLength <= newLength);
    MemSpaceType targetSpace;
    if (newLength < LENGTH_THRESHOLD) {
        targetSpace = MemSpaceType::SEMI_SPACE;
    } else {
        targetSpace = MemSpaceType::OLD_SPACE;
    }
    JSHandle<TaggedArray> copy = NewTaggedArrayWithoutInit(newLength, targetSpace);
    if (newLength != 0) {
        uint32_t idx = 0;
        // Copied prefix.
        for (; idx < oldLength; ++idx) {
            copy->Set<false>(thread_, idx, srcElements->Get(idx + k));
        }
        // Remaining tail, if any.
        for (; idx < newLength; ++idx) {
            copy->Set(thread_, idx, JSTaggedValue::Hole());
        }
    }
    return copy;
}
JSHandle<TaggedArray> ObjectFactory::NewAndCopySNameDictionary(JSHandle<TaggedArray> &srcElements, uint32_t length)
{
JSHandle<TaggedArray> dstElements = NewSDictionaryArray(length);
@ -3018,6 +3036,16 @@ JSHandle<EcmaString> ObjectFactory::GetStringFromStringTableNonMovable(const uin
return JSHandle<EcmaString>(thread_, stringTable->CreateAndInternStringNonMovable(vm_, utf8Data, utf8Len));
}
// Returns a handle to the interned string for the given UTF-8 bytes, created through
// EcmaStringTable::CreateAndInternStringReadOnly. A zero-length input yields the factory's
// empty string without consulting the string table.
JSHandle<EcmaString> ObjectFactory::GetStringFromStringTableReadOnly(const uint8_t *utf8Data, uint32_t utf8Len) const
{
    NewObjectHook();
    if (utf8Len != 0) {
        auto table = vm_->GetEcmaStringTable();
        return JSHandle<EcmaString>(thread_, table->CreateAndInternStringReadOnly(vm_, utf8Data, utf8Len));
    }
    return GetEmptyString();
}
JSHandle<EcmaString> ObjectFactory::GetStringFromStringTable(const uint16_t *utf16Data, uint32_t utf16Len,
bool canBeCompress) const
{
@ -3996,6 +4024,13 @@ JSHandle<EcmaString> ObjectFactory::NewFromASCIINonMovable(std::string_view data
return GetStringFromStringTableNonMovable(utf8Data, data.length());
}
// Creates an interned string from `data` via GetStringFromStringTableReadOnly.
// The bytes are asserted (when ASSERT is active) to satisfy EcmaStringAccessor::CanBeCompressed.
JSHandle<EcmaString> ObjectFactory::NewFromASCIIReadOnly(std::string_view data)
{
    const uint8_t *bytes = reinterpret_cast<const uint8_t *>(data.data());
    ASSERT(EcmaStringAccessor::CanBeCompressed(bytes, data.length()));
    JSHandle<EcmaString> interned = GetStringFromStringTableReadOnly(bytes, data.length());
    return interned;
}
JSHandle<EcmaString> ObjectFactory::NewFromUtf8(std::string_view data)
{
auto utf8Data = reinterpret_cast<const uint8_t *>(data.data());

View File

@ -366,6 +366,8 @@ public:
uint32_t oldLength, uint32_t k = 0);
JSHandle<TaggedArray> NewAndCopyTaggedArray(JSHandle<TaggedArray> &srcElements, uint32_t newLength,
uint32_t oldLength, uint32_t k = 0);
JSHandle<TaggedArray> NewAndCopyTaggedArraySkipBarrier(JSHandle<TaggedArray> &srcElements, uint32_t newLength,
uint32_t oldLength, uint32_t k = 0);
JSHandle<TaggedArray> NewAndCopySNameDictionary(JSHandle<TaggedArray> &srcElements, uint32_t length);
JSHandle<TaggedArray> NewAndCopyTaggedArrayByObject(JSHandle<JSObject> thisObjHandle, uint32_t newLength,
uint32_t oldLength, uint32_t k = 0);
@ -595,6 +597,7 @@ public:
inline EcmaString *AllocLineStringObject(size_t size);
inline EcmaString *AllocLineStringObjectNoGC(size_t size);
inline EcmaString *AllocOldSpaceLineStringObject(size_t size);
inline EcmaString *AllocReadOnlyLineStringObject(size_t size);
inline EcmaString *AllocNonMovableLineStringObject(size_t size);
inline EcmaString *AllocSlicedStringObject(MemSpaceType type);
inline EcmaString *AllocConstantStringObject(MemSpaceType type);
@ -853,6 +856,8 @@ private:
// used to create nonmovable utf8 string at global constants
JSHandle<EcmaString> NewFromASCIINonMovable(std::string_view data);
// used to create read-only utf8 string at global constants
JSHandle<EcmaString> NewFromASCIIReadOnly(std::string_view data);
// used for creating Function
JSHandle<JSFunction> NewJSFunction(const JSHandle<GlobalEnv> &env, const JSHandle<JSHClass> &hclass);
@ -872,6 +877,7 @@ private:
JSHandle<EcmaString> GetCompressedSubStringFromStringTable(const JSHandle<EcmaString> &string, uint32_t offset,
uint32_t utf8Len) const;
JSHandle<EcmaString> GetStringFromStringTableNonMovable(const uint8_t *utf8Data, uint32_t utf8Len) const;
JSHandle<EcmaString> GetStringFromStringTableReadOnly(const uint8_t *utf8Data, uint32_t utf8Len) const;
// For MUtf-8 string data
EcmaString *PUBLIC_API GetRawStringFromStringTable(StringData sd,
MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE,

View File

@ -12,3 +12,27 @@
# limitations under the License.
TypeError: Cannot convert a BigInt value to a number
17
true
true
1200
true
true
9
true
true
1000
true
true
10
true
true
201
true
true
7
true
true
201
true
true

View File

@ -25,3 +25,49 @@ try {
} catch (e) {
print(e);
}
// Test String.prototype.split and cache
// Each group below calls split twice on the same string with the same separator, then prints:
//   1. the length of the first result,
//   2. whether both results have the same length,
//   3. whether the first elements of both results compare equal.
// NOTE(review): the second call of each pair presumably exercises the split result cache - confirm
// against the runtime implementation.

// Inputs: a short and a long string of Latin letters, and a short and a long string of \u04xx characters.
const shortString = "ababaabcdefaaaaab";
const shortTwoBytesString = "\u0429\u0428\u0428\u0429\u0429\u0428\u0429\u0429\u0429";
const longString = new Array(200).fill("abcdef").join('');
const longTwoBytesString = new Array(200).fill("\u0426\u0427\u0428\u0429\u0430").join('');
// Empty separator, short Latin string.
let res1 = shortString.split('');
let res2 = shortString.split('');
print(res1.length)
print(res1.length == res2.length);
print(res1[0] == res2[0]);
// Empty separator, long Latin string.
let res3 = longString.split('');
let res4 = longString.split('');
print(res3.length)
print(res3.length == res4.length);
print(res3[0] == res4[0]);
// Empty separator, short \u04xx string.
let res5 = shortTwoBytesString.split('');
let res6 = shortTwoBytesString.split('');
print(res5.length)
print(res5.length == res6.length);
print(res5[0] == res6[0]);
// Empty separator, long \u04xx string.
let res7 = longTwoBytesString.split('');
let res8 = longTwoBytesString.split('');
print(res7.length)
print(res7.length == res8.length);
print(res7[0] == res8[0]);
// Separator 'a', short Latin string.
let res9 = shortString.split('a');
let res10 = shortString.split('a');
print(res9.length)
print(res9.length == res10.length);
print(res9[0] == res10[0]);
// Separator 'a', long Latin string.
let res11 = longString.split('a');
let res12 = longString.split('a');
print(res11.length)
print(res11.length == res12.length);
print(res11[0] == res12[0]);
// Separator '\u0429', short \u04xx string.
let res13 = shortTwoBytesString.split('\u0429');
let res14 = shortTwoBytesString.split('\u0429');
print(res13.length)
print(res13.length == res14.length);
print(res13[0] == res14[0]);
// Separator '\u0429', long \u04xx string.
let res15 = longTwoBytesString.split('\u0429');
let res16 = longTwoBytesString.split('\u0429');
print(res15.length)
print(res15.length == res16.length);
print(res15[0] == res16[0]);