2021-09-04 08:06:49 +00:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2021 Huawei Device Co., Ltd.
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2021-09-07 14:24:16 +00:00
|
|
|
#ifndef ECMASCRIPT_STRING_INL_H
|
|
|
|
#define ECMASCRIPT_STRING_INL_H
|
2021-09-04 08:06:49 +00:00
|
|
|
|
|
|
|
#include "ecmascript/ecma_string.h"
|
2022-08-24 11:25:56 +00:00
|
|
|
#include "ecmascript/base/string_helper.h"
|
2021-09-04 08:06:49 +00:00
|
|
|
#include "ecmascript/ecma_vm.h"
|
|
|
|
#include "ecmascript/js_handle.h"
|
|
|
|
#include "ecmascript/js_tagged_value-inl.h"
|
2022-04-28 12:17:41 +00:00
|
|
|
#include "ecmascript/object_factory-inl.h"
|
2021-09-04 08:06:49 +00:00
|
|
|
|
|
|
|
namespace panda::ecmascript {
|
|
|
|
/* static */
|
|
|
|
// Builds the zero-length string object.
// The object is obtained from the factory's non-movable line-string allocator with a size of
// EcmaString::SIZE; its length is set to 0 with the compressed flag on and its raw hashcode to 0.
inline EcmaString *EcmaString::CreateEmptyString(const EcmaVM *vm)
{
    auto factory = vm->GetFactory();
    auto string = factory->AllocNonMovableLineStringObject(EcmaString::SIZE);
    string->SetLength(0, true);
    string->SetRawHashcode(0);
    return string;
}
|
|
|
|
|
|
|
|
/* static */
|
2022-08-24 11:25:56 +00:00
|
|
|
// Creates a line string from a UTF-8 buffer.
//   vm            - VM whose factory performs the allocation.
//   utf8Data      - source bytes.
//   utf8Len       - number of source bytes; 0 yields the factory's shared empty string.
//   canBeCompress - caller's claim that the data can be stored compressed (one byte per unit);
//                   debug builds assert that it matches CanBeCompressed() on the result.
//   type          - memory space forwarded to CreateLineStringWithSpaceType.
// When canBeCompress is true the bytes are copied verbatim; otherwise the input is converted
// to UTF-16 code units. A failing memcpy_s is logged as FATAL.
inline EcmaString *EcmaString::CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
                                              bool canBeCompress, MemSpaceType type)
{
    if (utf8Len == 0) {
        return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
    }

    EcmaString *string = nullptr;
    if (!canBeCompress) {
        // Wide path: size the destination in UTF-16 code units first, then convert into it.
        auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
        string = CreateLineStringWithSpaceType(vm, utf16Len, false, type);
        ASSERT(string != nullptr);

        [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(
            utf8Data, string->GetDataUtf16Writable(), utf8Len, utf16Len, 0);
        ASSERT(len == utf16Len);
    } else {
        // Compressed path: one storage byte per input byte, plain copy.
        string = CreateLineStringWithSpaceType(vm, utf8Len, true, type);
        ASSERT(string != nullptr);

        const auto copyResult = memcpy_s(string->GetDataUtf8Writable(), utf8Len, utf8Data, utf8Len);
        if (copyResult != EOK) {
            LOG_FULL(FATAL) << "memcpy_s failed";
            UNREACHABLE();
        }
    }

    ASSERT_PRINT(canBeCompress == CanBeCompressed(string), "Bad input canBeCompress!");
    return string;
}
|
|
|
|
|
2022-08-24 11:25:56 +00:00
|
|
|
// Creates a line string from a UTF-16 buffer.
//   vm            - VM whose factory performs the allocation.
//   utf16Data     - source code units.
//   utf16Len      - number of source code units; 0 yields the factory's shared empty string.
//   canBeCompress - caller's claim that the data can be stored compressed; debug builds assert
//                   that it matches CanBeCompressed() on the result.
//   type          - memory space forwarded to CreateLineStringWithSpaceType.
// Compressed strings are filled through CopyChars into the 8-bit storage; uncompressed strings
// are filled with a raw memcpy_s whose failure is logged as FATAL.
inline EcmaString *EcmaString::CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len,
                                               bool canBeCompress, MemSpaceType type)
{
    if (utf16Len == 0) {
        return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
    }

    auto string = CreateLineStringWithSpaceType(vm, utf16Len, canBeCompress, type);
    ASSERT(string != nullptr);

    if (!canBeCompress) {
        // Byte count of the UTF-16 payload; used as both destination capacity and copy size.
        uint32_t byteCount = utf16Len * (sizeof(uint16_t) / sizeof(uint8_t));
        const auto copyResult = memcpy_s(string->GetDataUtf16Writable(), byteCount, utf16Data, byteCount);
        if (copyResult != EOK) {
            LOG_FULL(FATAL) << "memcpy_s failed";
            UNREACHABLE();
        }
    } else {
        CopyChars(string->GetDataUtf8Writable(), utf16Data, utf16Len);
    }

    ASSERT_PRINT(canBeCompress == CanBeCompressed(string), "Bad input canBeCompress!");
    return string;
}
|
|
|
|
|
2022-10-23 15:17:44 +00:00
|
|
|
/* static */
|
|
|
|
// Allocates a line string of `length` units through the factory's AllocLineStringObject.
// The object size comes from LineEcmaString::ComputeSizeUtf8 when `compressed` is true and from
// ComputeSizeUtf16 otherwise. The new string has its length/compressed flag set and its raw
// hashcode reset to 0; the character storage is not written here.
inline EcmaString *EcmaString::CreateLineString(const EcmaVM *vm, size_t length, bool compressed)
{
    size_t size = 0;
    if (compressed) {
        size = LineEcmaString::ComputeSizeUtf8(length);
    } else {
        size = LineEcmaString::ComputeSizeUtf16(length);
    }

    auto string = vm->GetFactory()->AllocLineStringObject(size);
    string->SetLength(length, compressed);
    string->SetRawHashcode(0);
    return string;
}
|
|
|
|
|
|
|
|
/* static */
|
2022-10-23 15:17:44 +00:00
|
|
|
// Allocates a line string of `length` units directly from the heap's old space via Allocate(),
// bypassing the object factory, and installs the line-string class by hand.
// NOTE(review): presumably this path exists so that no GC is triggered, as the name suggests —
// confirm against the old-space allocator. The result of Allocate() is not checked here.
inline EcmaString *EcmaString::CreateLineStringNoGC(const EcmaVM *vm, size_t length, bool compressed)
{
    const size_t rawSize =
        compressed ? LineEcmaString::ComputeSizeUtf8(length) : LineEcmaString::ComputeSizeUtf16(length);
    // Round the request up to the object alignment before handing it to the space allocator.
    const size_t size = AlignUp(rawSize, static_cast<size_t>(MemAlignment::MEM_ALIGN_OBJECT));

    auto address = vm->GetHeap()->GetOldSpace()->Allocate(size);
    auto object = reinterpret_cast<TaggedObject *>(address);

    auto lineStringClass =
        JSHClass::Cast(vm->GetJSThread()->GlobalConstants()->GetLineStringClass().GetTaggedObject());
    object->SetClass(lineStringClass);

    auto string = EcmaString::Cast(object);
    string->SetLength(length, compressed);
    string->SetRawHashcode(0);
    return string;
}
|
|
|
|
|
2022-10-23 15:17:44 +00:00
|
|
|
/* static */
|
|
|
|
// Allocates a line string of `length` units in the memory space selected by `type`:
//   SEMI_SPACE  -> factory AllocLineStringObject
//   OLD_SPACE   -> factory AllocOldSpaceLineStringObject
//   NON_MOVABLE -> factory AllocNonMovableLineStringObject
// Any other space type hits UNREACHABLE(). The new string has its length/compressed flag set and
// its raw hashcode reset to 0; the character storage is not written here.
inline EcmaString *EcmaString::CreateLineStringWithSpaceType(const EcmaVM *vm, size_t length, bool compressed,
                                                             MemSpaceType type)
{
    size_t size = 0;
    if (compressed) {
        size = LineEcmaString::ComputeSizeUtf8(length);
    } else {
        size = LineEcmaString::ComputeSizeUtf16(length);
    }

    EcmaString *string = nullptr;
    switch (type) {
        case MemSpaceType::SEMI_SPACE: {
            string = vm->GetFactory()->AllocLineStringObject(size);
            break;
        }
        case MemSpaceType::OLD_SPACE: {
            string = vm->GetFactory()->AllocOldSpaceLineStringObject(size);
            break;
        }
        case MemSpaceType::NON_MOVABLE: {
            string = vm->GetFactory()->AllocNonMovableLineStringObject(size);
            break;
        }
        default:
            UNREACHABLE();
    }

    string->SetLength(length, compressed);
    string->SetRawHashcode(0);
    return string;
}
|
|
|
|
|
|
|
|
// Creates a tree string node whose first/second children are `left` and `right`.
//   length     - length recorded on the node (supplied by the caller, not recomputed here).
//   compressed - compressed flag recorded together with the length.
// The children are passed as handles and are only dereferenced after the node has been allocated.
inline EcmaString *EcmaString::CreateTreeString(const EcmaVM *vm,
    const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, uint32_t length, bool compressed)
{
    auto thread = vm->GetJSThread();
    auto rawNode = vm->GetFactory()->AllocTreeStringObject();
    auto string = TreeEcmaString::Cast(rawNode);

    string->SetLength(length, compressed);
    string->SetRawHashcode(0);

    string->SetFirst(thread, left.GetTaggedValue());
    string->SetSecond(thread, right.GetTaggedValue());
    return string;
}
|
|
|
|
|
2022-06-05 09:39:02 +00:00
|
|
|
/* static */
|
|
|
|
// Returns a new compressed line string holding `length` bytes of `src` starting at `start`.
//   src    - source string; must be a line string (asserted) stored as UTF-8.
//   start  - offset of the first byte to copy.
//   length - number of bytes to copy; no bounds check against src's length is done here.
// `src` is a handle and its data pointer is fetched only after the destination was allocated.
// Marked `inline` like the other definitions in this header so that including the header from
// several translation units cannot produce multiple definitions.
inline EcmaString *EcmaString::FastSubUtf8String(const EcmaVM *vm, const JSHandle<EcmaString> &src, uint32_t start,
                                                 uint32_t length)
{
    ASSERT(src->IsLineString());
    auto string = CreateLineString(vm, length, true);
    Span<uint8_t> dst(string->GetDataUtf8Writable(), length);
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    Span<const uint8_t> source(src->GetDataUtf8() + start, length);
    EcmaString::MemCopyChars(dst, length, source, length);

    ASSERT_PRINT(CanBeCompressed(string), "canBeCompresse does not match the real value!");
    return string;
}
|
|
|
|
|
|
|
|
/* static */
|
|
|
|
// Returns a new line string holding `length` UTF-16 code units of `src` starting at `start`.
//   src    - source string; must be a line string (asserted) stored as UTF-16.
//   start  - offset of the first code unit to copy.
//   length - number of code units to copy; no bounds check against src's length is done here.
// The selected range is first tested with CanBeCompressed(); if it can be compressed the result is
// stored as 8-bit data via CopyChars, otherwise the code units are copied as-is.
// `src` is a handle and its data pointer is re-fetched after the destination was allocated.
// Marked `inline` like the other definitions in this header so that including the header from
// several translation units cannot produce multiple definitions.
inline EcmaString *EcmaString::FastSubUtf16String(const EcmaVM *vm, const JSHandle<EcmaString> &src, uint32_t start,
                                                  uint32_t length)
{
    ASSERT(src->IsLineString());
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    bool canBeCompressed = CanBeCompressed(src->GetDataUtf16() + start, length);
    auto string = CreateLineString(vm, length, canBeCompressed);
    if (canBeCompressed) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        CopyChars(string->GetDataUtf8Writable(), src->GetDataUtf16() + start, length);
    } else {
        // MemCopyChars takes its capacity and count in bytes, hence the scaling by the unit size.
        uint32_t len = length * (sizeof(uint16_t) / sizeof(uint8_t));
        Span<uint16_t> dst(string->GetDataUtf16Writable(), length);
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        Span<const uint16_t> source(src->GetDataUtf16() + start, length);
        EcmaString::MemCopyChars(dst, len, source, len);
    }
    ASSERT_PRINT(canBeCompressed == CanBeCompressed(string), "canBeCompresse does not match the real value!");
    return string;
}
|
2022-10-23 15:17:44 +00:00
|
|
|
|
|
|
|
inline uint16_t *EcmaString::GetData() const
|
|
|
|
{
|
|
|
|
ASSERT_PRINT(IsLineString(), "EcmaString: Read data from not LineString");
|
|
|
|
return LineEcmaString::Cast(this)->GetData();
|
|
|
|
}
|
|
|
|
|
|
|
|
inline const uint8_t *EcmaString::GetDataUtf8() const
|
|
|
|
{
|
|
|
|
ASSERT_PRINT(IsUtf8(), "EcmaString: Read data as utf8 for utf16 string");
|
|
|
|
return reinterpret_cast<uint8_t *>(GetData());
|
|
|
|
}
|
|
|
|
|
|
|
|
inline const uint16_t *EcmaString::GetDataUtf16() const
|
|
|
|
{
|
|
|
|
LOG_ECMA_IF(!IsUtf16(), FATAL) << "EcmaString: Read data as utf16 for utf8 string";
|
|
|
|
return GetData();
|
|
|
|
}
|
|
|
|
|
|
|
|
inline uint8_t *EcmaString::GetDataUtf8Writable()
|
|
|
|
{
|
|
|
|
ASSERT_PRINT(IsUtf8(), "EcmaString: Read data as utf8 for utf16 string");
|
|
|
|
return reinterpret_cast<uint8_t *>(GetData());
|
|
|
|
}
|
|
|
|
|
|
|
|
inline uint16_t *EcmaString::GetDataUtf16Writable()
|
|
|
|
{
|
|
|
|
LOG_ECMA_IF(!IsUtf16(), FATAL) << "EcmaString: Read data as utf16 for utf8 string";
|
|
|
|
return GetData();
|
|
|
|
}
|
|
|
|
|
|
|
|
inline size_t EcmaString::GetUtf8Length(bool modify) const
|
|
|
|
{
|
|
|
|
ASSERT(IsLineString());
|
|
|
|
if (!IsUtf16()) {
|
|
|
|
return GetLength() + 1; // add place for zero in the end
|
|
|
|
}
|
|
|
|
return base::utf_helper::Utf16ToUtf8Size(GetData(), GetLength(), modify);
|
|
|
|
}
|
|
|
|
|
|
|
|
template<bool verify>
|
|
|
|
inline uint16_t EcmaString::At(int32_t index) const
|
|
|
|
{
|
|
|
|
int32_t length = static_cast<int32_t>(GetLength());
|
|
|
|
if (verify) {
|
|
|
|
if ((index < 0) || (index >= length)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (IsLineString()) {
|
|
|
|
return LineEcmaString::Cast(this)->Get<verify>(index);
|
|
|
|
} else {
|
|
|
|
return TreeEcmaString::Cast(this)->Get<verify>(index);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
inline void EcmaString::WriteData(uint32_t index, uint16_t src)
|
|
|
|
{
|
|
|
|
ASSERT(index < GetLength());
|
|
|
|
ASSERT(IsLineString());
|
|
|
|
LineEcmaString::Cast(this)->Set(index, src);
|
|
|
|
}
|
|
|
|
|
|
|
|
inline bool EcmaString::IsFlat() const
|
|
|
|
{
|
|
|
|
if (!JSTaggedValue(this).IsTreeString()) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return TreeEcmaString::Cast(this)->IsFlat();
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename Char>
|
|
|
|
void EcmaString::WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength)
|
|
|
|
{
|
|
|
|
DISALLOW_GARBAGE_COLLECTION;
|
|
|
|
uint32_t length = src->GetLength();
|
|
|
|
if (length == 0) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
while (true) {
|
|
|
|
ASSERT(length <= maxLength && length > 0);
|
|
|
|
ASSERT(length <= src->GetLength());
|
|
|
|
switch (src->GetStringType()) {
|
|
|
|
case JSType::LINE_STRING: {
|
|
|
|
if (src->IsUtf8()) {
|
|
|
|
CopyChars(buf, src->GetDataUtf8(), length);
|
|
|
|
} else {
|
|
|
|
CopyChars(buf, src->GetDataUtf16(), length);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
case JSType::TREE_STRING: {
|
|
|
|
TreeEcmaString *treeSrc = TreeEcmaString::Cast(src);
|
|
|
|
EcmaString *first = EcmaString::Cast(treeSrc->GetFirst());
|
|
|
|
EcmaString *second = EcmaString::Cast(treeSrc->GetSecond());
|
|
|
|
uint32_t firstLength = first->GetLength();
|
|
|
|
uint32_t secondLength = second->GetLength();
|
|
|
|
if (secondLength >= firstLength) {
|
|
|
|
// second string is longer. So recurse over first.
|
|
|
|
WriteToFlat(first, buf, maxLength);
|
|
|
|
if (first == second) {
|
|
|
|
CopyChars(buf + firstLength, buf, firstLength);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
buf += firstLength;
|
|
|
|
maxLength -= firstLength;
|
|
|
|
src = second;
|
|
|
|
length -= firstLength;
|
|
|
|
} else {
|
|
|
|
// first string is longer. So recurse over second.
|
|
|
|
if (secondLength > 0) {
|
|
|
|
if (secondLength == 1) {
|
|
|
|
buf[firstLength] = static_cast<Char>(second->At<false>(0));
|
|
|
|
} else if (second->IsLineString() && second->IsUtf8()) {
|
|
|
|
CopyChars(buf + firstLength, second->GetDataUtf8(), secondLength);
|
|
|
|
} else {
|
|
|
|
WriteToFlat(second, buf + firstLength, maxLength - firstLength);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
maxLength = firstLength;
|
|
|
|
src = first;
|
|
|
|
length -= secondLength;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
UNREACHABLE();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
inline const uint8_t *EcmaStringAccessor::GetDataUtf8()
|
|
|
|
{
|
|
|
|
return string_->GetDataUtf8();
|
|
|
|
}
|
|
|
|
|
|
|
|
inline const uint16_t *EcmaStringAccessor::GetDataUtf16()
|
|
|
|
{
|
|
|
|
return string_->GetDataUtf16();
|
|
|
|
}
|
|
|
|
|
|
|
|
inline size_t EcmaStringAccessor::GetUtf8Length() const
|
|
|
|
{
|
|
|
|
return string_->GetUtf8Length();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Forwards to dst->WriteData(src, start, destSize, length); all arguments are passed through
// unchanged and `dst` is the object being written to.
// NOTE(review): the meaning of start/destSize/length is defined by that WriteData overload,
// which is declared outside this file section — confirm there.
inline void EcmaStringAccessor::ReadData(EcmaString *dst, EcmaString *src, uint32_t start, uint32_t destSize,
                                         uint32_t length)
{
    dst->WriteData(src, start, destSize, length);
}
|
2022-06-05 09:39:02 +00:00
|
|
|
} // namespace panda::ecmascript
|
2021-09-04 08:06:49 +00:00
|
|
|
#endif
|