mirror of
https://gitee.com/openharmony/arkcompiler_ets_runtime
synced 2024-10-07 16:13:49 +00:00
00ee3d9b98
关联:I57C0W Signed-off-by: yaoyuan <yuanyao14@huawei.com> Change-Id: I6255909235e1a3e43766338015a3cd932bf3542d
371 lines
13 KiB
C++
371 lines
13 KiB
C++
/*
|
|
* Copyright (c) 2021 Huawei Device Co., Ltd.
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#ifndef ECMASCRIPT_STRING_H
|
|
#define ECMASCRIPT_STRING_H
|
|
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
|
|
#include "ecmascript/base/utf_helper.h"
|
|
#include "ecmascript/ecma_macros.h"
|
|
#include "ecmascript/js_tagged_value.h"
|
|
#include "ecmascript/mem/tagged_object.h"
|
|
#include "ecmascript/mem/barriers.h"
|
|
|
|
namespace panda {
|
|
namespace ecmascript {
|
|
template<typename T>
|
|
class JSHandle;
|
|
class EcmaVM;
|
|
|
|
class EcmaString : public TaggedObject {
|
|
public:
|
|
static EcmaString *Cast(ObjectHeader *object);
|
|
static const EcmaString *ConstCast(const TaggedObject *object);
|
|
|
|
static EcmaString *CreateEmptyString(const EcmaVM *vm);
|
|
static EcmaString *CreateFromUtf8(const uint8_t *utf8Data, uint32_t utf8Len, const EcmaVM *vm, bool canBeCompress);
|
|
static EcmaString *CreateFromUtf16(const uint16_t *utf16Data, uint32_t utf16Len, const EcmaVM *vm,
|
|
bool canBeCompress);
|
|
static EcmaString *Concat(const JSHandle<EcmaString> &str1Handle, const JSHandle<EcmaString> &str2Handle,
|
|
const EcmaVM *vm);
|
|
static EcmaString *FastSubString(const JSHandle<EcmaString> &src, uint32_t start, uint32_t utf16Len,
|
|
const EcmaVM *vm);
|
|
|
|
static constexpr uint32_t STRING_COMPRESSED_BIT = 0x1;
|
|
static constexpr uint32_t STRING_INTERN_BIT = 0x2;
|
|
enum CompressedStatus {
|
|
STRING_COMPRESSED,
|
|
STRING_UNCOMPRESSED,
|
|
};
|
|
|
|
template<bool verify = true>
|
|
uint16_t At(int32_t index) const;
|
|
|
|
int32_t Compare(const EcmaString *rhs) const;
|
|
|
|
bool IsUtf16() const
|
|
{
|
|
return compressedStringsEnabled ? ((GetMixLength() & STRING_COMPRESSED_BIT) == STRING_UNCOMPRESSED) : true;
|
|
}
|
|
|
|
bool IsUtf8() const
|
|
{
|
|
return compressedStringsEnabled ? ((GetMixLength() & STRING_COMPRESSED_BIT) == STRING_COMPRESSED) : false;
|
|
}
|
|
|
|
static size_t ComputeDataSizeUtf16(uint32_t length)
|
|
{
|
|
return length * sizeof(uint16_t);
|
|
}
|
|
|
|
/**
|
|
* Methods for uncompressed strings (UTF16):
|
|
*/
|
|
static size_t ComputeSizeUtf16(uint32_t utf16Len)
|
|
{
|
|
return DATA_OFFSET + ComputeDataSizeUtf16(utf16Len);
|
|
}
|
|
|
|
inline uint16_t *GetData() const
|
|
{
|
|
return reinterpret_cast<uint16_t *>(ToUintPtr(this) + DATA_OFFSET);
|
|
}
|
|
|
|
const uint16_t *GetDataUtf16() const
|
|
{
|
|
LOG_IF(!IsUtf16(), FATAL, RUNTIME) << "EcmaString: Read data as utf16 for utf8 string";
|
|
return GetData();
|
|
}
|
|
|
|
/**
|
|
* Methods for compresses strings (UTF8 or LATIN1):
|
|
*/
|
|
static size_t ComputeSizeUtf8(uint32_t utf8Len)
|
|
{
|
|
return DATA_OFFSET + utf8Len;
|
|
}
|
|
|
|
/**
|
|
* It's Utf8 format, but without 0 in the end.
|
|
*/
|
|
const uint8_t *GetDataUtf8() const
|
|
{
|
|
LOG_IF(IsUtf16(), FATAL, RUNTIME) << "EcmaString: Read data as utf8 for utf16 string";
|
|
return reinterpret_cast<uint8_t *>(GetData());
|
|
}
|
|
|
|
size_t GetUtf8Length() const
|
|
{
|
|
if (!IsUtf16()) {
|
|
return GetLength() + 1; // add place for zero in the end
|
|
}
|
|
return base::utf_helper::Utf16ToUtf8Size(GetData(), GetLength());
|
|
}
|
|
|
|
size_t GetUtf16Length() const
|
|
{
|
|
return GetLength();
|
|
}
|
|
|
|
inline size_t CopyDataUtf8(uint8_t *buf, size_t maxLength) const
|
|
{
|
|
if (maxLength == 0) {
|
|
return 1; // maxLength was -1 at napi
|
|
}
|
|
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
|
buf[maxLength - 1] = '\0';
|
|
// Put comparison here so that internal usage and napi can use the same CopyDataRegionUtf8
|
|
size_t length = GetLength();
|
|
if (length > maxLength) {
|
|
return 0;
|
|
}
|
|
return CopyDataRegionUtf8(buf, 0, length, maxLength) + 1; // add place for zero in the end
|
|
}
|
|
|
|
// It allows user to copy into buffer even if maxLength < length
|
|
inline size_t WriteUtf8(uint8_t *buf, size_t maxLength) const
|
|
{
|
|
if (maxLength == 0) {
|
|
return 1; // maxLength was -1 at napi
|
|
}
|
|
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
|
buf[maxLength - 1] = '\0';
|
|
return CopyDataRegionUtf8(buf, 0, GetLength(), maxLength) + 1; // add place for zero in the end
|
|
}
|
|
|
|
size_t CopyDataRegionUtf8(uint8_t *buf, size_t start, size_t length, size_t maxLength) const
|
|
{
|
|
uint32_t len = GetLength();
|
|
if (start + length > len) {
|
|
return 0;
|
|
}
|
|
if (!IsUtf16()) {
|
|
if (length > std::numeric_limits<size_t>::max() / 2 - 1) { // 2: half
|
|
LOG(FATAL, RUNTIME) << " length is higher than half of size_t::max";
|
|
UNREACHABLE();
|
|
}
|
|
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
|
// Only memcpy_s maxLength number of chars into buffer if length > maxLength
|
|
if (length > maxLength) {
|
|
if (memcpy_s(buf, maxLength, GetDataUtf8() + start, maxLength) != EOK) {
|
|
LOG(FATAL, RUNTIME) << "memcpy_s failed when length > maxlength";
|
|
UNREACHABLE();
|
|
}
|
|
return maxLength;
|
|
}
|
|
if (memcpy_s(buf, maxLength, GetDataUtf8() + start, length) != EOK) {
|
|
LOG(FATAL, RUNTIME) << "memcpy_s failed when length <= maxlength";
|
|
UNREACHABLE();
|
|
}
|
|
return length;
|
|
}
|
|
if (length > maxLength) {
|
|
return base::utf_helper::ConvertRegionUtf16ToUtf8(GetDataUtf16(), buf, maxLength, maxLength, start);
|
|
}
|
|
return base::utf_helper::ConvertRegionUtf16ToUtf8(GetDataUtf16(), buf, length, maxLength, start);
|
|
}
|
|
|
|
inline uint32_t CopyDataUtf16(uint16_t *buf, uint32_t maxLength) const
|
|
{
|
|
return CopyDataRegionUtf16(buf, 0, GetLength(), maxLength);
|
|
}
|
|
|
|
uint32_t CopyDataRegionUtf16(uint16_t *buf, uint32_t start, uint32_t length, uint32_t maxLength) const
|
|
{
|
|
if (length > maxLength) {
|
|
return 0;
|
|
}
|
|
uint32_t len = GetLength();
|
|
if (start + length > len) {
|
|
return 0;
|
|
}
|
|
if (IsUtf16()) {
|
|
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
|
if (memcpy_s(buf, ComputeDataSizeUtf16(maxLength), GetDataUtf16() + start, ComputeDataSizeUtf16(length)) !=
|
|
EOK) {
|
|
LOG(FATAL, RUNTIME) << "memcpy_s failed";
|
|
UNREACHABLE();
|
|
}
|
|
return length;
|
|
}
|
|
return base::utf_helper::ConvertRegionUtf8ToUtf16(GetDataUtf8(), buf, len, maxLength, start);
|
|
}
|
|
|
|
// NOLINTNEXTLINE(modernize-avoid-c-arrays)
|
|
inline std::unique_ptr<char[]> GetCString()
|
|
{
|
|
auto length = GetUtf8Length();
|
|
char *buf = new char[length]();
|
|
CopyDataUtf8(reinterpret_cast<uint8_t *>(buf), length);
|
|
// NOLINTNEXTLINE(modernize-avoid-c-arrays)
|
|
return std::unique_ptr<char[]>(buf);
|
|
}
|
|
|
|
inline void WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length);
|
|
inline void WriteData(char src, uint32_t start);
|
|
uint32_t GetLength() const
|
|
{
|
|
return GetMixLength() >> 2U;
|
|
}
|
|
|
|
void SetIsInternString()
|
|
{
|
|
SetMixLength(GetMixLength() | STRING_INTERN_BIT);
|
|
}
|
|
|
|
bool IsInternString() const
|
|
{
|
|
return (GetMixLength() & STRING_INTERN_BIT) != 0;
|
|
}
|
|
|
|
void ClearInternStringFlag()
|
|
{
|
|
SetMixLength(GetMixLength() & ~STRING_INTERN_BIT);
|
|
}
|
|
|
|
size_t ObjectSize() const
|
|
{
|
|
uint32_t length = GetLength();
|
|
return IsUtf16() ? ComputeSizeUtf16(length) : ComputeSizeUtf8(length);
|
|
}
|
|
|
|
uint32_t GetHashcode()
|
|
{
|
|
uint32_t hashcode = GetRawHashcode();
|
|
if (hashcode == 0) {
|
|
hashcode = ComputeHashcode(0);
|
|
SetRawHashcode(hashcode);
|
|
}
|
|
return hashcode;
|
|
}
|
|
|
|
uint32_t ComputeHashcode(uint32_t hashSeed) const;
|
|
|
|
int32_t IndexOf(const EcmaString *rhs, int pos = 0) const;
|
|
|
|
static constexpr uint32_t GetStringCompressionMask()
|
|
{
|
|
return STRING_COMPRESSED_BIT;
|
|
}
|
|
|
|
/**
|
|
* Compares string1 + string2 by bytes, It doesn't check canonical unicode equivalence.
|
|
*/
|
|
bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2);
|
|
/**
|
|
* Compares strings by bytes, It doesn't check canonical unicode equivalence.
|
|
*/
|
|
static bool StringsAreEqual(EcmaString *str1, EcmaString *str2);
|
|
/**
|
|
* Two strings have the same type of utf encoding format.
|
|
*/
|
|
static bool StringsAreEqualSameUtfEncoding(EcmaString *str1, EcmaString *str2);
|
|
/**
|
|
* Compares strings by bytes, It doesn't check canonical unicode equivalence.
|
|
*/
|
|
static bool StringsAreEqualUtf8(const EcmaString *str1, const uint8_t *utf8Data, uint32_t utf8Len,
|
|
bool canBeCompress);
|
|
/**
|
|
* Compares strings by bytes, It doesn't check canonical unicode equivalence.
|
|
*/
|
|
static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len);
|
|
static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress);
|
|
static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length);
|
|
|
|
static void SetCompressedStringsEnabled(bool val)
|
|
{
|
|
compressedStringsEnabled = val;
|
|
}
|
|
|
|
static bool GetCompressedStringsEnabled()
|
|
{
|
|
return compressedStringsEnabled;
|
|
}
|
|
|
|
static EcmaString *AllocStringObject(size_t length, bool compressed, const EcmaVM *vm);
|
|
|
|
static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len);
|
|
static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len);
|
|
static bool CanBeCompressed(const EcmaString *string);
|
|
|
|
static constexpr size_t MIX_LENGTH_OFFSET = TaggedObjectSize();
|
|
// In last bit of mix_length we store if this string is compressed or not.
|
|
ACCESSORS_PRIMITIVE_FIELD(MixLength, uint32_t, MIX_LENGTH_OFFSET, HASHCODE_OFFSET)
|
|
ACCESSORS_PRIMITIVE_FIELD(RawHashcode, uint32_t, HASHCODE_OFFSET, SIZE)
|
|
// DATA_OFFSET: the string data stored after the string header.
|
|
// Data can be stored in utf8 or utf16 form according to compressed bit.
|
|
static constexpr size_t DATA_OFFSET = SIZE; // DATA_OFFSET equal to Empty String size
|
|
|
|
private:
|
|
void SetLength(uint32_t length, bool compressed = false)
|
|
{
|
|
ASSERT(length < 0x40000000U);
|
|
// Use 0u for compressed/utf8 expression
|
|
SetMixLength((length << 2U) | (compressed ? STRING_COMPRESSED : STRING_UNCOMPRESSED));
|
|
}
|
|
|
|
uint16_t *GetDataUtf16Writable()
|
|
{
|
|
LOG_IF(!IsUtf16(), FATAL, RUNTIME) << "EcmaString: Read data as utf16 for utf8 string";
|
|
return GetData();
|
|
}
|
|
|
|
uint8_t *GetDataUtf8Writable()
|
|
{
|
|
LOG_IF(IsUtf16(), FATAL, RUNTIME) << "EcmaString: Read data as utf8 for utf16 string";
|
|
return reinterpret_cast<uint8_t *>(GetData());
|
|
}
|
|
|
|
static void CopyUtf16AsUtf8(const uint16_t *utf16From, uint8_t *utf8To, uint32_t utf16Len);
|
|
|
|
static bool compressedStringsEnabled;
|
|
|
|
static bool IsASCIICharacter(uint16_t data)
|
|
{
|
|
// \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000']
|
|
return data - 1U < base::utf_helper::UTF8_1B_MAX;
|
|
}
|
|
|
|
/**
|
|
* str1 should have the same length as utf16_data.
|
|
* Converts utf8Data to utf16 and compare it with given utf16_data.
|
|
*/
|
|
static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
|
|
uint32_t utf16Len);
|
|
|
|
template<typename T>
|
|
/**
|
|
* Check that two spans are equal. Should have the same length.
|
|
*/
|
|
static bool StringsAreEquals(Span<const T> &str1, Span<const T> &str2);
|
|
|
|
template<typename T>
|
|
/**
|
|
* Copy String from src to dst
|
|
* */
|
|
static bool StringCopy(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count);
|
|
|
|
template<typename T1, typename T2>
|
|
static int32_t IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max);
|
|
};
|
|
|
|
static_assert((EcmaString::DATA_OFFSET % static_cast<uint8_t>(MemAlignment::MEM_ALIGN_OBJECT)) == 0);
|
|
} // namespace ecmascript
|
|
} // namespace panda
|
|
#endif // ECMASCRIPT_STRING_H
|