mirror of
https://gitee.com/openharmony/arkcompiler_ets_runtime
synced 2024-11-27 04:00:37 +00:00
Fix the UTF-8 to UTF-16 conversion bug in 'utf_helper' that occurs when the UTF-8 data does not end with '\0', and rework EcmaString to pass explicit UTF-8 lengths
Signed-off-by: zhaozhibo <zhaozhibo3@huawei.com>
This commit is contained in:
parent
9d9de75ab3
commit
bd53e23a1a
@ -69,14 +69,14 @@ bool NumberHelper::GotoNonspace(uint8_t **ptr, const uint8_t *end)
|
||||
while (*ptr < end) {
|
||||
uint16_t c = **ptr;
|
||||
size_t size = 1;
|
||||
if (**ptr > INT8_MAX) {
|
||||
if (c > INT8_MAX) {
|
||||
size = 0;
|
||||
uint16_t utf8Bit = INT8_MAX + 1; // equal 0b1000'0000
|
||||
while (utf8Bit > 0 && (c & utf8Bit) == utf8Bit) {
|
||||
++size;
|
||||
utf8Bit >>= 1UL;
|
||||
}
|
||||
if (base::utf_helper::ConvertRegionUtf8ToUtf16(*ptr, &c, 1, 0) <= 0) {
|
||||
if (base::utf_helper::ConvertRegionUtf8ToUtf16(*ptr, &c, end - *ptr, 1, 0) <= 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -35,7 +35,7 @@ bool IsValidUTF8(const std::vector<uint8_t> &data)
|
||||
uint32_t length = data.size();
|
||||
switch (length) {
|
||||
case UtfLength::ONE:
|
||||
if (data.at(0) > BIT_MASK_1) {
|
||||
if (data.at(0) >= BIT_MASK_1) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
@ -216,48 +216,14 @@ std::pair<uint32_t, size_t> ConvertUtf8ToUtf16Pair(const uint8_t *data, bool com
|
||||
return {pair, UtfLength::FOUR};
|
||||
}
|
||||
|
||||
size_t Utf8ToUtf16Size(const uint8_t *utf8)
|
||||
size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len)
|
||||
{
|
||||
size_t res = 0;
|
||||
while (*utf8 != '\0') { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
auto [pair, nbytes] = ConvertUtf8ToUtf16Pair(utf8);
|
||||
res += pair > 0xffff ? UtfLength::TWO : UtfLength::ONE; // NOLINT(readability-magic-numbers)
|
||||
utf8 += nbytes; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
}
|
||||
return res;
|
||||
return utf::MUtf8ToUtf16Size(utf8, utf8Len);
|
||||
}
|
||||
|
||||
size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf16Len, size_t start)
|
||||
size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf8Len, size_t utf16Len,
|
||||
size_t start)
|
||||
{
|
||||
ASSERT(utf16Out != nullptr);
|
||||
size_t outPos = 0;
|
||||
while (*utf8In != '\0') { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
auto [pair, nbytes] = ConvertUtf8ToUtf16Pair(utf8In);
|
||||
auto [pHi, pLo] = utf::SplitUtf16Pair(pair);
|
||||
|
||||
utf8In += nbytes; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
if (start > 0) {
|
||||
start -= nbytes;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pHi != 0) {
|
||||
if (outPos >= utf16Len - 1) { // check for place for two uint16
|
||||
break;
|
||||
}
|
||||
outPos++;
|
||||
*utf16Out++ = pHi; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
}
|
||||
if (outPos >= utf16Len) {
|
||||
break;
|
||||
}
|
||||
outPos++;
|
||||
*utf16Out++ = pLo; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
if (outPos >= utf16Len) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return outPos;
|
||||
return utf::ConvertRegionMUtf8ToUtf16(utf8In, utf16Out, utf8Len, utf16Len, start);
|
||||
}
|
||||
} // namespace panda::ecmascript::base::utf_helper
|
||||
|
@ -69,9 +69,10 @@ size_t ConvertRegionUtf16ToUtf8(const uint16_t *utf16In, uint8_t *utf8Out, size_
|
||||
|
||||
std::pair<uint32_t, size_t> ConvertUtf8ToUtf16Pair(const uint8_t *data, bool combine = false);
|
||||
|
||||
size_t Utf8ToUtf16Size(const uint8_t *utf8);
|
||||
size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len);
|
||||
|
||||
size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf16Len, size_t start);
|
||||
size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf8Len, size_t utf16Len,
|
||||
size_t start);
|
||||
|
||||
static inline uint32_t CombineTwoU16(uint16_t d0, uint16_t d1)
|
||||
{
|
||||
|
@ -63,12 +63,12 @@ inline EcmaString *EcmaString::CreateFromUtf8(const uint8_t *utf8Data, uint32_t
|
||||
UNREACHABLE();
|
||||
}
|
||||
} else {
|
||||
auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data);
|
||||
auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
|
||||
string = AllocStringObject(utf16Len, false, vm);
|
||||
ASSERT(string != nullptr);
|
||||
|
||||
[[maybe_unused]] auto len =
|
||||
base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, string->GetDataUtf16Writable(), utf16Len, 0);
|
||||
base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, string->GetDataUtf16Writable(), utf8Len, utf16Len, 0);
|
||||
ASSERT(len == utf16Len);
|
||||
}
|
||||
|
||||
|
@ -321,7 +321,7 @@ bool EcmaString::StringsAreEqualUtf8(const EcmaString *str1, const uint8_t *utf8
|
||||
Span<const uint8_t> data2(utf8Data, utf8Len);
|
||||
return EcmaString::StringsAreEquals(data1, data2);
|
||||
}
|
||||
return IsUtf8EqualsUtf16(utf8Data, str1->GetDataUtf16(), str1->GetLength());
|
||||
return IsUtf8EqualsUtf16(utf8Data, utf8Len, str1->GetDataUtf16(), str1->GetLength());
|
||||
}
|
||||
|
||||
/* static */
|
||||
@ -331,7 +331,7 @@ bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *ut
|
||||
if (str1->GetLength() != utf16Len) {
|
||||
result = false;
|
||||
} else if (!str1->IsUtf16()) {
|
||||
result = IsUtf8EqualsUtf16(str1->GetDataUtf8(), utf16Data, utf16Len);
|
||||
result = IsUtf8EqualsUtf16(str1->GetDataUtf8(), str1->GetLength(), utf16Data, utf16Len);
|
||||
} else {
|
||||
Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
|
||||
Span<const uint16_t> data2(utf16Data, utf16Len);
|
||||
@ -422,15 +422,16 @@ uint32_t EcmaString::ComputeHashcode() const
|
||||
}
|
||||
|
||||
/* static */
|
||||
uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, bool canBeCompress)
|
||||
uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
|
||||
{
|
||||
uint32_t hash;
|
||||
if (canBeCompress) {
|
||||
hash = ComputeHashForUtf8(utf8Data);
|
||||
} else {
|
||||
auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data);
|
||||
auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
|
||||
CVector<uint16_t> tmpBuffer(utf16Len);
|
||||
[[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf16Len, 0);
|
||||
[[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
|
||||
utf16Len, 0);
|
||||
ASSERT(len == utf16Len);
|
||||
hash = ComputeHashForData(tmpBuffer.data(), utf16Len);
|
||||
}
|
||||
@ -444,12 +445,13 @@ uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t le
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, const uint16_t *utf16Data, uint32_t utf16Len)
|
||||
bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
|
||||
uint32_t utf16Len)
|
||||
{
|
||||
// Convert at most one code unit more than utf16Len: that is enough to detect a length mismatch without converting all of utf8Data.
|
||||
uint32_t utf8ConvertLength = utf16Len + 1;
|
||||
CVector<uint16_t> tmpBuffer(utf8ConvertLength);
|
||||
auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8ConvertLength, 0);
|
||||
auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len, utf8ConvertLength, 0);
|
||||
if (len != utf16Len) {
|
||||
return false;
|
||||
}
|
||||
|
@ -171,7 +171,7 @@ public:
|
||||
}
|
||||
return length;
|
||||
}
|
||||
return base::utf_helper::ConvertRegionUtf8ToUtf16(GetDataUtf8(), buf, maxLength, start);
|
||||
return base::utf_helper::ConvertRegionUtf8ToUtf16(GetDataUtf8(), buf, len, maxLength, start);
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(modernize-avoid-c-arrays)
|
||||
@ -245,7 +245,7 @@ public:
|
||||
* Compares strings by bytes; it doesn't check canonical Unicode equivalence.
|
||||
*/
|
||||
static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len);
|
||||
static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, bool canBeCompress);
|
||||
static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress);
|
||||
static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length);
|
||||
|
||||
static void SetCompressedStringsEnabled(bool val)
|
||||
@ -303,7 +303,8 @@ private:
|
||||
* str1 should have the same length as utf16_data.
|
||||
* Converts utf8Data to UTF-16 and compares it with the given utf16_data.
|
||||
*/
|
||||
static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, const uint16_t *utf16Data, uint32_t utf16Len);
|
||||
static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
|
||||
uint32_t utf16Len);
|
||||
|
||||
template<typename T>
|
||||
/**
|
||||
|
@ -26,7 +26,7 @@ EcmaStringTable::EcmaStringTable(const EcmaVM *vm) : vm_(vm) {}
|
||||
|
||||
EcmaString *EcmaStringTable::GetString(const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress) const
|
||||
{
|
||||
uint32_t hashCode = EcmaString::ComputeHashcodeUtf8(utf8Data, canBeCompress);
|
||||
uint32_t hashCode = EcmaString::ComputeHashcodeUtf8(utf8Data, utf8Len, canBeCompress);
|
||||
for (auto it = table_.find(hashCode); it != table_.end(); it++) {
|
||||
auto foundedString = it->second;
|
||||
if (EcmaString::StringsAreEqualUtf8(foundedString, utf8Data, utf8Len, canBeCompress)) {
|
||||
|
@ -1647,7 +1647,7 @@ HWTEST_F_L0(EcmaStringTest, ComputeHashcodeUtf8)
|
||||
for (uint32_t i = 0; i < lengthEcmaStrU8; i++) {
|
||||
hashExpect = hashExpect * 31 + arrayU8[i];
|
||||
}
|
||||
EXPECT_EQ(EcmaString::ComputeHashcodeUtf8(&arrayU8[0], lengthEcmaStrU8), static_cast<int32_t>(hashExpect));
|
||||
EXPECT_EQ(EcmaString::ComputeHashcodeUtf8(&arrayU8[0], lengthEcmaStrU8, true), static_cast<int32_t>(hashExpect));
|
||||
}
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user