mirror of
https://gitee.com/openharmony/arkcompiler_ets_runtime
synced 2024-11-23 10:09:54 +00:00
add bound check for ConvertRegionUtf8ToUtf16
Issue: https://gitee.com/openharmony/arkcompiler_ets_runtime/issues/IAR8ZY Signed-off-by: ZhouGuangyuan <zhouguangyuan1@huawei.com> Change-Id: Ia74e4e7667af6dbc4b06afadcf1b407fca2a7dc9
This commit is contained in:
parent
fa4a5954f7
commit
cb362d2f90
@ -96,7 +96,7 @@ bool NumberHelper::GotoNonspace(uint8_t **ptr, const uint8_t *end)
|
||||
++size;
|
||||
utf8Bit >>= 1UL;
|
||||
}
|
||||
if (base::utf_helper::ConvertRegionUtf8ToUtf16(*ptr, &c, end - *ptr, 1, 0) <= 0) {
|
||||
if (base::utf_helper::ConvertRegionUtf8ToUtf16(*ptr, &c, end - *ptr, 1) <= 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -557,7 +557,6 @@ HWTEST_F_L0(UtfHelperTest, DebuggerConvertRegionUtf16ToUtf8)
|
||||
HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16)
|
||||
{
|
||||
size_t utf16Len = 100;
|
||||
size_t start = 0;
|
||||
uint8_t utf8Value[10] = {
|
||||
0x7F, // 1-length UTF16 encoding
|
||||
0xDF, 0xBF, // 1-length UTF16 encoding
|
||||
@ -565,25 +564,13 @@ HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16)
|
||||
0xF4, 0x8F, 0xBF, 0xBF}; // 2-length UTF16 encoding
|
||||
const uint8_t *utf8ValuePtr = utf8Value;
|
||||
uint16_t *utf16Out = (uint16_t*)malloc(utf16Len);
|
||||
size_t outPos = ConvertRegionUtf8ToUtf16(utf8ValuePtr, utf16Out, sizeof(utf8Value), utf16Len, start);
|
||||
size_t outPos = ConvertRegionUtf8ToUtf16(utf8ValuePtr, utf16Out, sizeof(utf8Value), utf16Len);
|
||||
// 1 + 1 + 1 + 2 = 5
|
||||
EXPECT_EQ(outPos, 5U);
|
||||
// 1 + 2 = 3
|
||||
start = 3;
|
||||
outPos = ConvertRegionUtf8ToUtf16(utf8ValuePtr, utf16Out, sizeof(utf8Value), utf16Len, start);
|
||||
utf8ValuePtr = utf8Value + 3;
|
||||
outPos = ConvertRegionUtf8ToUtf16(utf8ValuePtr, utf16Out, sizeof(utf8Value) - 3, utf16Len);
|
||||
EXPECT_EQ(outPos, 3U);
|
||||
|
||||
// When "start" is in the middle of a symbol sequence
|
||||
start = 2;
|
||||
outPos = ConvertRegionUtf8ToUtf16(utf8ValuePtr, utf16Out, sizeof(utf8Value), utf16Len, start);
|
||||
EXPECT_EQ(outPos, 0U);
|
||||
start = 4;
|
||||
outPos = ConvertRegionUtf8ToUtf16(utf8ValuePtr, utf16Out, sizeof(utf8Value), utf16Len, start);
|
||||
EXPECT_EQ(outPos, 0U);
|
||||
start = 7;
|
||||
outPos = ConvertRegionUtf8ToUtf16(utf8ValuePtr, utf16Out, sizeof(utf8Value), utf16Len, start);
|
||||
EXPECT_EQ(outPos, 0U);
|
||||
free(utf16Out);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -852,7 +839,7 @@ HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_001) {
|
||||
std::vector<uint16_t> expected_utf16 = {0x0048, 0x0065, 0x006C, 0x006C, 0x006F}; // "Hello"
|
||||
std::vector<uint16_t> utf16(10);
|
||||
size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
|
||||
utf16.data(), utf8.size(), utf16.size(), 0);
|
||||
utf16.data(), utf8.size(), utf16.size());
|
||||
utf16.resize(converted);
|
||||
EXPECT_EQ(utf16, expected_utf16);
|
||||
}
|
||||
@ -867,7 +854,7 @@ HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_002) {
|
||||
std::vector<uint16_t> expected_utf16 = {0x4F60, 0x597D, 0xFF0C, 0x4E16, 0x754C, 0xFF01}; // "你好,世界!"
|
||||
std::vector<uint16_t> utf16(10);
|
||||
size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
|
||||
utf16.data(), utf8.size(), utf16.size(), 0);
|
||||
utf16.data(), utf8.size(), utf16.size());
|
||||
utf16.resize(converted);
|
||||
EXPECT_EQ(utf16, expected_utf16);
|
||||
}
|
||||
@ -882,7 +869,7 @@ HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_003) {
|
||||
std::vector<uint16_t> expected_utf16 = {}; // Empty
|
||||
std::vector<uint16_t> utf16(10);
|
||||
size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
|
||||
utf16.data(), utf8.size(), utf16.size(), 0);
|
||||
utf16.data(), utf8.size(), utf16.size());
|
||||
utf16.resize(converted);
|
||||
EXPECT_EQ(utf16, expected_utf16);
|
||||
}
|
||||
@ -897,7 +884,7 @@ HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_004) {
|
||||
std::vector<uint16_t> expected_utf16 = {0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002C, 0x20, 0x4F60};
|
||||
std::vector<uint16_t> utf16(10);
|
||||
size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
|
||||
utf16.data(), 10, utf16.size(), 0); // Only process the first 9 bytes
|
||||
utf16.data(), 10, utf16.size()); // Only process the first 9 bytes
|
||||
utf16.resize(converted);
|
||||
EXPECT_EQ(utf16, expected_utf16);
|
||||
}
|
||||
@ -912,7 +899,7 @@ HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_005) {
|
||||
std::vector<uint16_t> expected_utf16 = {0x4F60, 0x597D}; // "你好"
|
||||
std::vector<uint16_t> utf16(2); // Limit buffer length
|
||||
size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
|
||||
utf16.data(), utf8.size(), utf16.size(), 0);
|
||||
utf16.data(), utf8.size(), utf16.size());
|
||||
utf16.resize(converted);
|
||||
EXPECT_EQ(utf16, expected_utf16);
|
||||
}
|
||||
@ -927,7 +914,7 @@ HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_006) {
|
||||
std::vector<uint16_t> expected_utf16 = {}; // Expected empty output, handling erroneous data
|
||||
std::vector<uint16_t> utf16(10);
|
||||
size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
|
||||
utf16.data(), utf8.size(), utf16.size(), 0);
|
||||
utf16.data(), utf8.size(), utf16.size());
|
||||
utf16.resize(converted);
|
||||
EXPECT_NE(utf16, expected_utf16);
|
||||
}
|
||||
@ -942,7 +929,7 @@ HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_007) {
|
||||
std::vector<uint16_t> expected_utf16 = {0x0041, 0x0042, 0x0043}; // ASCII characters: A, B, C
|
||||
std::vector<uint16_t> utf16(10);
|
||||
size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
|
||||
utf16.data(), utf8.size(), utf16.size(), 0);
|
||||
utf16.data(), utf8.size(), utf16.size());
|
||||
utf16.resize(converted);
|
||||
EXPECT_EQ(utf16, expected_utf16);
|
||||
}
|
||||
@ -957,7 +944,7 @@ HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_008) {
|
||||
std::vector<uint16_t> expected_utf16 = {0x00A2, 0x00FC}; // Unicode .
|
||||
std::vector<uint16_t> utf16(10);
|
||||
size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
|
||||
utf16.data(), utf8.size(), utf16.size(), 0);
|
||||
utf16.data(), utf8.size(), utf16.size());
|
||||
utf16.resize(converted);
|
||||
EXPECT_EQ(utf16, expected_utf16);
|
||||
}
|
||||
@ -972,7 +959,7 @@ HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_009) {
|
||||
std::vector<uint16_t> expected_utf16 = {0x20AC}; // Unicode .
|
||||
std::vector<uint16_t> utf16(10);
|
||||
size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
|
||||
utf16.data(), utf8.size(), utf16.size(), 0);
|
||||
utf16.data(), utf8.size(), utf16.size());
|
||||
utf16.resize(converted);
|
||||
EXPECT_EQ(utf16, expected_utf16);
|
||||
}
|
||||
@ -987,7 +974,7 @@ HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_010) {
|
||||
std::vector<uint16_t> expected_utf16 = {0xD83D, 0xDE0E}; // surrogates
|
||||
std::vector<uint16_t> utf16(10);
|
||||
size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
|
||||
utf16.data(), utf8.size(), utf16.size(), 0);
|
||||
utf16.data(), utf8.size(), utf16.size());
|
||||
utf16.resize(converted);
|
||||
EXPECT_EQ(utf16, expected_utf16);
|
||||
}
|
||||
@ -1003,7 +990,7 @@ HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_011) {
|
||||
0x0000, 0x0057, 0x006F, 0x0072, 0x006C, 0x0064}; // Including NULL characters
|
||||
std::vector<uint16_t> utf16(15);
|
||||
size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8Nul.data()),
|
||||
utf16.data(), utf8Nul.size(), utf16.size(), 0);
|
||||
utf16.data(), utf8Nul.size(), utf16.size());
|
||||
utf16.resize(converted);
|
||||
EXPECT_EQ(utf16, expected_utf16);
|
||||
}
|
||||
@ -1018,7 +1005,7 @@ HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_012) {
|
||||
std::vector<uint16_t> expected_utf16 = {};
|
||||
std::vector<uint16_t> utf16(10);
|
||||
size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
|
||||
utf16.data(), utf8.size(), utf16.size(), 0);
|
||||
utf16.data(), utf8.size(), utf16.size());
|
||||
utf16.resize(converted);
|
||||
EXPECT_NE(utf16, expected_utf16);
|
||||
}
|
||||
@ -1033,7 +1020,7 @@ HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_013) {
|
||||
std::vector<uint16_t> expected_utf16 = {0xD83D, 0xDE0E}; // surrogates
|
||||
std::vector<uint16_t> utf16(0);
|
||||
size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
|
||||
utf16.data(), utf8.size(), utf16.size(), 0);
|
||||
utf16.data(), utf8.size(), utf16.size());
|
||||
utf16.resize(converted);
|
||||
EXPECT_EQ(converted, 0);
|
||||
}
|
||||
@ -1047,7 +1034,7 @@ HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_014) {
|
||||
std::vector<uint16_t> expected_utf16 = {0xD83D, 0xDE0E}; // surrogates
|
||||
std::vector<uint16_t> utf16(1);
|
||||
size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
|
||||
utf16.data(), utf8.size(), utf16.size(), 0);
|
||||
utf16.data(), utf8.size(), utf16.size());
|
||||
utf16.resize(converted);
|
||||
EXPECT_EQ(converted, 0);
|
||||
}
|
||||
|
@ -350,11 +350,31 @@ std::pair<uint32_t, size_t> ConvertUtf8ToUtf16Pair(const uint8_t *data, bool com
|
||||
return {pair, UtfLength::FOUR};
|
||||
}
|
||||
|
||||
// drop the tail bytes if the remain length can't fill the length it represents.
|
||||
static inline size_t FixUtf8Len(const uint8_t* utf8, size_t utf8Len)
|
||||
{
|
||||
size_t trimSize = 0;
|
||||
if (utf8Len >= 1 && utf8[utf8Len - 1] >= 0xC0) {
|
||||
// The last one char claim there are more than 1 byte next to it, it's invalid, so drop the last one.
|
||||
trimSize = 1;
|
||||
}
|
||||
if (utf8Len >= CONST_2 && utf8[utf8Len - CONST_2] >= 0xE0) {
|
||||
// The second to last char claim there are more than 2 bytes next to it, it's invalid, so drop the last two.
|
||||
trimSize = CONST_2;
|
||||
}
|
||||
if (utf8Len >= CONST_3 && utf8[utf8Len - CONST_3] >= 0xF0) {
|
||||
// The third to last char claim there are more than 3 bytes next to it, it's invalid, so drop the last three.
|
||||
trimSize = CONST_3;
|
||||
}
|
||||
return utf8Len - trimSize;
|
||||
}
|
||||
|
||||
size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len)
|
||||
{
|
||||
size_t safeUtf8Len = FixUtf8Len(utf8, utf8Len);
|
||||
size_t in_pos = 0;
|
||||
size_t res = 0;
|
||||
while (in_pos < utf8Len) {
|
||||
while (in_pos < safeUtf8Len) {
|
||||
uint8_t src = utf8[in_pos];
|
||||
switch (src & 0xF0) {
|
||||
case 0xF0: {
|
||||
@ -386,40 +406,21 @@ size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len)
|
||||
do {
|
||||
in_pos++;
|
||||
res++;
|
||||
} while (in_pos < utf8Len && utf8[in_pos] < 0x80);
|
||||
} while (in_pos < safeUtf8Len && utf8[in_pos] < 0x80);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// The remaining bytes should each be treated as a single-byte char.
|
||||
res += utf8Len - in_pos;
|
||||
return res;
|
||||
}
|
||||
|
||||
size_t ConvertUtf8ToUtf16Int(const uint8_t data, size_t max_bytes)
|
||||
{
|
||||
if ((data & MASK1) == 0 || max_bytes < CONST_4) {
|
||||
return 1;
|
||||
}
|
||||
if ((data & MASK2) == 0) {
|
||||
return CONST_2;
|
||||
}
|
||||
if ((data & MASK3) == 0) {
|
||||
return CONST_3;
|
||||
}
|
||||
return CONST_4;
|
||||
}
|
||||
|
||||
size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf8Len, size_t utf16Len,
|
||||
size_t start)
|
||||
size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf8Len, size_t utf16Len)
|
||||
{
|
||||
size_t safeUtf8Len = FixUtf8Len(utf8In, utf8Len);
|
||||
size_t in_pos = 0;
|
||||
size_t out_pos = 0;
|
||||
|
||||
while (in_pos < utf8Len && start > 0) {
|
||||
auto nbytes = ConvertUtf8ToUtf16Int(utf8In[in_pos], utf8Len - in_pos);
|
||||
in_pos += nbytes;
|
||||
start -= nbytes;
|
||||
}
|
||||
|
||||
while (in_pos < utf8Len && out_pos < utf16Len) {
|
||||
while (in_pos < safeUtf8Len && out_pos < utf16Len) {
|
||||
uint8_t src = utf8In[in_pos];
|
||||
switch (src & 0xF0) {
|
||||
case 0xF0: {
|
||||
@ -460,10 +461,14 @@ size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_
|
||||
default:
|
||||
do {
|
||||
utf16Out[out_pos++] = static_cast<uint16_t>(utf8In[in_pos++]);
|
||||
} while (in_pos < utf8Len && out_pos < utf16Len && utf8In[in_pos] < 0x80);
|
||||
} while (in_pos < safeUtf8Len && out_pos < utf16Len && utf8In[in_pos] < 0x80);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// The remaining bytes should each be treated as a single-byte char.
|
||||
while (in_pos < utf8Len && out_pos < utf16Len) {
|
||||
utf16Out[out_pos++] = static_cast<uint16_t>(utf8In[in_pos++]);
|
||||
}
|
||||
return out_pos;
|
||||
}
|
||||
|
||||
|
@ -122,8 +122,7 @@ std::pair<uint32_t, size_t> ConvertUtf8ToUtf16Pair(const uint8_t *data, bool com
|
||||
|
||||
size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len);
|
||||
|
||||
size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf8Len, size_t utf16Len,
|
||||
size_t start);
|
||||
size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf8Len, size_t utf16Len);
|
||||
|
||||
size_t ConvertRegionUtf16ToLatin1(const uint16_t *utf16In, uint8_t *latin1Out, size_t utf16Len, size_t latin1Len);
|
||||
|
||||
|
@ -58,7 +58,7 @@ inline EcmaString *EcmaString::CreateFromUtf8(const EcmaVM *vm, const uint8_t *u
|
||||
ASSERT(string != nullptr);
|
||||
|
||||
[[maybe_unused]] auto len =
|
||||
base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, string->GetDataUtf16Writable(), utf8Len, utf16Len, 0);
|
||||
base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, string->GetDataUtf16Writable(), utf8Len, utf16Len);
|
||||
ASSERT(len == utf16Len);
|
||||
}
|
||||
|
||||
|
@ -821,7 +821,7 @@ uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len
|
||||
auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
|
||||
CVector<uint16_t> tmpBuffer(utf16Len);
|
||||
[[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
|
||||
utf16Len, 0);
|
||||
utf16Len);
|
||||
ASSERT(len == utf16Len);
|
||||
uint32_t hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
|
||||
return MixHashcode(hash, NOT_INTEGER);
|
||||
|
@ -435,9 +435,9 @@ private:
|
||||
CVector<uint8_t> tmpBuf;
|
||||
const uint8_t *data = EcmaString::GetUtf8DataFlat(this, tmpBuf);
|
||||
if (length > bufLength) {
|
||||
return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, bufLength, bufLength, 0);
|
||||
return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, bufLength, bufLength);
|
||||
}
|
||||
return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, bufLength, 0);
|
||||
return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, bufLength);
|
||||
}
|
||||
|
||||
// It allows user to copy into buffer even if maxLength < length
|
||||
@ -510,31 +510,23 @@ private:
|
||||
|
||||
inline uint32_t CopyDataUtf16(uint16_t *buf, uint32_t maxLength) const
|
||||
{
|
||||
return CopyDataRegionUtf16(buf, 0, GetLength(), maxLength);
|
||||
}
|
||||
|
||||
uint32_t CopyDataRegionUtf16(uint16_t *buf, uint32_t start, uint32_t length, uint32_t maxLength) const
|
||||
{
|
||||
uint32_t length = GetLength();
|
||||
if (length > maxLength) {
|
||||
return 0;
|
||||
}
|
||||
uint32_t len = GetLength();
|
||||
if (start + length > len) {
|
||||
return 0;
|
||||
}
|
||||
if (IsUtf16()) {
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
CVector<uint16_t> tmpBuf;
|
||||
const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
|
||||
if (memcpy_s(buf, maxLength * sizeof(uint16_t), data + start, length * sizeof(uint16_t)) != EOK) {
|
||||
const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
|
||||
if (memcpy_s(buf, maxLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
|
||||
LOG_FULL(FATAL) << "memcpy_s failed";
|
||||
UNREACHABLE();
|
||||
}
|
||||
return length;
|
||||
}
|
||||
CVector<uint8_t> tmpBuf;
|
||||
const uint8_t *data = EcmaString::GetUtf8DataFlat(this, tmpBuf);
|
||||
return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, len, maxLength, start);
|
||||
const uint8_t *data = GetUtf8DataFlat(this, tmpBuf);
|
||||
return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, maxLength);
|
||||
}
|
||||
|
||||
std::u16string ToU16String(uint32_t len = 0);
|
||||
|
Loading…
Reference in New Issue
Block a user