mirror of
https://gitee.com/openharmony/arkcompiler_ets_runtime
synced 2024-11-23 10:09:54 +00:00
BUG in Regexp.replace
Issue: https://gitee.com/openharmony/arkcompiler_ets_runtime/issues/IAUDBH Signed-off-by: hecunmao <hecunmao@huawei.com> Change-Id: I48d5d12004adcab4050a2d8c3766aaeaf15d40fd
This commit is contained in:
parent
475b42db2a
commit
7ee26c3852
@ -44,14 +44,14 @@ bool IsUTF16LowSurrogate(uint16_t ch)
|
||||
}
|
||||
|
||||
// Methods for decoding UTF-16 to Unicode code points
|
||||
uint32_t DecodeUTF16(uint16_t const *utf16, size_t len, size_t *index)
|
||||
uint32_t DecodeUTF16(uint16_t const *utf16, size_t len, size_t *index, bool cesu8)
|
||||
{
|
||||
uint16_t high = utf16[*index];
|
||||
if ((high & SURROGATE_MASK) != DECODE_LEAD_LOW || !IsUTF16HighSurrogate(high) || *index == len - 1) {
|
||||
return high;
|
||||
}
|
||||
uint16_t low = utf16[*index + 1];
|
||||
if (!IsUTF16LowSurrogate(low)) {
|
||||
if (!IsUTF16LowSurrogate(low) || cesu8) {
|
||||
return high;
|
||||
}
|
||||
(*index)++;
|
||||
@ -221,7 +221,7 @@ Utf8Char ConvertUtf16ToUtf8(uint16_t d0, uint16_t d1, bool modify, bool isWriteB
|
||||
return {UtfLength::FOUR, {ch0, ch1, ch2, ch3}};
|
||||
}
|
||||
|
||||
size_t Utf16ToUtf8Size(const uint16_t *utf16, uint32_t length, bool modify, bool isGetBufferSize)
|
||||
size_t Utf16ToUtf8Size(const uint16_t *utf16, uint32_t length, bool modify, bool isGetBufferSize, bool cesu8)
|
||||
{
|
||||
size_t res = 1; // zero byte
|
||||
// when utf16 data length is only 1 and code in 0xd800-0xdfff,
|
||||
@ -247,7 +247,7 @@ size_t Utf16ToUtf8Size(const uint16_t *utf16, uint32_t length, bool modify, bool
|
||||
} else if (utf16[i] < utf::HI_SURROGATE_MIN || utf16[i] > utf::HI_SURROGATE_MAX) {
|
||||
res += UtfLength::THREE;
|
||||
} else {
|
||||
if (i < length - 1 &&
|
||||
if (!cesu8 && i < length - 1 &&
|
||||
utf16[i + 1] >= utf::LO_SURROGATE_MIN && // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
utf16[i + 1] <= utf::LO_SURROGATE_MAX) { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
res += UtfLength::FOUR;
|
||||
@ -261,7 +261,7 @@ size_t Utf16ToUtf8Size(const uint16_t *utf16, uint32_t length, bool modify, bool
|
||||
}
|
||||
|
||||
size_t ConvertRegionUtf16ToUtf8(const uint16_t *utf16In, uint8_t *utf8Out, size_t utf16Len, size_t utf8Len,
|
||||
size_t start, bool modify, bool isWriteBuffer)
|
||||
size_t start, bool modify, bool isWriteBuffer, bool cesu8)
|
||||
{
|
||||
if (utf16In == nullptr || utf8Out == nullptr || utf8Len == 0) {
|
||||
return 0;
|
||||
@ -269,7 +269,7 @@ size_t ConvertRegionUtf16ToUtf8(const uint16_t *utf16In, uint8_t *utf8Out, size_
|
||||
size_t utf8Pos = 0;
|
||||
size_t end = start + utf16Len;
|
||||
for (size_t i = start; i < end; ++i) {
|
||||
uint32_t codepoint = DecodeUTF16(utf16In, end, &i);
|
||||
uint32_t codepoint = DecodeUTF16(utf16In, end, &i, cesu8);
|
||||
if (codepoint == 0) {
|
||||
if (isWriteBuffer) {
|
||||
utf8Out[utf8Pos++] = 0x00U;
|
||||
|
@ -100,7 +100,7 @@ struct Utf8Char {
|
||||
|
||||
static const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC};
|
||||
|
||||
uint32_t DecodeUTF16(uint16_t const *utf16, size_t len, size_t *index);
|
||||
uint32_t DecodeUTF16(uint16_t const *utf16, size_t len, size_t *index, bool cesu8 = false);
|
||||
|
||||
size_t EncodeUTF8(uint32_t codepoint, uint8_t* utf8, size_t len, size_t index);
|
||||
|
||||
@ -110,10 +110,12 @@ bool IsValidUTF8(const std::vector<uint8_t> &data);
|
||||
|
||||
Utf8Char ConvertUtf16ToUtf8(uint16_t d0, uint16_t d1, bool modify, bool isWriteBuffer = false);
|
||||
|
||||
size_t Utf16ToUtf8Size(const uint16_t *utf16, uint32_t length, bool modify = true, bool isGetBufferSize = false);
|
||||
size_t Utf16ToUtf8Size(const uint16_t *utf16, uint32_t length, bool modify = true,
|
||||
bool isGetBufferSize = false, bool cesu8 = false);
|
||||
|
||||
size_t PUBLIC_API ConvertRegionUtf16ToUtf8(const uint16_t *utf16In, uint8_t *utf8Out, size_t utf16Len, size_t utf8Len,
|
||||
size_t start, bool modify = true, bool isWriteBuffer = false);
|
||||
size_t PUBLIC_API ConvertRegionUtf16ToUtf8(const uint16_t *utf16In, uint8_t *utf8Out, size_t utf16Len,
|
||||
size_t utf8Len, size_t start, bool modify = true,
|
||||
bool isWriteBuffer = false, bool cesu = false);
|
||||
|
||||
size_t DebuggerConvertRegionUtf16ToUtf8(const uint16_t *utf16In, uint8_t *utf8Out, size_t utf16Len, size_t utf8Len,
|
||||
size_t start, bool modify = true, bool isWriteBuffer = false);
|
||||
|
@ -2361,7 +2361,8 @@ JSTaggedValue BuiltinsRegExp::RegExpInitialize(JSThread *thread, const JSHandle<
|
||||
auto getCache = regExpParserCache->GetCache(*patternStrHandle, flagsBits, groupName);
|
||||
if (getCache.first.IsHole()) {
|
||||
// String -> CString
|
||||
CString patternStdStr = ConvertToString(*patternStrHandle, StringConvertedUsage::LOGICOPERATION);
|
||||
bool cesu8 = !(RegExpParser::FLAG_UTF16 & flagsBits);
|
||||
CString patternStdStr = ConvertToString(*patternStrHandle, StringConvertedUsage::LOGICOPERATION, cesu8);
|
||||
parser.Init(const_cast<char *>(reinterpret_cast<const char *>(patternStdStr.c_str())), patternStdStr.size(),
|
||||
flagsBits);
|
||||
parser.Parse();
|
||||
|
@ -1595,14 +1595,14 @@ std::string EcmaStringAccessor::DebuggerToStdString(StringConvertedUsage usage)
|
||||
return res;
|
||||
}
|
||||
|
||||
CString EcmaStringAccessor::ToCString(StringConvertedUsage usage)
|
||||
CString EcmaStringAccessor::ToCString(StringConvertedUsage usage, bool cesu8)
|
||||
{
|
||||
if (string_ == nullptr) {
|
||||
return "";
|
||||
}
|
||||
bool modify = (usage != StringConvertedUsage::PRINT);
|
||||
CVector<uint8_t> buf;
|
||||
Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
|
||||
Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify, cesu8);
|
||||
CString res;
|
||||
res.reserve(sp.size());
|
||||
for (const auto &c : sp) {
|
||||
|
@ -553,17 +553,17 @@ private:
|
||||
return std::unique_ptr<uint8_t[]>(buf);
|
||||
}
|
||||
|
||||
Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf, bool modify = true)
|
||||
Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf, bool modify = true, bool cesu8 = false)
|
||||
{
|
||||
Span<const uint8_t> str;
|
||||
uint32_t strLen = GetLength();
|
||||
if (UNLIKELY(IsUtf16())) {
|
||||
CVector<uint16_t> tmpBuf;
|
||||
const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
|
||||
ASSERT(base::utf_helper::Utf16ToUtf8Size(data, strLen, modify) > 0);
|
||||
size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify) - 1;
|
||||
ASSERT(base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) > 0);
|
||||
size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) - 1;
|
||||
buf.reserve(len);
|
||||
len = base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify);
|
||||
len = base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify, false, cesu8);
|
||||
str = Span<const uint8_t>(buf.data(), len);
|
||||
} else {
|
||||
const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
|
||||
@ -1258,7 +1258,7 @@ public:
|
||||
std::string DebuggerToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT);
|
||||
// Does not change the string's data structure.
|
||||
// if string is not flat, this func has low efficiency.
|
||||
CString ToCString(StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION);
|
||||
CString ToCString(StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION, bool cesu8 = false);
|
||||
|
||||
// not change string data structure.
|
||||
// if string is not flat, this func has low efficiency.
|
||||
|
@ -94,12 +94,12 @@ CString ConvertToString(const std::string &str)
|
||||
return res;
|
||||
}
|
||||
|
||||
CString ConvertToString(const EcmaString *s, StringConvertedUsage usage)
|
||||
CString ConvertToString(const EcmaString *s, StringConvertedUsage usage, bool cesu8)
|
||||
{
|
||||
if (s == nullptr) {
|
||||
return CString("");
|
||||
}
|
||||
return EcmaStringAccessor(const_cast<EcmaString *>(s)).ToCString(usage);
|
||||
return EcmaStringAccessor(const_cast<EcmaString *>(s)).ToCString(usage, cesu8);
|
||||
}
|
||||
|
||||
CString ConvertToString(JSTaggedValue key)
|
||||
|
@ -56,8 +56,9 @@ CString ConvertToString(const std::string &str);
|
||||
std::string PUBLIC_API ConvertToStdString(const CString &str);
|
||||
|
||||
// '\u0000' is skipped according to holdZero
|
||||
// cesu8: when true, a UTF-16 surrogate pair (non-BMP code point) is NOT combined into one code point; each surrogate is converted on its own (CESU-8 style)
|
||||
CString PUBLIC_API ConvertToString(const ecmascript::EcmaString *s,
|
||||
StringConvertedUsage usage = StringConvertedUsage::PRINT);
|
||||
StringConvertedUsage usage = StringConvertedUsage::PRINT, bool cesu8 = false);
|
||||
CString ConvertToString(ecmascript::JSTaggedValue key);
|
||||
|
||||
template<class T>
|
||||
|
@ -514,13 +514,13 @@ void RegExpParser::ParseAlternative(bool isBackward)
|
||||
uint32_t matchedChar = c0_;
|
||||
if (c0_ > (INT8_MAX + 1)) {
|
||||
Prev();
|
||||
int i = 0;
|
||||
UChar32 c;
|
||||
int32_t length = end_ - pc_ + 1;
|
||||
// NOLINTNEXTLINE(hicpp-signed-bitwise)
|
||||
U8_NEXT(pc_, i, length, c); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
auto unicodeChar = base::utf_helper::ConvertUtf8ToUnicodeChar(pc_, length);
|
||||
c = unicodeChar.first;
|
||||
matchedChar = static_cast<uint32_t>(c);
|
||||
pc_ += i; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
pc_ += unicodeChar.second; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|
||||
}
|
||||
if (IsIgnoreCase()) {
|
||||
matchedChar = static_cast<uint32_t>(Canonicalize(static_cast<int>(matchedChar), IsUtf16()));
|
||||
|
@ -11,6 +11,10 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"b\ude00"
|
||||
"bb"
|
||||
"b"
|
||||
"b\ude00"
|
||||
true
|
||||
true
|
||||
true
|
||||
|
@ -19,6 +19,28 @@
|
||||
* @tc.type: FUNC
|
||||
* @tc.require: issueI5NO8G
|
||||
*/
|
||||
{
|
||||
let str = "😀";
|
||||
let regexp = /[😀]/;
|
||||
print(JSON.stringify(str.replace(regexp,"b")));
|
||||
}
|
||||
{
|
||||
let str = "😀";
|
||||
let regexp = /[😀]/g;
|
||||
print(JSON.stringify(str.replace(regexp,"b")));
|
||||
}
|
||||
{
|
||||
let str = "😀";
|
||||
let regexp = /[😀]/u;
|
||||
print(JSON.stringify(str.replace(regexp,"b")));
|
||||
}
|
||||
{
|
||||
let str = "😀";
|
||||
let regexp = /[\😀]/;
|
||||
print(JSON.stringify(str.replace(regexp,"b")));
|
||||
}
|
||||
|
||||
|
||||
var reg = /[\x5d-\x7e]/i;
|
||||
var result = reg.test("a");
|
||||
print(result);
|
||||
|
Loading…
Reference in New Issue
Block a user