diff --git a/ecmascript/base/utf_helper.cpp b/ecmascript/base/utf_helper.cpp
index f736e47200..3814200fa7 100644
--- a/ecmascript/base/utf_helper.cpp
+++ b/ecmascript/base/utf_helper.cpp
@@ -44,14 +44,14 @@ bool IsUTF16LowSurrogate(uint16_t ch)
 }
 
 // Methods for decode utf16 to unicode
-uint32_t DecodeUTF16(uint16_t const *utf16, size_t len, size_t *index)
+uint32_t DecodeUTF16(uint16_t const *utf16, size_t len, size_t *index, bool cesu8)
 {
     uint16_t high = utf16[*index];
     if ((high & SURROGATE_MASK) != DECODE_LEAD_LOW || !IsUTF16HighSurrogate(high) || *index == len - 1) {
         return high;
     }
     uint16_t low = utf16[*index + 1];
-    if (!IsUTF16LowSurrogate(low)) {
+    if (!IsUTF16LowSurrogate(low) || cesu8) {
         return high;
     }
     (*index)++;
@@ -221,7 +221,7 @@ Utf8Char ConvertUtf16ToUtf8(uint16_t d0, uint16_t d1, bool modify, bool isWriteB
     return {UtfLength::FOUR, {ch0, ch1, ch2, ch3}};
 }
 
-size_t Utf16ToUtf8Size(const uint16_t *utf16, uint32_t length, bool modify, bool isGetBufferSize)
+size_t Utf16ToUtf8Size(const uint16_t *utf16, uint32_t length, bool modify, bool isGetBufferSize, bool cesu8)
 {
     size_t res = 1;  // zero byte
     // when utf16 data length is only 1 and code in 0xd800-0xdfff,
@@ -247,7 +247,7 @@ size_t Utf16ToUtf8Size(const uint16_t *utf16, uint32_t length, bool modify, bool
         } else if (utf16[i] < utf::HI_SURROGATE_MIN || utf16[i] > utf::HI_SURROGATE_MAX) {
             res += UtfLength::THREE;
         } else {
-            if (i < length - 1 &&
+            if (!cesu8 && i < length - 1 &&
                 utf16[i + 1] >= utf::LO_SURROGATE_MIN &&  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
                 utf16[i + 1] <= utf::LO_SURROGATE_MAX) {  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
                 res += UtfLength::FOUR;
@@ -261,7 +261,7 @@ size_t Utf16ToUtf8Size(const uint16_t *utf16, uint32_t length, bool modify, bool
 }
 
 size_t ConvertRegionUtf16ToUtf8(const uint16_t *utf16In, uint8_t *utf8Out, size_t utf16Len, size_t utf8Len,
-                                size_t start, bool modify, bool isWriteBuffer)
+                                size_t start, bool modify, bool isWriteBuffer, bool cesu8)
 {
     if (utf16In == nullptr || utf8Out == nullptr || utf8Len == 0) {
         return 0;
@@ -269,7 +269,7 @@ size_t ConvertRegionUtf16ToUtf8(const uint16_t *utf16In, uint8_t *utf8Out, size_
     size_t utf8Pos = 0;
     size_t end = start + utf16Len;
     for (size_t i = start; i < end; ++i) {
-        uint32_t codepoint = DecodeUTF16(utf16In, end, &i);
+        uint32_t codepoint = DecodeUTF16(utf16In, end, &i, cesu8);
         if (codepoint == 0) {
             if (isWriteBuffer) {
                 utf8Out[utf8Pos++] = 0x00U;
diff --git a/ecmascript/base/utf_helper.h b/ecmascript/base/utf_helper.h
index 9e5468194c..7216b8e548 100644
--- a/ecmascript/base/utf_helper.h
+++ b/ecmascript/base/utf_helper.h
@@ -100,7 +100,7 @@ struct Utf8Char {
 
 static const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC};
 
-uint32_t DecodeUTF16(uint16_t const *utf16, size_t len, size_t *index);
+uint32_t DecodeUTF16(uint16_t const *utf16, size_t len, size_t *index, bool cesu8 = false);
 
 size_t EncodeUTF8(uint32_t codepoint, uint8_t* utf8, size_t len, size_t index);
 
@@ -110,10 +110,12 @@ bool IsValidUTF8(const std::vector<uint8_t> &data);
 
 Utf8Char ConvertUtf16ToUtf8(uint16_t d0, uint16_t d1, bool modify, bool isWriteBuffer = false);
 
-size_t Utf16ToUtf8Size(const uint16_t *utf16, uint32_t length, bool modify = true, bool isGetBufferSize = false);
+size_t Utf16ToUtf8Size(const uint16_t *utf16, uint32_t length, bool modify = true,
+                       bool isGetBufferSize = false, bool cesu8 = false);
 
-size_t PUBLIC_API ConvertRegionUtf16ToUtf8(const uint16_t *utf16In, uint8_t *utf8Out, size_t utf16Len, size_t utf8Len,
-                                           size_t start, bool modify = true, bool isWriteBuffer = false);
+size_t PUBLIC_API ConvertRegionUtf16ToUtf8(const uint16_t *utf16In, uint8_t *utf8Out, size_t utf16Len,
+                                           size_t utf8Len, size_t start, bool modify = true,
+                                           bool isWriteBuffer = false, bool cesu8 = false);
 
 size_t DebuggerConvertRegionUtf16ToUtf8(const uint16_t *utf16In, uint8_t *utf8Out, size_t utf16Len, size_t utf8Len,
                                         size_t start, bool modify = true, bool isWriteBuffer = false);
diff --git a/ecmascript/builtins/builtins_regexp.cpp b/ecmascript/builtins/builtins_regexp.cpp
index c8cd74836c..b63df51bbc 100644
--- a/ecmascript/builtins/builtins_regexp.cpp
+++ b/ecmascript/builtins/builtins_regexp.cpp
@@ -2361,7 +2361,8 @@ JSTaggedValue BuiltinsRegExp::RegExpInitialize(JSThread *thread, const JSHandle<
     auto getCache = regExpParserCache->GetCache(*patternStrHandle, flagsBits, groupName);
     if (getCache.first.IsHole()) {
         // String -> CString
-        CString patternStdStr = ConvertToString(*patternStrHandle, StringConvertedUsage::LOGICOPERATION);
+        bool cesu8 = !(RegExpParser::FLAG_UTF16 & flagsBits);
+        CString patternStdStr = ConvertToString(*patternStrHandle, StringConvertedUsage::LOGICOPERATION, cesu8);
         parser.Init(const_cast<char *>(reinterpret_cast<const char *>(patternStdStr.c_str())), patternStdStr.size(),
                     flagsBits);
         parser.Parse();
diff --git a/ecmascript/ecma_string.cpp b/ecmascript/ecma_string.cpp
index 5464b5445b..9109cab901 100755
--- a/ecmascript/ecma_string.cpp
+++ b/ecmascript/ecma_string.cpp
@@ -1595,14 +1595,14 @@ std::string EcmaStringAccessor::DebuggerToStdString(StringConvertedUsage usage)
     return res;
 }
 
-CString EcmaStringAccessor::ToCString(StringConvertedUsage usage)
+CString EcmaStringAccessor::ToCString(StringConvertedUsage usage, bool cesu8)
 {
     if (string_ == nullptr) {
         return "";
     }
     bool modify = (usage != StringConvertedUsage::PRINT);
     CVector<uint8_t> buf;
-    Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
+    Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify, cesu8);
     CString res;
     res.reserve(sp.size());
     for (const auto &c : sp) {
diff --git a/ecmascript/ecma_string.h b/ecmascript/ecma_string.h
index b5b954ea48..944b19f31d 100755
--- a/ecmascript/ecma_string.h
+++ b/ecmascript/ecma_string.h
@@ -553,17 +553,17 @@ private:
         return std::unique_ptr<uint8_t[]>(buf);
     }
 
-    Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf, bool modify = true)
+    Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf, bool modify = true, bool cesu8 = false)
     {
         Span<const uint8_t> str;
         uint32_t strLen = GetLength();
         if (UNLIKELY(IsUtf16())) {
             CVector<uint16_t> tmpBuf;
             const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
-            ASSERT(base::utf_helper::Utf16ToUtf8Size(data, strLen, modify) > 0);
-            size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify) - 1;
+            ASSERT(base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) > 0);
+            size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) - 1;
             buf.reserve(len);
-            len = base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify);
+            len = base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify, false, cesu8);
             str = Span<const uint8_t>(buf.data(), len);
         } else {
             const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
@@ -1258,7 +1258,7 @@ public:
     std::string DebuggerToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT);
     // not change string data structure.
     // if string is not flat, this func has low efficiency.
-    CString ToCString(StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION);
+    CString ToCString(StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION, bool cesu8 = false);
 
     // not change string data structure.
     // if string is not flat, this func has low efficiency.
diff --git a/ecmascript/mem/c_string.cpp b/ecmascript/mem/c_string.cpp
index f9e6458888..2b4b7fd2bb 100644
--- a/ecmascript/mem/c_string.cpp
+++ b/ecmascript/mem/c_string.cpp
@@ -94,12 +94,12 @@ CString ConvertToString(const std::string &str)
     return res;
 }
 
-CString ConvertToString(const EcmaString *s, StringConvertedUsage usage)
+CString ConvertToString(const EcmaString *s, StringConvertedUsage usage, bool cesu8)
 {
     if (s == nullptr) {
         return CString("");
     }
-    return EcmaStringAccessor(const_cast<EcmaString *>(s)).ToCString(usage);
+    return EcmaStringAccessor(const_cast<EcmaString *>(s)).ToCString(usage, cesu8);
 }
 
 CString ConvertToString(JSTaggedValue key)
diff --git a/ecmascript/mem/c_string.h b/ecmascript/mem/c_string.h
index 51a08f8e87..77761bfdbf 100644
--- a/ecmascript/mem/c_string.h
+++ b/ecmascript/mem/c_string.h
@@ -56,8 +56,9 @@ CString ConvertToString(const std::string &str);
 std::string PUBLIC_API ConvertToStdString(const CString &str);
 
 // '\u0000' is skip according to holdZero
+// cesu8: encode each surrogate of a non-BMP code point separately (CESU-8), not as one 4-byte UTF-8 sequence
 CString PUBLIC_API ConvertToString(const ecmascript::EcmaString *s,
-    StringConvertedUsage usage = StringConvertedUsage::PRINT);
+    StringConvertedUsage usage = StringConvertedUsage::PRINT, bool cesu8 = false);
 CString ConvertToString(ecmascript::JSTaggedValue key);
 
 template<class T>
diff --git a/ecmascript/regexp/regexp_parser.cpp b/ecmascript/regexp/regexp_parser.cpp
index d186f19e52..afd732584d 100644
--- a/ecmascript/regexp/regexp_parser.cpp
+++ b/ecmascript/regexp/regexp_parser.cpp
@@ -514,13 +514,13 @@ void RegExpParser::ParseAlternative(bool isBackward)
                     uint32_t matchedChar = c0_;
                     if (c0_ > (INT8_MAX + 1)) {
                         Prev();
-                        int i = 0;
                         UChar32 c;
                         int32_t length = end_ - pc_ + 1;
                         // NOLINTNEXTLINE(hicpp-signed-bitwise)
-                        U8_NEXT(pc_, i, length, c);  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+                        auto unicodeChar = base::utf_helper::ConvertUtf8ToUnicodeChar(pc_, length);
+                        c = unicodeChar.first;
                         matchedChar = static_cast<uint32_t>(c);
-                        pc_ += i;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+                        pc_ += unicodeChar.second;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
                     }
                     if (IsIgnoreCase()) {
                         matchedChar = static_cast<uint32_t>(Canonicalize(static_cast<int>(matchedChar), IsUtf16()));
diff --git a/test/moduletest/regexp/expect_output.txt b/test/moduletest/regexp/expect_output.txt
index 9aa2cc2062..aefdada78e 100644
--- a/test/moduletest/regexp/expect_output.txt
+++ b/test/moduletest/regexp/expect_output.txt
@@ -11,6 +11,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"b\ude00"
+"bb"
+"b"
+"b\ude00"
 true
 true
 true
diff --git a/test/moduletest/regexp/regexp.js b/test/moduletest/regexp/regexp.js
index a16bb6b3ac..b0ddd09353 100644
--- a/test/moduletest/regexp/regexp.js
+++ b/test/moduletest/regexp/regexp.js
@@ -19,6 +19,28 @@
  * @tc.type: FUNC
  * @tc.require: issueI5NO8G
  */
+{
+  let str = "😀";
+  let regexp = /[😀]/;  // no 'u' flag, non-global
+  print(JSON.stringify(str.replace(regexp,"b")));  // expected "b\ude00": only the first code unit is replaced
+}
+{
+  let str = "😀";
+  let regexp = /[😀]/g;  // no 'u' flag, global
+  print(JSON.stringify(str.replace(regexp,"b")));  // expected "bb": both code units are replaced
+}
+{
+  let str = "😀";
+  let regexp = /[😀]/u;  // 'u' flag
+  print(JSON.stringify(str.replace(regexp,"b")));  // expected "b": the whole character is replaced
+}
+{
+  let str = "😀";
+  let regexp = /[\😀]/;  // escaped character inside the class, no 'u' flag
+  print(JSON.stringify(str.replace(regexp,"b")));  // expected "b\ude00", same as the unescaped case
+}
+
+
 var reg = /[\x5d-\x7e]/i;
 var result = reg.test("a");
 print(result);