/*
 * Copyright (c) 2021 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ecmascript/ecma_string-inl.h"

#include "ecmascript/js_symbol.h"
#include "ecmascript/mem/c_containers.h"

namespace panda::ecmascript {
// Element-count threshold used by StringsAreEquals: spans shorter than this are
// compared element by element, longer ones with memcmp.
static constexpr int SMALL_STRING_SIZE = 128;

// Concatenates `left` and `right`.
// - If either operand is empty, the other operand is returned unchanged.
// - If the combined length is below TreeEcmaString::MIN_TREE_ECMASTRING_LENGTH a new
//   line string is created and both operands are copied into it.
// - Otherwise the result is produced by CreateTreeString.
// The result is UTF-8 (compressed) only when both operands are UTF-8.
EcmaString *EcmaString::Concat(const EcmaVM *vm,
    const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right)
{
    // allocator may trig gc and move src, need to hold it
    EcmaString *strLeft = *left;
    EcmaString *strRight = *right;
    uint32_t leftLength = strLeft->GetLength();
    if (leftLength == 0) {
        return strRight;
    }
    uint32_t rightLength = strRight->GetLength();
    if (rightLength == 0) {
        return strLeft;
    }

    uint32_t newLength = leftLength + rightLength;
    if (newLength == 0) {
        return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
    }

    bool compressed = (strLeft->IsUtf8() && strRight->IsUtf8());

    // if the result string is small, make a LineString
    if (newLength < TreeEcmaString::MIN_TREE_ECMASTRING_LENGTH) {
        ASSERT(strLeft->IsLineOrConstantString());
        ASSERT(strRight->IsLineOrConstantString());
        auto newString = CreateLineString(vm, newLength, compressed);
        // retrieve strings after gc
        strLeft = *left;
        strRight = *right;
        if (compressed) {
            // copy left part
            Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
            Span<const uint8_t> srcLeft(strLeft->GetDataUtf8(), leftLength);
            EcmaString::MemCopyChars(sp, newLength, srcLeft, leftLength);
            // copy right part
            sp = sp.SubSpan(leftLength);
            Span<const uint8_t> srcRight(strRight->GetDataUtf8(), rightLength);
            EcmaString::MemCopyChars(sp, rightLength, srcRight, rightLength);
        } else {
            // MemCopyChars takes byte counts, hence the `<< 1U` on UTF-16 lengths.
            // copy left part
            Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
            if (strLeft->IsUtf8()) {
                EcmaString::CopyChars(sp.data(), strLeft->GetDataUtf8(), leftLength);
            } else {
                Span<const uint16_t> srcLeft(strLeft->GetDataUtf16(), leftLength);
                EcmaString::MemCopyChars(sp, newLength << 1U, srcLeft, leftLength << 1U);
            }
            // copy right part
            sp = sp.SubSpan(leftLength);
            if (strRight->IsUtf8()) {
                EcmaString::CopyChars(sp.data(), strRight->GetDataUtf8(), rightLength);
            } else {
                Span<const uint16_t> srcRight(strRight->GetDataUtf16(), rightLength);
                EcmaString::MemCopyChars(sp, rightLength << 1U, srcRight, rightLength << 1U);
            }
        }
        ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
        return newString;
    }
    return CreateTreeString(vm, left, right, newLength, compressed);
}

// Returns the substring [start, start + length) of `src`.
// An empty request yields the factory's empty string; a request covering the whole
// string returns `src` itself. Otherwise `src` is flattened and the work is delegated
// to FastSubUtf8String / FastSubUtf16String depending on the encoding.
/* static */
EcmaString *EcmaString::FastSubString(const EcmaVM *vm,
    const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
{
    ASSERT((start + length) <= src->GetLength());
    if (length == 0) {
        return *vm->GetFactory()->GetEmptyString();
    }
    if (start == 0 && length == src->GetLength()) {
        return *src;
    }
    auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
    if (srcFlat->IsUtf8()) {
        return FastSubUtf8String(vm, srcFlat, start, length);
    }
    return FastSubUtf16String(vm, srcFlat, start, length);
}

// Copies `length` code units from `src` into this (writable line) string at offset `start`.
// `destSize` is the remaining destination capacity in code units; it is forwarded to
// memcpy_s in the two same-width branches. A UTF-8 source written into a UTF-16
// destination is widened element by element.
void EcmaString::WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length)
{
    ASSERT(IsLineString() && !IsConstantString());
    if (IsUtf8()) {
        ASSERT(src->IsUtf8());
        CVector<uint8_t> buf;
        const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        if (length != 0 && memcpy_s(GetDataUtf8Writable() + start, destSize, data, length) != EOK) {
            LOG_FULL(FATAL) << "memcpy_s failed";
            UNREACHABLE();
        }
    } else if (src->IsUtf8()) {
        CVector<uint8_t> buf;
        const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        Span<uint16_t> to(GetDataUtf16Writable() + start, length);
        Span<const uint8_t> from(data, length);
        for (uint32_t i = 0; i < length; i++) {
            to[i] = from[i];
        }
    } else {
        CVector<uint16_t> buf;
        const uint16_t *data = EcmaString::GetUtf16DataFlat(src, buf);
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        if (length != 0 && memcpy_s(GetDataUtf16Writable() + start,
            destSize * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
            LOG_FULL(FATAL) << "memcpy_s failed";
            UNREACHABLE();
        }
    }
}

// Compares the first `count` elements of two spans (possibly of different element width).
// Returns the difference of the first mismatching pair, or 0 if all `count` pairs match.
template<typename T1, typename T2>
int32_t CompareStringSpan(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t count)
{
    for (int32_t i = 0; i < count; ++i) {
        auto left = static_cast<int32_t>(lhsSp[i]);
        auto right = static_cast<int32_t>(rhsSp[i]);
        if (left != right) {
            return left - right;
        }
    }
    return 0;
}

// Three-way comparison by code unit.
// Returns 0 if both handles refer to the same object. Otherwise both strings are
// flattened and compared over the shorter length; the first non-zero code-unit
// difference is returned, or the length difference when the common prefix is equal.
int32_t EcmaString::Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right)
{
    if (*left == *right) {
        return 0;
    }
    auto leftFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, left));
    auto rightFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, right));
    EcmaString *lhs = *leftFlat;
    EcmaString *rhs = *rightFlat;
    int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
    int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());
    int32_t countDiff = lhsCount - rhsCount;
    int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
    if (!lhs->IsUtf16() && !rhs->IsUtf16()) {
        Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
        Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
        int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
        if (charDiff != 0) {
            return charDiff;
        }
    } else if (!lhs->IsUtf16()) {
        Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
        Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
        int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
        if (charDiff != 0) {
            return charDiff;
        }
    } else if (!rhs->IsUtf16()) {
        // Each span is sized with its own string's length (these two counts used to be swapped).
        Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
        Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
        int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
        if (charDiff != 0) {
            return charDiff;
        }
    } else {
        Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
        Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
        int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
        if (charDiff != 0) {
            return charDiff;
        }
    }
    return countDiff;
}

// Forward search of `rhsSp` inside `lhsSp`.
// `pos` is the first candidate start index and `max` the last one (inclusive).
// Returns the index of the first match, or -1. `rhsSp` must not be empty.
/* static */
template<typename T1, typename T2>
int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
{
    ASSERT(rhsSp.size() > 0);
    auto first = static_cast<int32_t>(rhsSp[0]);
    for (int32_t i = pos; i <= max; i++) {
        // Skip ahead to the next occurrence of the first search character.
        if (static_cast<int32_t>(lhsSp[i]) != first) {
            i++;
            while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) {
                i++;
            }
        }
        /* Found first character, now look at the rest of rhsSp */
        if (i <= max) {
            int j = i + 1;
            int end = j + static_cast<int>(rhsSp.size()) - 1;

            for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) {
            }
            if (j == end) {
                /* Found whole string. */
                return i;
            }
        }
    }
    return -1;
}

// Backward search of `rhsSp` inside `lhsSp`, trying start indices pos, pos - 1, ..., 0.
// Returns the highest matching start index not greater than `pos`, or -1.
// `rhsSp` must not be empty.
template<typename T1, typename T2>
int32_t EcmaString::LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
{
    int rhsSize = static_cast<int>(rhsSp.size());
    ASSERT(rhsSize > 0);
    auto first = rhsSp[0];
    for (int32_t i = pos; i >= 0; i--) {
        if (lhsSp[i] != first) {
            continue;
        }
        /* Found first character, now look at the rest of rhsSp */
        int j = 1;
        while (j < rhsSize) {
            if (rhsSp[j] != lhsSp[i + j]) {
                break;
            }
            j++;
        }
        if (j == rhsSize) {
            return i;
        }
    }
    return -1;
}

// Returns the index of the first occurrence of `search` in `receiver` at or after `pos`,
// or -1 if there is none.
// - A `pos` beyond the receiver length yields -1.
// - A negative `pos` is treated as 0.
// - An empty `search` matches at the (clamped) `pos`.
// - A UTF-16 `search` inside a UTF-8 `receiver` yields -1 without scanning.
int32_t EcmaString::IndexOf(const EcmaVM *vm,
    const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
{
    EcmaString *lhs = *receiver;
    EcmaString *rhs = *search;
    if (lhs == nullptr || rhs == nullptr) {
        return -1;
    }
    int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
    int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());

    if (pos > lhsCount) {
        return -1;
    }

    // Clamp before the empty-search shortcut so that a negative index is never
    // reported as a match position.
    if (pos < 0) {
        pos = 0;
    }

    if (rhsCount == 0) {
        return pos;
    }

    int32_t max = lhsCount - rhsCount;
    if (max < 0) {
        return -1;
    }

    if (pos + rhsCount > lhsCount) {
        return -1;
    }

    auto receiverFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, receiver));
    auto searchFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, search));
    lhs = *receiverFlat;
    rhs = *searchFlat;

    if (rhs->IsUtf8() && lhs->IsUtf8()) {
        Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
        Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
        return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
    } else if (rhs->IsUtf16() && lhs->IsUtf16()) {  // NOLINT(readability-else-after-return)
        Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
        Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
        return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
    } else if (rhs->IsUtf16()) {
        return -1;
    } else {  // NOLINT(readability-else-after-return)
        Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
        Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
        return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
    }
}

// Returns the index of the last occurrence of `search` in `receiver` that starts at or
// before `pos`, or -1 if there is none.
// `pos` is clamped to [0, receiverLength - searchLength]; an empty `search` matches at
// the clamped `pos`. A UTF-16 `search` inside a UTF-8 `receiver` yields -1.
int32_t EcmaString::LastIndexOf(const EcmaVM *vm,
    const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
{
    EcmaString *lhs = *receiver;
    EcmaString *rhs = *search;
    if (lhs == nullptr || rhs == nullptr) {
        return -1;
    }

    int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
    int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());
    if (lhsCount < rhsCount) {
        return -1;
    }

    if (pos < 0) {
        pos = 0;
    }

    if (pos > lhsCount) {
        pos = lhsCount;
    }

    if (pos + rhsCount > lhsCount) {
        pos = lhsCount - rhsCount;
    }

    if (rhsCount == 0) {
        return pos;
    }

    auto receiverFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, receiver));
    auto searchFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, search));
    lhs = *receiverFlat;
    rhs = *searchFlat;

    if (rhs->IsUtf8() && lhs->IsUtf8()) {
        Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
        Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
        return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
    } else if (rhs->IsUtf16() && lhs->IsUtf16()) {  // NOLINT(readability-else-after-return)
        Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
        Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
        return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
    } else if (rhs->IsUtf16()) {
        return -1;
    } else {  // NOLINT(readability-else-after-return)
        Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
        Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
        return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
    }
}

// Converts the first `len` code units of this string to a std::u16string.
// `len == 0` means "the whole string".
// NOTE(review): `len` is not checked against GetLength(); callers presumably never pass
// a larger value - confirm.
std::u16string EcmaString::ToU16String(uint32_t len)
{
    uint32_t length = len > 0 ? len : GetLength();
    std::u16string result;
    if (IsUtf16()) {
        CVector<uint16_t> buf;
        const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
        result = base::StringHelper::Utf16ToU16String(data, length);
    } else {
        CVector<uint8_t> buf;
        const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
        result = base::StringHelper::Utf8ToU16String(data, length);
    }
    return result;
}

// Returns true if every code unit of the (line or constant) string is an ASCII character.
// static
bool EcmaString::CanBeCompressed(const EcmaString *string)
{
    ASSERT(string->IsLineOrConstantString());
    if (string->IsUtf8()) {
        return CanBeCompressed(string->GetDataUtf8(), string->GetLength());
    }
    return CanBeCompressed(string->GetDataUtf16(), string->GetLength());
}

// Returns true if all `utf8Len` bytes satisfy IsASCIICharacter (vacuously true for length 0).
// static
bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
{
    bool isCompressed = true;
    uint32_t index = 0;
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    while (index < utf8Len) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        if (!IsASCIICharacter(utf8Data[index])) {
            isCompressed = false;
            break;
        }
        ++index;
    }
    return isCompressed;
}

// Returns true if all `utf16Len` code units satisfy IsASCIICharacter (vacuously true for length 0).
/* static */
bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
{
    bool isCompressed = true;
    Span<const uint16_t> data(utf16Data, utf16Len);
    for (uint32_t i = 0; i < utf16Len; i++) {
        if (!IsASCIICharacter(data[i])) {
            isCompressed = false;
            break;
        }
    }
    return isCompressed;
}

// Returns true if this string equals the concatenation str1 + str2, without building
// the concatenation. All three strings must be line or constant strings.
// A UTF-16 `this` never equals two UTF-8 parts, and a UTF-8 `this` never equals a
// concatenation containing a UTF-16 part.
bool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
{
    ASSERT(IsLineOrConstantString());
    ASSERT(str1->IsLineOrConstantString() && str2->IsLineOrConstantString());
    if (GetLength() != str1->GetLength() + str2->GetLength()) {
        return false;
    }
    if (IsUtf16()) {
        if (str1->IsUtf8() && str2->IsUtf8()) {
            return false;
        }
        if (EcmaString::StringsAreEqualUtf16(str1, GetDataUtf16(), str1->GetLength())) {
            return EcmaString::StringsAreEqualUtf16(str2, GetDataUtf16() + str1->GetLength(), str2->GetLength());
        }
    } else {
        if (str1->IsUtf16() || str2->IsUtf16()) {
            return false;
        }
        Span<const uint8_t> concatData(GetDataUtf8(), str1->GetLength());
        Span<const uint8_t> data1(str1->GetDataUtf8(), str1->GetLength());
        if (EcmaString::StringsAreEquals(concatData, data1)) {
            concatData = Span<const uint8_t>(GetDataUtf8() + str1->GetLength(), str2->GetLength());
            Span<const uint8_t> data2(str2->GetDataUtf8(), str2->GetLength());
            return EcmaString::StringsAreEquals(concatData, data2);
        }
    }
    return false;
}

// Compares the contents of two strings that share the same encoding.
// The encoding is taken from `str1`; the callers in this file check beforehand that
// both encodings and lengths match.
/* static */
bool EcmaString::StringsAreEqualSameUtfEncoding(EcmaString *str1, EcmaString *str2)
{
    if (str1->IsUtf16()) {
        CVector<uint16_t> buf1;
        CVector<uint16_t> buf2;
        const uint16_t *data1 = EcmaString::GetUtf16DataFlat(str1, buf1);
        const uint16_t *data2 = EcmaString::GetUtf16DataFlat(str2, buf2);
        Span<const uint16_t> sp1(data1, str1->GetLength());
        Span<const uint16_t> sp2(data2, str2->GetLength());
        return EcmaString::StringsAreEquals(sp1, sp2);
    } else {  // NOLINT(readability-else-after-return)
        CVector<uint8_t> buf1;
        CVector<uint8_t> buf2;
        const uint8_t *data1 = EcmaString::GetUtf8DataFlat(str1, buf1);
        const uint8_t *data2 = EcmaString::GetUtf8DataFlat(str2, buf2);
        Span<const uint8_t> sp1(data1, str1->GetLength());
        Span<const uint8_t> sp2(data2, str2->GetLength());
        return EcmaString::StringsAreEquals(sp1, sp2);
    }
}

// Content equality for two string handles.
// Cheap rejections first (encoding, length, cached hash codes if both are available);
// only then are both strings flattened and compared.
bool EcmaString::StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
{
    if (str1 == str2) {
        return true;
    }
    if (str1->IsUtf16() != str2->IsUtf16()) {
        return false;
    }
    uint32_t str1Len = str1->GetLength();
    if (str1Len != str2->GetLength()) {
        return false;
    }
    if (str1Len == 0) {
        return true;
    }

    uint32_t str1Hash;
    uint32_t str2Hash;
    if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
        if (str1Hash != str2Hash) {
            return false;
        }
    }

    auto str1Flat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, str1));
    auto str2Flat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, str2));
    return StringsAreEqualSameUtfEncoding(*str1Flat, *str2Flat);
}

// Content equality for two raw string pointers; same rejection sequence as the handle
// overload, but the strings are not flattened (StringsAreEqualSameUtfEncoding reads
// them through temporary buffers where needed).
/* static */
bool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2)
{
    if (str1 == str2) {
        return true;
    }
    if (str1->IsUtf16() != str2->IsUtf16()) {
        return false;
    }
    uint32_t str1Len = str1->GetLength();
    if (str1Len != str2->GetLength()) {
        return false;
    }
    if (str1Len == 0) {
        return true;
    }

    uint32_t str1Hash;
    uint32_t str2Hash;
    if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
        if (str1Hash != str2Hash) {
            return false;
        }
    }
    return StringsAreEqualSameUtfEncoding(str1, str2);
}

// Compares `str1` with raw UTF-8 data.
// `canBeCompress` states whether the raw data is compressible; it must agree with the
// encoding of `str1`, otherwise the strings are considered different.
/* static */
bool EcmaString::StringsAreEqualUtf8(const EcmaString *str1, const uint8_t *utf8Data, uint32_t utf8Len,
                                     bool canBeCompress)
{
    if (canBeCompress != str1->IsUtf8()) {
        return false;
    }

    if (canBeCompress && str1->GetLength() != utf8Len) {
        return false;
    }

    if (canBeCompress) {
        CVector<uint8_t> buf;
        Span<const uint8_t> data1(EcmaString::GetUtf8DataFlat(str1, buf), utf8Len);
        Span<const uint8_t> data2(utf8Data, utf8Len);
        return EcmaString::StringsAreEquals(data1, data2);
    }
    CVector<uint16_t> buf;
    uint32_t length = str1->GetLength();
    const uint16_t *data = EcmaString::GetUtf16DataFlat(str1, buf);
    return IsUtf8EqualsUtf16(utf8Data, utf8Len, data, length);
}

// Compares `str1` with raw UTF-16 data of `utf16Len` code units.
/* static */
bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
{
    uint32_t length = str1->GetLength();
    if (length != utf16Len) {
        return false;
    }
    if (str1->IsUtf8()) {
        CVector<uint8_t> buf;
        const uint8_t *data = EcmaString::GetUtf8DataFlat(str1, buf);
        return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len);
    } else {
        CVector<uint16_t> buf;
        Span<const uint16_t> data1(EcmaString::GetUtf16DataFlat(str1, buf), length);
        Span<const uint16_t> data2(utf16Data, utf16Len);
        return EcmaString::StringsAreEquals(data1, data2);
    }
}

// Compares the first str1.Size() elements of both spans.
// Short spans (fewer than SMALL_STRING_SIZE elements) are compared with a plain loop,
// longer ones with memcmp.
/* static */
template<typename T>
bool EcmaString::StringsAreEquals(Span<const T> &str1, Span<const T> &str2)
{
    ASSERT(str1.Size() <= str2.Size());
    size_t size = str1.Size();
    if (size < SMALL_STRING_SIZE) {
        for (size_t i = 0; i < size; i++) {
            if (str1[i] != str2[i]) {
                return false;
            }
        }
        return true;
    }
    return memcmp(str1.data(), str2.data(), size * sizeof(T)) == 0;
}

// Copies `count` BYTES from `src` to `dst`; `dstMax` is the destination capacity in bytes.
// A memcpy_s failure is fatal, so the function only ever returns true.
template<typename T>
bool EcmaString::MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
{
    ASSERT(dstMax >= count);
    ASSERT(dst.Size() >= src.Size());
    if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
        LOG_FULL(FATAL) << "memcpy_s failed";
        UNREACHABLE();
    }
    return true;
}

// Folds `size` elements into a 32-bit hash starting from `hashSeed`.
// Each step computes hash * 31 + c, written as (hash << 5) - hash + c.
template<class T>
static uint32_t ComputeHashForData(const T *data, size_t size, uint32_t hashSeed)
{
    uint32_t hash = hashSeed;
    Span<const T> sp(data, size);
    for (auto c : sp) {
        constexpr size_t SHIFT = 5;
        hash = (hash << SHIFT) - hash + c;
    }
    return hash;
}

// Computes the hash of this string's code units, starting from `hashSeed`.
uint32_t EcmaString::ComputeHashcode(uint32_t hashSeed) const
{
    uint32_t hash;
    uint32_t length = GetLength();
    if (IsUtf8()) {
        CVector<uint8_t> buf;
        const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
        hash = ComputeHashForData(data, length, hashSeed);
    } else {
        CVector<uint16_t> buf;
        const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
        hash = ComputeHashForData(data, length, hashSeed);
    }
    return hash;
}

// Computes the hash (seed 0) for raw UTF-8 data.
// Compressible data is hashed byte by byte; otherwise it is first converted to UTF-16
// and the UTF-16 code units are hashed.
/* static */
uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
{
    uint32_t hash = 0;
    if (canBeCompress) {
        hash = ComputeHashForData(utf8Data, utf8Len, 0);
    } else {
        auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
        CVector<uint16_t> tmpBuffer(utf16Len);
        [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
                                                                               utf16Len, 0);
        ASSERT(len == utf16Len);
        hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
    }
    return hash;
}

// Computes the hash (seed 0) for raw UTF-16 data.
/* static */
uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
{
    return ComputeHashForData(utf16Data, length, 0);
}

// Returns true if the UTF-8 data, converted to UTF-16, equals the given UTF-16 data.
/* static */
bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
                                   uint32_t utf16Len)
{
    // length is one more than compared utf16Data, don't need convert all utf8Data to utf16Data
    uint32_t utf8ConvertLength = utf16Len + 1;
    CVector<uint16_t> tmpBuffer(utf8ConvertLength);
    auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len, utf8ConvertLength, 0);
    if (len != utf16Len) {
        return false;
    }

    Span<const uint16_t> data1(tmpBuffer.data(), len);
    Span<const uint16_t> data2(utf16Data, utf16Len);
    return EcmaString::StringsAreEquals(data1, data2);
}

// Tries to interpret this string as an element index.
// Succeeds (and stores the value in *index) only if the string is UTF-8, is 1 to
// MAX_ELEMENT_INDEX_LEN characters long, consists solely of decimal digits without a
// leading zero (the single character "0" is accepted) and its value is below
// JSObject::MAX_ELEMENT_INDEX.
// Note: for an input that starts with '0' but is longer than one character, *index is
// set to 0 and false is returned.
bool EcmaString::ToElementIndex(uint32_t *index)
{
    uint32_t len = GetLength();
    if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {  // NOLINT(readability-magic-numbers)
        return false;
    }
    if (UNLIKELY(IsUtf16())) {
        return false;
    }

    CVector<uint8_t> buf;
    const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
    uint32_t c = data[0];
    uint64_t n = 0;
    if (c == '0') {
        *index = 0;
        return len == 1;
    }
    if (c > '0' && c <= '9') {
        n = c - '0';
        for (uint32_t i = 1; i < len; i++) {
            c = data[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
            if (c < '0' || c > '9') {
                return false;
            }
            // NOLINTNEXTLINE(readability-magic-numbers)
            n = n * 10 + (c - '0');  // 10: decimal factor
        }
        if (n < JSObject::MAX_ELEMENT_INDEX) {
            *index = static_cast<uint32_t>(n);
            return true;
        }
    }
    return false;
}

// Tries to interpret this string as a typed-array index.
// Returns false for empty, over-long (> MAX_ELEMENT_INDEX_LEN) or UTF-16 strings and for
// strings that contain a character other than a digit or '.' after a leading digit.
// Otherwise returns true with *index set to:
// - the parsed value, if it is below JSObject::MAX_ELEMENT_INDEX;
// - JSObject::MAX_ELEMENT_INDEX, if the value is too large, a '.' is encountered, or the
//   string starts with '-'.
// A string starting with '0' sets *index to 0 and succeeds only if it is exactly "0".
bool EcmaString::ToTypedArrayIndex(uint32_t *index)
{
    uint32_t len = GetLength();
    if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {
        return false;
    }
    if (UNLIKELY(IsUtf16())) {
        return false;
    }

    CVector<uint8_t> buf;
    const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
    uint32_t c = data[0];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    uint64_t n = 0;
    if (c == '0') {
        *index = 0;
        return len == 1;
    }
    if (c > '0' && c <= '9') {
        n = c - '0';
        for (uint32_t i = 1; i < len; i++) {
            c = data[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
            if (c >= '0' && c <= '9') {
                // NOLINTNEXTLINE(readability-magic-numbers)
                n = n * 10 + (c - '0');  // 10: decimal factor
            } else if (c == '.') {
                n = JSObject::MAX_ELEMENT_INDEX;
                break;
            } else {
                return false;
            }
        }
        if (n < JSObject::MAX_ELEMENT_INDEX) {
            *index = static_cast<uint32_t>(n);
            return true;
        } else {
            *index = JSObject::MAX_ELEMENT_INDEX;
            return true;
        }
    } else if (c == '-') {
        *index = JSObject::MAX_ELEMENT_INDEX;
        return true;
    }
    return false;
}

// Shared implementation of Trim: determines the [start, end] range to keep using
// base::StringHelper::GetStart / GetEnd according to `mode`, then returns that range
// via FastSubString. `data` must view the code units of `src`.
template<typename T>
EcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)
{
    uint32_t srcLen = src->GetLength();
    int32_t start = 0;
    int32_t end = static_cast<int32_t>(srcLen) - 1;

    if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) {
        start = static_cast<int32_t>(base::StringHelper::GetStart(data, srcLen));
    }
    if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) {
        end = base::StringHelper::GetEnd(data, start, srcLen);
    }
    EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, static_cast<uint32_t>(end - start + 1));
    return res;
}

// Returns a lower-cased copy of `src`.
// UTF-16 strings go through base::StringHelper::ToLower; UTF-8 strings use the
// ASCII-only fast path ConvertUtf8ToLowerOrUpper.
/* static */
EcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
{
    auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
    uint32_t srcLength = srcFlat->GetLength();
    auto factory = vm->GetFactory();
    if (srcFlat->IsUtf16()) {
        std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat->GetDataUtf16(), srcLength);
        std::string res = base::StringHelper::ToLower(u16str);
        return *(factory->NewFromStdString(res));
    } else {
        return ConvertUtf8ToLowerOrUpper(vm, srcFlat, true);
    }
}

// Lower-cases `src`, returning `src` itself when it contains no character in 'A'..'Z'.
// The data is read through GetDataUtf8Writable without an encoding check.
// NOTE(review): presumably callers only pass UTF-8 strings - confirm.
/* static */
EcmaString *EcmaString::TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
{
    auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
    uint32_t srcLength = srcFlat->GetLength();
    const char start = 'A';
    const char end = 'Z';
    // Index of the first upper-case character; srcLength means "none found".
    uint32_t upperIndex = srcLength;
    Span<uint8_t> data(srcFlat->GetDataUtf8Writable(), srcLength);
    for (uint32_t index = 0; index < srcLength; ++index) {
        if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
            upperIndex = index;
            break;
        }
    }
    if (upperIndex == srcLength) {
        return *src;
    }
    return ConvertUtf8ToLowerOrUpper(vm, srcFlat, true, upperIndex);
}

// Creates a new UTF-8 line string with the case of ASCII letters converted.
// With `toLower` set, 'A'..'Z' are converted; otherwise 'a'..'z'. Characters before
// `startIndex` are copied unchanged, all other non-matching characters as well.
/* static */
EcmaString *EcmaString::ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &srcFlat,
                                                  bool toLower, uint32_t startIndex)
{
    const char start = toLower ? 'A' : 'a';
    const char end = toLower ? 'Z' : 'z';
    uint32_t srcLength = srcFlat->GetLength();
    auto newString = CreateLineString(vm, srcLength, true);
    // The source data pointer is taken only after the new string has been created.
    Span<uint8_t> data(srcFlat->GetDataUtf8Writable(), srcLength);
    auto newStringPtr = newString->GetDataUtf8Writable();
    if (startIndex > 0) {
        if (memcpy_s(newStringPtr, startIndex * sizeof(uint8_t), data.data(), startIndex * sizeof(uint8_t)) != EOK) {
            LOG_FULL(FATAL) << "memcpy_s failed";
            UNREACHABLE();
        }
    }
    for (uint32_t index = startIndex; index < srcLength; ++index) {
        if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
            *(newStringPtr + index) = data[index] ^ (1 << 5);   // 1 and 5 means lower to upper or upper to lower
        } else {
            *(newStringPtr + index) = data[index];
        }
    }
    return newString;
}

// Returns an upper-cased copy of `src`.
// UTF-16 strings go through base::StringHelper::ToUpper; UTF-8 strings use the
// ASCII-only fast path ConvertUtf8ToLowerOrUpper.
/* static */
EcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
{
    auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
    uint32_t srcLength = srcFlat->GetLength();
    auto factory = vm->GetFactory();
    if (srcFlat->IsUtf16()) {
        std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat->GetDataUtf16(), srcLength);
        std::string res = base::StringHelper::ToUpper(u16str);
        return *(factory->NewFromStdString(res));
    } else {
        return ConvertUtf8ToLowerOrUpper(vm, srcFlat, false);
    }
}

// Returns a copy of `src` lower-cased with base::StringHelper::ToLocaleLower for `locale`.
/* static */
EcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
{
    auto factory = vm->GetFactory();
    auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
    std::u16string utf16 = srcFlat->ToU16String();
    std::string res = base::StringHelper::ToLocaleLower(utf16, locale);
    return *(factory->NewFromStdString(res));
}

// Returns a copy of `src` upper-cased with base::StringHelper::ToLocaleUpper for `locale`.
/* static */
EcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
{
    auto factory = vm->GetFactory();
    auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
    std::u16string utf16 = srcFlat->ToU16String();
    std::string res = base::StringHelper::ToLocaleUpper(utf16, locale);
    return *(factory->NewFromStdString(res));
}

// Trims `src` at the start, the end or both, depending on `mode`.
// An empty input yields the global empty string constant.
EcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)
{
    auto srcFlat = JSHandle<EcmaString>(thread, Flatten(thread->GetEcmaVM(), src));
    uint32_t srcLen = srcFlat->GetLength();
    if (UNLIKELY(srcLen == 0)) {
        return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject());
    }
    if (srcFlat->IsUtf8()) {
        Span<const uint8_t> data(srcFlat->GetDataUtf8(), srcLen);
        return TrimBody(thread, srcFlat, data, mode);
    } else {
        Span<const uint16_t> data(srcFlat->GetDataUtf16(), srcLen);
        return TrimBody(thread, srcFlat, data, mode);
    }
}

// Flattens a tree string that is not flat yet: creates a line string of the same
// length and encoding, writes the tree's contents into it, then stores the line string
// as the tree's first child and the empty string as its second child.
// Returns the new line string.
EcmaString *EcmaString::SlowFlatten(const EcmaVM *vm, const JSHandle<TreeEcmaString> &string)
{
    auto thread = vm->GetJSThread();
    ASSERT(EcmaString::Cast(string->GetSecond())->GetLength() != 0);

    uint32_t length = string->GetLength();
    EcmaString *result = nullptr;
    if (string->IsUtf8()) {
        result = CreateLineString(vm, length, true);
        WriteToFlat<uint8_t>(*string, result->GetDataUtf8Writable(), length);
    } else {
        result = CreateLineString(vm, length, false);
        WriteToFlat<uint16_t>(*string, result->GetDataUtf16Writable(), length);
    }
    string->SetFirst(thread, JSTaggedValue(result));
    string->SetSecond(thread, JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
    return result;
}

// Returns a flat representation of `string`:
// - line and constant strings are returned as they are;
// - a flat tree string returns its first child;
// - any other tree string is flattened via SlowFlatten.
EcmaString *EcmaString::Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string)
{
    EcmaString *s = *string;
    if (s->IsLineOrConstantString()) {
        return s;
    }
    if (s->IsTreeString()) {
        JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
        if (!tree->IsFlat()) {
            return SlowFlatten(vm, tree);
        }
        s = EcmaString::Cast(tree->GetFirst());
    }
    return s;
}

// Same result as Flatten, but operates on a raw pointer inside a
// DISALLOW_GARBAGE_COLLECTION scope and allocates through CreateLineStringNoGC.
EcmaString *EcmaString::FlattenNoGC(const EcmaVM *vm, EcmaString *string)
{
    DISALLOW_GARBAGE_COLLECTION;
    if (string->IsLineOrConstantString()) {
        return string;
    }
    if (string->IsTreeString()) {
        TreeEcmaString *tree = TreeEcmaString::Cast(string);
        if (tree->IsFlat()) {
            string = EcmaString::Cast(tree->GetFirst());
        } else {
            uint32_t length = tree->GetLength();
            EcmaString *result = nullptr;
            if (tree->IsUtf8()) {
                result = CreateLineStringNoGC(vm, length, true);
                WriteToFlat<uint8_t>(tree, result->GetDataUtf8Writable(), length);
            } else {
                result = CreateLineStringNoGC(vm, length, false);
                WriteToFlat<uint16_t>(tree, result->GetDataUtf16Writable(), length);
            }
            tree->SetFirst(vm->GetJSThread(), JSTaggedValue(result));
            tree->SetSecond(vm->GetJSThread(), JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
            return result;
        }
    }
    return string;
}

// Returns a pointer to the contiguous UTF-8 data of `src` without modifying `src`.
// For line/constant strings and flat tree strings the string's own storage is returned.
// For a non-flat tree string the contents are written into `buf` and buf.data() is
// returned, so the pointer is only valid while `buf` is alive and unmodified.
const uint8_t *EcmaString::GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)
{
    ASSERT(src->IsUtf8());
    uint32_t length = src->GetLength();
    EcmaString *string = const_cast<EcmaString *>(src);
    if (string->IsTreeString()) {
        if (string->IsFlat()) {
            string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
        } else {
            // resize (not reserve): the elements must exist before they are written
            // through data().
            buf.resize(length);
            WriteToFlat(string, buf.data(), length);
            return buf.data();
        }
    }
    return string->GetDataUtf8();
}

// Returns a pointer to the contiguous UTF-16 data of `src` without modifying `src`.
// For line/constant strings and flat tree strings the string's own storage is returned.
// For a non-flat tree string the contents are written into `buf` and buf.data() is
// returned, so the pointer is only valid while `buf` is alive and unmodified.
const uint16_t *EcmaString::GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)
{
    ASSERT(src->IsUtf16());
    uint32_t length = src->GetLength();
    EcmaString *string = const_cast<EcmaString *>(src);
    if (string->IsTreeString()) {
        if (string->IsFlat()) {
            string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
        } else {
            // resize (not reserve): the elements must exist before they are written
            // through data().
            buf.resize(length);
            WriteToFlat(string, buf.data(), length);
            return buf.data();
        }
    }
    return string->GetDataUtf16();
}

// Wraps a non-null raw string pointer.
EcmaStringAccessor::EcmaStringAccessor(EcmaString *string)
{
    ASSERT(string != nullptr);
    string_ = string;
}

// Wraps a non-null tagged object, which is cast to EcmaString.
EcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj)
{
    ASSERT(obj != nullptr);
    string_ = EcmaString::Cast(obj);
}

// Wraps the string held by a tagged value; the value must be a string.
EcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value)
{
    ASSERT(value.IsString());
    string_ = EcmaString::Cast(value.GetTaggedObject());
}

// Wraps the string currently referenced by `strHandle`.
EcmaStringAccessor::EcmaStringAccessor(const JSHandle<EcmaString> &strHandle)
    : string_(*strHandle)
{
}

// Converts the wrapped string to a std::string of UTF-8 bytes.
// Returns an empty string when no string is wrapped. `usage` is forwarded to
// ToUtf8Span as the `modify` flag, which is false only for StringConvertedUsage::PRINT.
std::string EcmaStringAccessor::ToStdString(StringConvertedUsage usage)
{
    if (string_ == nullptr) {
        return "";
    }
    bool modify = (usage != StringConvertedUsage::PRINT);
    CVector<uint8_t> buf;
    Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
    std::string res;
    res.reserve(sp.size());
    for (const auto &c : sp) {
        res.push_back(c);
    }
    return res;
}

// Converts the wrapped string to a CString of UTF-8 bytes.
// Returns an empty string when no string is wrapped. `usage` is forwarded to
// ToUtf8Span as the `modify` flag, which is false only for StringConvertedUsage::PRINT.
CString EcmaStringAccessor::ToCString(StringConvertedUsage usage)
{
    if (string_ == nullptr) {
        return "";
    }
    bool modify = (usage != StringConvertedUsage::PRINT);
    CVector<uint8_t> buf;
    Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
    CString res;
    res.reserve(sp.size());
    for (const auto &c : sp) {
        res.push_back(c);
    }
    return res;
}
}  // namespace panda::ecmascript