From 3d4e8cdff271559da0159dcc3953b767543f34dd Mon Sep 17 00:00:00 2001 From: ginxu Date: Thu, 19 Sep 2024 16:45:49 +0800 Subject: [PATCH] Optimize DecodeURIComponent For AOT Issue: https://gitee.com/openharmony/arkcompiler_ets_runtime/issues/IASG14 Signed-off-by: ginxu Change-Id: I845eb9a997c936719644f7c715971671af90c055 --- ecmascript/base/string_helper.h | 8 + ecmascript/base/utf_helper.h | 8 + ecmascript/builtins/builtins.cpp | 3 +- ecmascript/builtins/builtins_global.h | 50 ++-- .../builtins/builtins_call_signature.h | 12 +- ecmascript/compiler/builtins_lowering.cpp | 12 + ecmascript/compiler/builtins_lowering.h | 1 + .../compiler/typed_bytecode_lowering.cpp | 27 +- ecmascript/compiler/typed_bytecode_lowering.h | 7 + ecmascript/global_env_constants.h | 3 +- .../stubs/runtime_optimized_stubs-inl.h | 236 ++++++++++++++++++ ecmascript/stubs/runtime_stub_list.h | 3 +- ecmascript/stubs/runtime_stubs.cpp | 44 ++++ ecmascript/stubs/runtime_stubs.h | 16 ++ test/aottest/BUILD.gn | 1 + .../builtins_decode_uri_component/BUILD.gn | 22 ++ .../builtins_decode_uri_component.ts | 76 ++++++ .../expect_output.txt | 23 ++ .../pgo_expect_output.txt | 18 ++ 19 files changed, 536 insertions(+), 34 deletions(-) create mode 100644 ecmascript/stubs/runtime_optimized_stubs-inl.h create mode 100644 test/aottest/builtins_decode_uri_component/BUILD.gn create mode 100644 test/aottest/builtins_decode_uri_component/builtins_decode_uri_component.ts create mode 100644 test/aottest/builtins_decode_uri_component/expect_output.txt create mode 100644 test/aottest/builtins_decode_uri_component/pgo_expect_output.txt diff --git a/ecmascript/base/string_helper.h b/ecmascript/base/string_helper.h index 912bf7b979..c8aeef4016 100644 --- a/ecmascript/base/string_helper.h +++ b/ecmascript/base/string_helper.h @@ -193,6 +193,14 @@ public: return idx; } + static inline size_t FindFromU8ToUpper(const std::string &thisStr, uint8_t *u8Data) + { + std::string tmpStr = Utf8ToString(u8Data, 1); + 
std::transform(tmpStr.begin(), tmpStr.end(), tmpStr.begin(), [](unsigned char c) { return std::toupper(c); }); + size_t idx = Find(thisStr, tmpStr, 0); + return idx; + } + static int UnicodeFromUtf8(const uint8_t *p, int maxLen, const uint8_t **pp) { int c = *p++; diff --git a/ecmascript/base/utf_helper.h b/ecmascript/base/utf_helper.h index 594060cd83..9e5468194c 100644 --- a/ecmascript/base/utf_helper.h +++ b/ecmascript/base/utf_helper.h @@ -57,6 +57,8 @@ static constexpr uint8_t BIT_MASK_2 = 0xC0; static constexpr uint8_t BIT_MASK_3 = 0xE0; static constexpr uint8_t BIT_MASK_4 = 0xF0; static constexpr uint8_t BIT_MASK_5 = 0xF8; +static constexpr uint8_t BIT_MASK_FF = 0xFF; +static constexpr uint16_t BIT16_MASK = 0x3FF; static constexpr uint8_t UTF8_1B_MAX = 0x7f; @@ -136,6 +138,12 @@ static inline uint32_t CombineTwoU16(uint16_t d0, uint16_t d1) } std::pair ConvertUtf8ToUnicodeChar(const uint8_t *utf8, size_t maxLen); + +static inline bool IsHexDigits(uint16_t ch) +{ + return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f'); +} + } // namespace panda::ecmascript::base::utf_helper #endif // ECMASCRIPT_BASE_UTF_HELPER_H \ No newline at end of file diff --git a/ecmascript/builtins/builtins.cpp b/ecmascript/builtins/builtins.cpp index 1b90a8e51a..63cfa34116 100644 --- a/ecmascript/builtins/builtins.cpp +++ b/ecmascript/builtins/builtins.cpp @@ -479,7 +479,8 @@ void Builtins::InitializeGlobalObject(const JSHandle &env, const JSHa SetFunction(env, globalObject, "encodeURI", Global::EncodeURI, FunctionLength::ONE); SetFunction(env, globalObject, "escape", Global::Escape, FunctionLength::ONE); SetFunction(env, globalObject, "unescape", Global::Unescape, FunctionLength::ONE); - SetFunction(env, globalObject, "decodeURIComponent", Global::DecodeURIComponent, FunctionLength::ONE); + SetFunction(env, globalObject, "decodeURIComponent", Global::DecodeURIComponent, FunctionLength::ONE, + kungfu::BuiltinsStubCSigns::GlobalDecodeURIComponent); 
SetFunction(env, globalObject, "encodeURIComponent", Global::EncodeURIComponent, FunctionLength::ONE); SetFunction(env, globalObject, "__getCurrentModuleName__", Global::GetCurrentModuleName, FunctionLength::ZERO); SetFunction(env, globalObject, "__getCurrentBundleName__", Global::GetCurrentBundleName, FunctionLength::ZERO); diff --git a/ecmascript/builtins/builtins_global.h b/ecmascript/builtins/builtins_global.h index 01e069e2c5..2a2fdc5821 100644 --- a/ecmascript/builtins/builtins_global.h +++ b/ecmascript/builtins/builtins_global.h @@ -35,31 +35,31 @@ // The following global object properties are not listed here: // - parseFloat ( string ), listed in builtins_number.h instead. // - parseInt ( string ), listed in builtins_number.h instead. -#define BUILTIN_GLOBAL_FUNCTIONS_COMMON(V) \ - /* decodeURI ( encodedURI ) */ \ - V("decodeURI", DecodeURI, 1, INVALID) \ - /* decodeURIComponent ( encodedURIComponent ) */ \ - V("decodeURIComponent", DecodeURIComponent, 1, INVALID) \ - /* encodeURI ( uri ) */ \ - V("encodeURI", EncodeURI, 1, INVALID) \ - /* encodeURIComponent ( uriComponent ) */ \ - V("encodeURIComponent", EncodeURIComponent, 1, INVALID) \ - /* escape ( string ), defined in B.2.1 */ \ - V("escape", Escape, 1, INVALID) \ - /* eval ( x ), which is NOT supported in ArkTS engine */ \ - V("eval", NotSupportEval, 1, INVALID) \ - /* isFinite ( number ) */ \ - V("isFinite", IsFinite, 1, GlobalIsFinite) \ - /* isNaN ( number ) */ \ - V("isNaN", IsNaN, 1, GlobalIsNan) \ - /* unescape ( string )*/ \ - V("unescape", Unescape, 1, INVALID) \ - /* The following are ArkTS extensions */ \ - V("markModuleCollectable", MarkModuleCollectable, 0, INVALID) \ - V("loadNativeModule", LoadNativeModule, 0, INVALID) \ - V("print", PrintEntrypoint, 0, INVALID) \ - V("isSendable", IsSendable, 0, INVALID) \ - V("__getCurrentModuleName__", GetCurrentModuleName, 0, INVALID) \ +#define BUILTIN_GLOBAL_FUNCTIONS_COMMON(V) \ + /* decodeURI ( encodedURI ) */ \ + V("decodeURI", DecodeURI, 1, 
INVALID) \ + /* decodeURIComponent ( encodedURIComponent ) */ \ + V("decodeURIComponent", DecodeURIComponent, 1, GlobalDecodeURIComponent) \ + /* encodeURI ( uri ) */ \ + V("encodeURI", EncodeURI, 1, INVALID) \ + /* encodeURIComponent ( uriComponent ) */ \ + V("encodeURIComponent", EncodeURIComponent, 1, INVALID) \ + /* escape ( string ), defined in B.2.1 */ \ + V("escape", Escape, 1, INVALID) \ + /* eval ( x ), which is NOT supported in ArkTS engine */ \ + V("eval", NotSupportEval, 1, INVALID) \ + /* isFinite ( number ) */ \ + V("isFinite", IsFinite, 1, GlobalIsFinite) \ + /* isNaN ( number ) */ \ + V("isNaN", IsNaN, 1, GlobalIsNan) \ + /* unescape ( string )*/ \ + V("unescape", Unescape, 1, INVALID) \ + /* The following are ArkTS extensions */ \ + V("markModuleCollectable", MarkModuleCollectable, 0, INVALID) \ + V("loadNativeModule", LoadNativeModule, 0, INVALID) \ + V("print", PrintEntrypoint, 0, INVALID) \ + V("isSendable", IsSendable, 0, INVALID) \ + V("__getCurrentModuleName__", GetCurrentModuleName, 0, INVALID) \ V("__getCurrentBundleName__", GetCurrentBundleName, 0, INVALID) #if ECMASCRIPT_ENABLE_RUNTIME_STAT #define BUILTIN_GLOBAL_FUNCTIONS_RUNTIME_STAT(V) \ diff --git a/ecmascript/compiler/builtins/builtins_call_signature.h b/ecmascript/compiler/builtins/builtins_call_signature.h index 1293834c10..f5cabce33f 100644 --- a/ecmascript/compiler/builtins/builtins_call_signature.h +++ b/ecmascript/compiler/builtins/builtins_call_signature.h @@ -34,7 +34,8 @@ namespace panda::ecmascript::kungfu { BUILTINS_ARKTOOLS_STUB_BUILDER(D) #define BUILTINS_NOSTUB_LIST(V) \ - V(ObjectConstructor) + V(ObjectConstructor) \ + V(GlobalDecodeURIComponent) #define BUILTINS_METHOD_STUB_LIST(V, T, D, K) \ BUILTINS_WITH_STRING_STUB_BUILDER(V) \ @@ -375,6 +376,7 @@ public: return (BuiltinsStubCSigns::ID::StringLocaleCompare == builtinId) || (BuiltinsStubCSigns::ID::StringIteratorProtoNext == builtinId) || (BuiltinsStubCSigns::ID::ArraySort == builtinId) || + 
(BuiltinsStubCSigns::ID::GlobalDecodeURIComponent == builtinId) || ((BuiltinsStubCSigns::ID::TYPED_BUILTINS_FIRST <= builtinId) && (builtinId <= BuiltinsStubCSigns::ID::TYPED_BUILTINS_LAST)); } @@ -456,6 +458,11 @@ public: return BuiltinsStubCSigns::ID::NumberConstructor == builtinId; } + static bool IsTypedBuiltinGlobal(ID builtinId) + { + return BuiltinsStubCSigns::ID::GlobalDecodeURIComponent == builtinId; + } + static bool IsTypedBuiltinCallThis0(ID builtinId) { switch (builtinId) { @@ -733,6 +740,8 @@ public: return ConstantIndex::ARRAY_POP_INDEX; case BuiltinsStubCSigns::ID::ArraySlice: return ConstantIndex::ARRAY_SLICE_INDEX; + case BuiltinsStubCSigns::ID::GlobalDecodeURIComponent: + return ConstantIndex::GLOBAL_DECODE_URI_COMPONENT; default: LOG_COMPILER(INFO) << "GetConstantIndex Invalid Id:" << builtinId; return ConstantIndex::INVALID; @@ -993,6 +1002,7 @@ enum class BuiltinsArgs : size_t { #define IS_TYPED_BUILTINS_ID(id) kungfu::BuiltinsStubCSigns::IsTypedBuiltin(id) #define IS_TYPED_INLINE_BUILTINS_ID(id) kungfu::BuiltinsStubCSigns::IsTypedInlineBuiltin(id) #define IS_TYPED_BUILTINS_NUMBER_ID(id) kungfu::BuiltinsStubCSigns::IsTypedBuiltinNumber(id) +#define IS_TYPED_BUILTINS_GLOBAL_ID(id) kungfu::BuiltinsStubCSigns::IsTypedBuiltinGlobal(id) #define IS_TYPED_BUILTINS_ID_CALL_THIS0(id) kungfu::BuiltinsStubCSigns::IsTypedBuiltinCallThis0(id) #define IS_TYPED_BUILTINS_ID_CALL_THIS1(id) kungfu::BuiltinsStubCSigns::IsTypedBuiltinCallThis1(id) #define IS_TYPED_BUILTINS_ID_CALL_THIS3(id) kungfu::BuiltinsStubCSigns::IsTypedBuiltinCallThis3(id) diff --git a/ecmascript/compiler/builtins_lowering.cpp b/ecmascript/compiler/builtins_lowering.cpp index 42d9842439..d2fb4fed1a 100644 --- a/ecmascript/compiler/builtins_lowering.cpp +++ b/ecmascript/compiler/builtins_lowering.cpp @@ -53,6 +53,9 @@ void BuiltinLowering::LowerTypedCallBuitin(GateRef gate) case BUILTINS_STUB_ID(NumberConstructor): LowerNumberConstructor(gate); break; + case 
BUILTINS_STUB_ID(GlobalDecodeURIComponent): + LowerGlobalDecodeURIComponent(gate); + break; default: break; } @@ -391,6 +394,7 @@ GateRef BuiltinLowering::CheckPara(GateRef gate, GateRef funcCheck) case BuiltinsStubCSigns::ID::TypedArrayEntries: case BuiltinsStubCSigns::ID::TypedArrayKeys: case BuiltinsStubCSigns::ID::TypedArrayValues: + case BuiltinsStubCSigns::ID::GlobalDecodeURIComponent: // Don't need check para return funcCheck; default: { @@ -530,4 +534,12 @@ void BuiltinLowering::LowerNumberConstructor(GateRef gate) builder_.Bind(&exit); ReplaceHirWithValue(gate, *result); } + +void BuiltinLowering::LowerGlobalDecodeURIComponent(GateRef gate) +{ + GateRef glue = acc_.GetGlueFromArgList(); + GateRef param = acc_.GetValueIn(gate, 0); + GateRef result = LowerCallRuntime(glue, gate, RTSTUB_ID(DecodeURIComponent), { param }, true); + ReplaceHirWithValue(gate, result); +} } // namespace panda::ecmascript::kungfu diff --git a/ecmascript/compiler/builtins_lowering.h b/ecmascript/compiler/builtins_lowering.h index 0984c462e2..9b890e6fe3 100644 --- a/ecmascript/compiler/builtins_lowering.h +++ b/ecmascript/compiler/builtins_lowering.h @@ -48,6 +48,7 @@ private: void LowerIteratorNext(GateRef gate, BuiltinsStubCSigns::ID id); void LowerIteratorReturn(GateRef gate, BuiltinsStubCSigns::ID id); void LowerNumberConstructor(GateRef gate); + void LowerGlobalDecodeURIComponent(GateRef gate); Circuit *circuit_ {nullptr}; CircuitBuilder builder_; diff --git a/ecmascript/compiler/typed_bytecode_lowering.cpp b/ecmascript/compiler/typed_bytecode_lowering.cpp index 28d01279f4..40ad9d69d9 100644 --- a/ecmascript/compiler/typed_bytecode_lowering.cpp +++ b/ecmascript/compiler/typed_bytecode_lowering.cpp @@ -1636,9 +1636,6 @@ void TypedBytecodeLowering::LowerTypedSuperCall(GateRef gate) void TypedBytecodeLowering::SpeculateCallBuiltin(GateRef gate, GateRef func, const std::vector &args, BuiltinsStubCSigns::ID id, bool isThrow, bool isSideEffect) { - if (IS_TYPED_INLINE_BUILTINS_ID(id)) 
{ - return; - } if (!Uncheck()) { builder_.CallTargetCheck(gate, func, builder_.IntPtr(static_cast(id)), {args[0]}); } @@ -1652,6 +1649,18 @@ void TypedBytecodeLowering::SpeculateCallBuiltin(GateRef gate, GateRef func, con } } +void TypedBytecodeLowering::SpeculateCallBuiltinFromGlobal(GateRef gate, const std::vector &args, + BuiltinsStubCSigns::ID id, bool isThrow, bool isSideEffect) +{ + GateRef result = builder_.TypedCallBuiltin(gate, args, id, isSideEffect); + + if (isThrow) { + acc_.ReplaceGate(gate, builder_.GetState(), builder_.GetDepend(), result); + } else { + acc_.ReplaceHirAndDeleteIfException(gate, builder_.GetStateDepend(), result); + } +} + void TypedBytecodeLowering::LowerFastCall(GateRef gate, GateRef func, const std::vector &argsFastCall, bool isNoGC) { @@ -1903,9 +1912,17 @@ void TypedBytecodeLowering::LowerTypedCallArg1(GateRef gate) GateRef func = tacc.GetFunc(); GateRef a0Value = tacc.GetValue(); BuiltinsStubCSigns::ID id = tacc.TryGetPGOBuiltinMethodId(); - if (!IS_INVALID_ID(id) && IS_TYPED_BUILTINS_NUMBER_ID(id)) { + if (!IS_INVALID_ID(id) && (IS_TYPED_BUILTINS_NUMBER_ID(id) || IS_TYPED_BUILTINS_GLOBAL_ID(id))) { + if (IS_TYPED_INLINE_BUILTINS_ID(id)) { + return; + } AddProfiling(gate); - SpeculateCallBuiltin(gate, func, { a0Value }, id, true); + if (IsFuncFromGlobal(func)) { + // No need to do CallTargetCheck if func is from LOAD_BUILTIN_OBJECT. 
+ SpeculateCallBuiltinFromGlobal(gate, { a0Value }, id, true); + } else { + SpeculateCallBuiltin(gate, func, { a0Value }, id, true); + } } else { if (!tacc.IsValidCallMethodId()) { return; diff --git a/ecmascript/compiler/typed_bytecode_lowering.h b/ecmascript/compiler/typed_bytecode_lowering.h index 80c0b5e575..0927049a9f 100644 --- a/ecmascript/compiler/typed_bytecode_lowering.h +++ b/ecmascript/compiler/typed_bytecode_lowering.h @@ -222,6 +222,8 @@ private: void SpeculateConditionJump(const ConditionJumpTypeInfoAccessor &tacc, bool flag); void SpeculateCallBuiltin(GateRef gate, GateRef func, const std::vector &args, BuiltinsStubCSigns::ID id, bool isThrow, bool isSideEffect = false); + void SpeculateCallBuiltinFromGlobal(GateRef gate, const std::vector &args, + BuiltinsStubCSigns::ID id, bool isThrow, bool isSideEffect = false); void DeleteConstDataIfNoUser(GateRef gate); bool TryLowerNewBuiltinConstructor(GateRef gate); bool TryLowerTypedLdobjBynameFromGloablBuiltin(GateRef gate); @@ -234,6 +236,11 @@ private: void AddProfiling(GateRef gate); + bool IsFuncFromGlobal(GateRef gate) const + { + return acc_.GetOpCode(gate) == OpCode::LOAD_BUILTIN_OBJECT; + } + bool Uncheck() const { return noCheck_; diff --git a/ecmascript/global_env_constants.h b/ecmascript/global_env_constants.h index de60a71ca1..954af3f120 100644 --- a/ecmascript/global_env_constants.h +++ b/ecmascript/global_env_constants.h @@ -287,7 +287,8 @@ class ObjectFactory; V(JSTaggedValue, ArraySome, ARRAY_SOME_INDEX, ecma_roots_builtins) \ V(JSTaggedValue, ArrayEvery, ARRAY_EVERY_INDEX, ecma_roots_builtins) \ V(JSTaggedValue, ArrayPop, ARRAY_POP_INDEX, ecma_roots_builtins) \ - V(JSTaggedValue, ArraySlice, ARRAY_SLICE_INDEX, ecma_roots_builtins) + V(JSTaggedValue, ArraySlice, ARRAY_SLICE_INDEX, ecma_roots_builtins) \ + V(JSTaggedValue, GloablDecodeURIComponent, GLOBAL_DECODE_URI_COMPONENT, ecma_roots_builtins) // All of type JSTaggedValue #define SHARED_GLOBAL_ENV_CONSTANT_STRING(V) \ diff --git 
a/ecmascript/stubs/runtime_optimized_stubs-inl.h b/ecmascript/stubs/runtime_optimized_stubs-inl.h new file mode 100644 index 0000000000..3bb0e96ef4 --- /dev/null +++ b/ecmascript/stubs/runtime_optimized_stubs-inl.h @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2024 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ECMASCRIPT_STUBS_RUNTIME_OPTIMIZED_STUBS_INL_H +#define ECMASCRIPT_STUBS_RUNTIME_OPTIMIZED_STUBS_INL_H + +#include "ecmascript/stubs/runtime_stubs.h" +#include "ecmascript/ecma_string-inl.h" + +namespace panda::ecmascript { +template +uint16_t RuntimeStubs::GetCodeUnit(Span &sp, int32_t index, int32_t length) +{ + if ((index < 0) || (index >= length)) { + return 0; + } + return sp[index]; +} + +template +JSTaggedValue RuntimeStubs::DecodePercentEncoding(JSThread *thread, int32_t &n, int32_t &k, + const JSHandle &str, uint8_t &bb, + std::vector &oct, Span &sp, int32_t strLen) +{ + CString errorMsg; + int32_t j = 1; + while (j < n) { + k++; + uint16_t codeUnit = GetCodeUnit(sp, k, strLen); + // b. If the code unit at index k within string is not "%", throw a URIError exception. + // c. If the code units at index (k +1) and (k + 2) within string do not represent hexadecimal + // digits, throw a URIError exception. 
+ if (!(codeUnit == '%')) { + errorMsg = "DecodeURI: invalid character: " + ConvertToString(str.GetTaggedValue()); + THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception()); + } + + uint16_t frontChart = GetCodeUnit(sp, k + 1, strLen); + uint16_t behindChart = GetCodeUnit(sp, k + 2, strLen); // 2: means plus 2 + if (!(base::utf_helper::IsHexDigits(frontChart) && base::utf_helper::IsHexDigits(behindChart))) { + errorMsg = "DecodeURI: invalid character: " + ConvertToString(str.GetTaggedValue()); + THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception()); + } + bb = GetValueFromTwoHex(frontChart, behindChart); + // e. If the two most significant bits in B are not 10, throw a URIError exception. + if (!((bb & base::utf_helper::BIT_MASK_2) == base::utf_helper::BIT_MASK_1)) { + errorMsg = "DecodeURI: invalid character: " + ConvertToString(str.GetTaggedValue()); + THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception()); + } + k += 2; // 2: means plus 2 + oct.push_back(bb); + j++; + } + return JSTaggedValue::True(); +} + +JSTaggedValue RuntimeStubs::UTF16EncodeCodePoint(JSThread *thread, const std::vector &oct, + const JSHandle &str, std::u16string &sStr) +{ + if (!base::utf_helper::IsValidUTF8(oct)) { + CString errorMsg = "DecodeURI: invalid character: " + ConvertToString(str.GetTaggedValue()); + THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception()); + } + uint32_t vv = base::StringHelper::Utf8ToU32String(oct); + if (vv < base::utf_helper::DECODE_SECOND_FACTOR) { + sStr = base::StringHelper::Utf16ToU16String(reinterpret_cast(&vv), 1); + } else { + uint16_t lv = (((vv - base::utf_helper::DECODE_SECOND_FACTOR) & base::utf_helper::BIT16_MASK) + + base::utf_helper::DECODE_TRAIL_LOW); + // NOLINT + uint16_t hv = ((((vv - base::utf_helper::DECODE_SECOND_FACTOR) >> 10U) & base::utf_helper::BIT16_MASK) + + base::utf_helper::DECODE_LEAD_LOW); // 10: means shift right by 10 bits +
sStr = base::StringHelper::Append(base::StringHelper::Utf16ToU16String(&hv, 1), + base::StringHelper::Utf16ToU16String(&lv, 1)); + } + return JSTaggedValue::True(); +} + +template +JSTaggedValue RuntimeStubs::DecodePercentEncoding(JSThread *thread, const JSHandle &str, int32_t &k, + int32_t strLen, std::u16string &sStr, Span &sp) +{ + [[maybe_unused]] uint32_t start = static_cast(k); + CString errorMsg; + // ii. If k + 2 is greater than or equal to strLen, throw a URIError exception. + // iii. If the code units at index (k+1) and (k + 2) within string do not represent hexadecimal digits, + // throw a URIError exception. + if ((k + 2) >= strLen) { // 2: means plus 2 + errorMsg = "DecodeURI: invalid character: " + ConvertToString(str.GetTaggedValue()); + THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception()); + } + uint16_t frontChar = GetCodeUnit(sp, k + 1, strLen); + uint16_t behindChar = GetCodeUnit(sp, k + 2, strLen); // 2: means plus 2 + if (!(base::utf_helper::IsHexDigits(frontChar) && base::utf_helper::IsHexDigits(behindChar))) { + errorMsg = "DecodeURI: invalid character: " + ConvertToString(str.GetTaggedValue()); + THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception()); + } + uint8_t bb = GetValueFromTwoHex(frontChar, behindChar); + k += 2; // 2: means plus 2 + if ((bb & base::utf_helper::BIT_MASK_1) == 0) { + sStr = base::StringHelper::Utf8ToU16String(&bb, 1); + } else { + // vii. Else the most significant bit in B is 1, + // 1. Let n be the smallest nonnegative integer such that (B << n) & 0x80 is equal to 0. + // 3. Let Octets be an array of 8-bit integers of size n. + // 4. Put B into Octets at index 0. + // 6. Let j be 1. + // 7. Repeat, while j < n + // a. Increase k by 1. + // d. Let B be the 8-bit value represented by the two hexadecimal digits at + // index (k + 1) and (k + 2). + // f. Increase k by 2. + // g. Put B into Octets at index j. + // h. Increase j by 1. + // 9. 
If V < 0x10000, then + // a. Let C be the code unit V. + // b. If C is not in reservedSet, then + // i. Let S be the String containing only the code unit C. + // c. Else C is in reservedSet, + // i. Let S be the substring of string from index start to index k inclusive. + // 10. Else V ≥ 0x10000, + // a. Let L be (((V – 0x10000) & 0x3FF) + 0xDC00). + // b. Let H be ((((V – 0x10000) >> 10) & 0x3FF) + 0xD800). + // c. Let S be the String containing the two code units H and L. + int32_t n = 0; + while ((((static_cast(bb) << static_cast(n)) & base::utf_helper::BIT_MASK_1) != 0)) { + n++; + if (n > 4) { // 4: n greater than 4 is rejected below, so stop counting + break; + } + } + // 2. If n equals 1 or n is greater than 4, throw a URIError exception. + if ((n == 1) || (n > 4)) { + errorMsg = "DecodeURI: invalid character: " + ConvertToString(str.GetTaggedValue()); + THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception()); + } + + std::vector oct = {bb}; + + // 5. If k + (3 × (n – 1)) is greater than or equal to strLen, throw a URIError exception. + if (k + (3 * (n - 1)) >= strLen) { // 3: means multiply by 3 + errorMsg = "DecodeURI: invalid character: " + ConvertToString(str.GetTaggedValue()); + THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception()); + } + DecodePercentEncoding(thread, n, k, str, bb, oct, sp, strLen); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + UTF16EncodeCodePoint(thread, oct, str, sStr); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + } + return JSTaggedValue::True(); +} + +template +JSTaggedValue RuntimeStubs::RuntimeDecodeURIComponent(JSThread *thread, const JSHandle &string, + const T *data) +{ + // 1. Let strLen be the number of code units in string. + CString errorMsg; + auto stringAcc = EcmaStringAccessor(string); + int32_t strLen = static_cast(stringAcc.GetLength()); + // 2. Let R be the empty String.
+ ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); + std::u16string resStr; + std::vector tmpVec; + tmpVec.resize(strLen); + if (LIKELY(strLen != 0)) { + if (memcpy_s(tmpVec.data(), sizeof(T) * strLen, data, sizeof(T) * strLen) != EOK) { + LOG_FULL(FATAL) << "memcpy_s failed"; + UNREACHABLE(); + } + } + Span sp(tmpVec.data(), strLen); + // 3. Let k be 0. + // 4. Repeat + int32_t k = 0; + while (true) { + if (k == strLen) { + // a. If k equals strLen, return R. + auto *uint16tData = reinterpret_cast(resStr.data()); + uint32_t resSize = resStr.size(); + return factory->NewFromUtf16Literal(uint16tData, resSize).GetTaggedValue(); + } + + // b. Let C be the code unit at index k within string. + // c. If C is not "%", then + // i. Let S be the String containing only the code unit C. + // d. Else C is "%", + // i. Let start be k. + // iv. Let B be the 8-bit value represented by the two hexadecimal digits at index (k + 1) and (k + 2). + // v. Increase k by 2. + // vi. If the most significant bit in B is 0, then + // 1. Let C be the code unit with code unit value B. + // 2. If C is not in reservedSet, then + // a. Let S be the String containing only the code unit C. + // 3. Else C is in reservedSet, + // a. Let S be the substring of string from index start to index k inclusive. 
+ uint16_t cc = GetCodeUnit(sp, k, strLen); + std::u16string sStr; + if (cc != '%') { + if (cc == 0 && strLen == 1) { + JSHandle tmpEcmaString = factory->NewFromUtf16Literal(&cc, 1); + return tmpEcmaString.GetTaggedValue(); + } + sStr = base::StringHelper::Utf16ToU16String(&cc, 1); + } else { + DecodePercentEncoding(thread, string, k, strLen, sStr, sp); + RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread); + } + resStr += sStr; + k++; + } +} + +uint8_t RuntimeStubs::GetValueFromTwoHex(uint8_t front, uint8_t behind) +{ + std::string hexString("0123456789ABCDEF"); + size_t idxf = base::StringHelper::FindFromU8ToUpper(hexString, &front); + size_t idxb = base::StringHelper::FindFromU8ToUpper(hexString, &behind); + + uint8_t res = ((idxf << 4U) | idxb) & base::utf_helper::BIT_MASK_FF; // NOLINT 4: means shift left by 4 digits + return res; +} +} // namespace panda::ecmascript +#endif // ECMASCRIPT_STUBS_RUNTIME_OPTIMIZED_STUBS_INL_H \ No newline at end of file diff --git a/ecmascript/stubs/runtime_stub_list.h b/ecmascript/stubs/runtime_stub_list.h index 28189d8817..6977ac0ef9 100644 --- a/ecmascript/stubs/runtime_stub_list.h +++ b/ecmascript/stubs/runtime_stub_list.h @@ -479,7 +479,8 @@ namespace panda::ecmascript { V(GetSharedModule) \ V(SuperCallForwardAllArgs) \ V(OptSuperCallForwardAllArgs) \ - V(GetCollationValueFromIcuCollator) + V(GetCollationValueFromIcuCollator) \ + V(DecodeURIComponent) #define RUNTIME_STUB_LIST(V) \ diff --git a/ecmascript/stubs/runtime_stubs.cpp b/ecmascript/stubs/runtime_stubs.cpp index 1acb42d240..35ad37fa1f 100644 --- a/ecmascript/stubs/runtime_stubs.cpp +++ b/ecmascript/stubs/runtime_stubs.cpp @@ -18,6 +18,7 @@ #include #include +#include "ecmascript/stubs/runtime_optimized_stubs-inl.h" #include "ecmascript/stubs/runtime_stubs-inl.h" #include "ecmascript/base/json_stringifier.h" #include "ecmascript/base/typed_array_helper-inl.h" @@ -3048,6 +3049,49 @@ DEF_RUNTIME_STUBS(TryGetInternString) return RuntimeTryGetInternString(argGlue, string); } 
+DEF_RUNTIME_STUBS(DecodeURIComponent) +{ + RUNTIME_STUBS_HEADER(DecodeURIComponent); + JSHandle arg = GetHArg(argv, argc, 0); + JSHandle string = JSTaggedValue::ToString(thread, arg); + if (thread->HasPendingException()) { + return JSTaggedValue::VALUE_EXCEPTION; + } + if (EcmaStringAccessor(string).IsTreeString()) { + string = JSHandle(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string)); + } + auto stringAcc = EcmaStringAccessor(string); + JSTaggedValue result; + if (stringAcc.IsLineString()) { + // line string or flatten tree string + if (!stringAcc.IsUtf16()) { + result = RuntimeDecodeURIComponent(thread, string, stringAcc.GetDataUtf8()); + } else { + result = RuntimeDecodeURIComponent(thread, string, stringAcc.GetDataUtf16()); + } + } else if (stringAcc.IsConstantString()) { + ASSERT(stringAcc.IsUtf8()); + result = RuntimeDecodeURIComponent(thread, string, stringAcc.GetDataUtf8()); + } else { + ASSERT(stringAcc.IsSlicedString()); + auto parent = SlicedString::Cast(string.GetTaggedValue())->GetParent(); + auto parentStrAcc = EcmaStringAccessor(parent); + auto startIndex = SlicedString::Cast(string.GetTaggedValue())->GetStartIndex(); + if (parentStrAcc.IsLineString()) { + if (parentStrAcc.IsUtf8()) { + result = RuntimeDecodeURIComponent(thread, string, + parentStrAcc.GetDataUtf8() + startIndex); + } else { + result = RuntimeDecodeURIComponent(thread, string, + parentStrAcc.GetDataUtf16() + startIndex); + } + } else { + result = RuntimeDecodeURIComponent(thread, string, parentStrAcc.GetDataUtf8() + startIndex); + } + } + return result.GetRawData(); +} + JSTaggedType RuntimeStubs::CreateArrayFromList([[maybe_unused]] uintptr_t argGlue, int32_t argc, JSTaggedValue *argvPtr) { diff --git a/ecmascript/stubs/runtime_stubs.h b/ecmascript/stubs/runtime_stubs.h index c4d5c24042..e0a55e89cc 100644 --- a/ecmascript/stubs/runtime_stubs.h +++ b/ecmascript/stubs/runtime_stubs.h @@ -517,6 +517,22 @@ private: const JSHandle &classLiteral); static inline 
JSTaggedType RuntimeTryGetInternString(uintptr_t argGlue, const JSHandle &string); static inline void RuntimeSetPatchModule(JSThread *thread, const JSHandle &func); + template + static inline JSTaggedValue RuntimeDecodeURIComponent(JSThread *thread, const JSHandle &str, + const T *data); + template + static inline uint16_t GetCodeUnit(Span &sp, int32_t index, int32_t length); + template + static inline JSTaggedValue DecodePercentEncoding(JSThread *thread, const JSHandle &str, int32_t &k, + int32_t strLen, std::u16string &sStr, Span &sp); + template + static inline JSTaggedValue DecodePercentEncoding(JSThread *thread, int32_t &n, int32_t &k, + const JSHandle &str, uint8_t &bb, + std::vector &oct, Span &sp, + int32_t strLen); + static inline JSTaggedValue UTF16EncodeCodePoint(JSThread *thread, const std::vector &oct, + const JSHandle &str, std::u16string &sStr); + static inline uint8_t GetValueFromTwoHex(uint8_t front, uint8_t behind); friend class SlowRuntimeStub; }; } // namespace panda::ecmascript diff --git a/test/aottest/BUILD.gn b/test/aottest/BUILD.gn index 45b430ff7e..ca59d55295 100644 --- a/test/aottest/BUILD.gn +++ b/test/aottest/BUILD.gn @@ -74,6 +74,7 @@ group("ark_aot_ts_test") { "bind", "binaryop_special_value", "builtinmath", + "builtins_decode_uri_component", "builtins_string", "call_builtin_objects", "call_same_bytecode_func", diff --git a/test/aottest/builtins_decode_uri_component/BUILD.gn b/test/aottest/builtins_decode_uri_component/BUILD.gn new file mode 100644 index 0000000000..60fccad608 --- /dev/null +++ b/test/aottest/builtins_decode_uri_component/BUILD.gn @@ -0,0 +1,22 @@ +# Copyright (c) 2024 Huawei Device Co., Ltd. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import("//arkcompiler/ets_runtime/test/test_helper.gni") + +host_aot_test_action("builtins_decode_uri_component") { + deps = [] + is_only_typed_path = true + is_enable_pgo = true + is_enable_typed_op_profiler = true + log_option = " --log-info=trace" +} diff --git a/test/aottest/builtins_decode_uri_component/builtins_decode_uri_component.ts b/test/aottest/builtins_decode_uri_component/builtins_decode_uri_component.ts new file mode 100644 index 0000000000..4edb179fa4 --- /dev/null +++ b/test/aottest/builtins_decode_uri_component/builtins_decode_uri_component.ts @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2024 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +function func1() { + if (decodeURIComponent("") !== "") { + return false; + } + ArkTools.printTypedOpProfiler("TYPED_CALL_BUILTIN"); + ArkTools.clearTypedOpProfiler(); + return true; +} + +function func2() { + if (decodeURIComponent("http:%2f%2Fwww.runoob.ru/support/jobs/bin/static.py%3Fpage%3dwhy-ru.html%26sid%3Dliveandwork") + !== "http://www.runoob.ru/support/jobs/bin/static.py?page=why-ru.html&sid=liveandwork") { + return false; + } + ArkTools.printTypedOpProfiler("TYPED_CALL_BUILTIN"); + ArkTools.clearTypedOpProfiler(); + return true; +} + +// tree string +function func3() { + let s1 = "http:%2f%2Fwww.runoob.ru/support/jobs/bin/static.py"; + let s2 = "%3Fpage%3dwhy-ru.html%26sid%3Dliveandwork"; + let uri = s1.concat(s2); + if (decodeURIComponent(uri) !== "http://www.runoob.ru/support/jobs/bin/static.py?page=why-ru.html&sid=liveandwork") { + return false; + } + ArkTools.printTypedOpProfiler("TYPED_CALL_BUILTIN"); + ArkTools.clearTypedOpProfiler(); + return true; +} + +// sliced string +function func4() { + let string = "http:%2f%2Fwww.runoob.ru/support/jobs/bin/static.py%3Fpage%3dwhy-ru.html%26sid%3Dliveandwork"; + let uri = string.substring(0, 24); + if (decodeURIComponent(uri) !== "http://www.runoob.ru") { + return false; + } + ArkTools.printTypedOpProfiler("TYPED_CALL_BUILTIN"); + ArkTools.clearTypedOpProfiler(); + return true; +} + +// utf16 +function func5() { + let uri = "http:%2f%2Fwww.runoob好.ru/support/jobs/bin/static.py%3Fpage%3dwhy-ru.html%26sid%3Dliveandwork"; + if (decodeURIComponent(uri) !== + "http://www.runoob好.ru/support/jobs/bin/static.py?page=why-ru.html&sid=liveandwork") { + return false; + } + ArkTools.printTypedOpProfiler("TYPED_CALL_BUILTIN"); + ArkTools.clearTypedOpProfiler(); + return true; +} + +print(func1()) +print(func2()) +print(func3()) +print(func4()) +print(func5()) \ No newline at end of file diff --git a/test/aottest/builtins_decode_uri_component/expect_output.txt 
b/test/aottest/builtins_decode_uri_component/expect_output.txt new file mode 100644 index 0000000000..d1072032ad --- /dev/null +++ b/test/aottest/builtins_decode_uri_component/expect_output.txt @@ -0,0 +1,23 @@ +# Copyright (c) 2024 Huawei Device Co., Ltd. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[trace] Opcode: TYPED_CALL_BUILTIN Count:1 +true +[trace] Opcode: TYPED_CALL_BUILTIN Count:1 +true +[trace] Opcode: TYPED_CALL_BUILTIN Count:1 +true +[trace] Opcode: TYPED_CALL_BUILTIN Count:1 +true +[trace] Opcode: TYPED_CALL_BUILTIN Count:1 +true diff --git a/test/aottest/builtins_decode_uri_component/pgo_expect_output.txt b/test/aottest/builtins_decode_uri_component/pgo_expect_output.txt new file mode 100644 index 0000000000..aafbdd6edd --- /dev/null +++ b/test/aottest/builtins_decode_uri_component/pgo_expect_output.txt @@ -0,0 +1,18 @@ +# Copyright (c) 2024 Huawei Device Co., Ltd. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +true +true +true +true +true