diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index bdf97f5fd..a9dde32bd 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -65,7 +65,7 @@ jobs: # Note the current convention is to use the -S and -B options here to specify source # and build directories, but this is only available with CMake 3.13 and higher. # The CMake binaries on the Github Actions machines are (as of this writing) 3.12 - run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DENABLE_INTERPRETER=True -DBUILD_FEX_LINUX_TESTS=True -DBUILD_THUNKS=True -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DBUILD_FEX_LINUX_TESTS=True -DBUILD_THUNKS=True -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install - name: Build working-directory: ${{runner.workspace}}/build diff --git a/.github/workflows/glibc_fault.yml b/.github/workflows/glibc_fault.yml index 4d57128c3..549ab074e 100644 --- a/.github/workflows/glibc_fault.yml +++ b/.github/workflows/glibc_fault.yml @@ -73,7 +73,7 @@ jobs: # Note the current convention is to use the -S and -B options here to specify source # and build directories, but this is only available with CMake 3.13 and higher. # The CMake binaries on the Github Actions machines are (as of this writing) 3.12 - run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DENABLE_INTERPRETER=True -DBUILD_FEX_LINUX_TESTS=True -DENABLE_GLIBC_ALLOCATOR_HOOK_FAULT=True -DENABLE_JEMALLOC_GLIBC_ALLOC=False -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DBUILD_FEX_LINUX_TESTS=True -DENABLE_GLIBC_ALLOCATOR_HOOK_FAULT=True -DENABLE_JEMALLOC_GLIBC_ALLOC=False -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install - name: Build working-directory: ${{runner.workspace}}/build diff --git a/.github/workflows/mingw_build.yml b/.github/workflows/mingw_build.yml index 7ab74c2a4..b8e289578 100644 --- a/.github/workflows/mingw_build.yml +++ b/.github/workflows/mingw_build.yml @@ -74,7 +74,7 @@ jobs: # Note the current convention is to use the -S and -B options here to specify source # and build directories, but this is only available with CMake 3.13 and higher. # The CMake binaries on the Github Actions machines are (as of this writing) 3.12 - run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_TOOLCHAIN_FILE=$GITHUB_WORKSPACE/toolchain_mingw.cmake -DMINGW_TRIPLE=$MINGW_TRIPLE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DENABLE_INTERPRETER=False -DBUILD_TESTS=False -DENABLE_JEMALLOC=False -DENABLE_JEMALLOC_GLIBC_ALLOC=False -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_TOOLCHAIN_FILE=$GITHUB_WORKSPACE/toolchain_mingw.cmake -DMINGW_TRIPLE=$MINGW_TRIPLE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DBUILD_TESTS=False -DENABLE_JEMALLOC=False -DENABLE_JEMALLOC_GLIBC_ALLOC=False -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install - name: Build working-directory: ${{runner.workspace}}/build diff --git a/CMakeLists.txt b/CMakeLists.txt index 5af01db47..8150e25e2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,6 @@ option(ENABLE_JEMALLOC_GLIBC_ALLOC "Enables jemalloc glibc allocator" TRUE) option(ENABLE_OFFLINE_TELEMETRY "Enables FEX offline telemetry" TRUE) option(ENABLE_COMPILE_TIME_TRACE "Enables time trace compile option" FALSE) option(ENABLE_LIBCXX "Enables LLVM libc++" FALSE) -option(ENABLE_INTERPRETER "Enables FEX's Interpreter" FALSE) option(ENABLE_CCACHE "Enables ccache for compile caching" TRUE) option(ENABLE_TERMUX_BUILD "Forces building for Termux on a non-Termux build machine" FALSE) option(ENABLE_VIXL_SIMULATOR "Forces the FEX JIT to use the VIXL simulator" FALSE) @@ -97,11 +96,6 @@ if (ENABLE_GDB_SYMBOLS) endif() -if (ENABLE_INTERPRETER) - message(STATUS "Interpreter enabled") - add_definitions(-DINTERPRETER_ENABLED=1) -endif() - set(CMAKE_CXX_STANDARD 20) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/Bin) diff --git a/FEXCore/Source/CMakeLists.txt b/FEXCore/Source/CMakeLists.txt index 3e2e4b232..614a34c31 100644 --- a/FEXCore/Source/CMakeLists.txt +++ b/FEXCore/Source/CMakeLists.txt @@ -159,23 +159,6 @@ if (ENABLE_GLIBC_ALLOCATOR_HOOK_FAULT) Utils/AllocatorOverride.cpp) endif() -if (ENABLE_INTERPRETER) - list(APPEND SRCS - Interface/Core/Interpreter/InterpreterCore.cpp - Interface/Core/Interpreter/InterpreterOps.cpp - Interface/Core/Interpreter/ALUOps.cpp - Interface/Core/Interpreter/AtomicOps.cpp - Interface/Core/Interpreter/BranchOps.cpp - Interface/Core/Interpreter/ConversionOps.cpp - Interface/Core/Interpreter/EncryptionOps.cpp - Interface/Core/Interpreter/F80Ops.cpp - Interface/Core/Interpreter/FlagOps.cpp - Interface/Core/Interpreter/MemoryOps.cpp - Interface/Core/Interpreter/MiscOps.cpp - Interface/Core/Interpreter/MoveOps.cpp - Interface/Core/Interpreter/VectorOps.cpp) -endif() - set(DEFINES -DTHREAD_LOCAL=_Thread_local) if (_M_X86_64) diff --git a/FEXCore/Source/Interface/Config/Config.cpp b/FEXCore/Source/Interface/Config/Config.cpp index 65cf20b48..4686fe0a4 100644 --- a/FEXCore/Source/Interface/Config/Config.cpp +++ b/FEXCore/Source/Interface/Config/Config.cpp @@ -339,11 +339,7 @@ namespace DefaultValues { #else constexpr uint32_t MaxCoreNumber = 1; #endif -#ifdef INTERPRETER_ENABLED - constexpr uint32_t MinCoreNumber = 0; -#else constexpr uint32_t MinCoreNumber = 1; -#endif if (Core > MaxCoreNumber || Core < MinCoreNumber) { // Sanitize the core option by setting the core to the JIT if invalid FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_CORE, fextl::fmt::format("{}", static_cast(FEXCore::Config::CONFIG_IRJIT))); @@ -353,11 +349,6 @@ namespace DefaultValues { if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_CACHEOBJECTCODECOMPILATION)) { FEX_CONFIG_OPT(CacheObjectCodeCompilation, CACHEOBJECTCODECOMPILATION); FEX_CONFIG_OPT(Core, CORE); - - if (CacheObjectCodeCompilation() && Core() == FEXCore::Config::CONFIG_INTERPRETER) { - // If running the interpreter then disable cache code compilation - FEXCore::Config::Erase(FEXCore::Config::CONFIG_CACHEOBJECTCODECOMPILATION); - } } fextl::string ContainerPrefix { FindContainerPrefix() }; diff --git a/FEXCore/Source/Interface/Config/Config.json.in b/FEXCore/Source/Interface/Config/Config.json.in index 00de4a072..03a7c0f5b 100644 --- a/FEXCore/Source/Interface/Config/Config.json.in +++ b/FEXCore/Source/Interface/Config/Config.json.in @@ -6,12 +6,12 @@ "Default": "FEXCore::Config::ConfigCore::CONFIG_IRJIT", "TextDefault": "irjit", "ShortArg": "c", - "Choices": [ "irint", "irjit", "host" ], + "Choices": [ "irjit", "host" ], "ArgumentHandler": "CoreHandler", "Desc": [ "Which CPU core to use", "host only exists on x86_64", - "[irint, irjit, host]" + "[irjit, host]" ] }, "Multiblock": { diff --git a/FEXCore/Source/Interface/Core/Core.cpp b/FEXCore/Source/Interface/Core/Core.cpp index 27dfa4f43..3383379b5 100644 --- a/FEXCore/Source/Interface/Core/Core.cpp +++ b/FEXCore/Source/Interface/Core/Core.cpp @@ -308,11 +308,6 @@ namespace FEXCore::Context { FEXCore::Core::InternalThreadState* ContextImpl::InitCore(uint64_t InitialRIP, uint64_t StackPointer) { // Initialize the CPU core signal handlers & DispatcherConfig switch (Config.Core) { -#ifdef INTERPRETER_ENABLED - case FEXCore::Config::CONFIG_INTERPRETER: - BackendFeatures = FEXCore::CPU::GetInterpreterBackendFeatures(); - break; -#endif case FEXCore::Config::CONFIG_IRJIT: #if (_M_X86_64 && JIT_X86_64) BackendFeatures = FEXCore::CPU::GetX86JITBackendFeatures(); @@ -671,11 +666,6 @@ namespace FEXCore::Context { // Create CPU backend switch (Config.Core) { -#ifdef INTERPRETER_ENABLED - case FEXCore::Config::CONFIG_INTERPRETER: - Thread->CPUBackend = FEXCore::CPU::CreateInterpreterCore(this, Thread); - break; -#endif case FEXCore::Config::CONFIG_IRJIT: Thread->PassManager->InsertRegisterAllocationPass(DoSRA, HostFeatures.SupportsAVX); diff --git a/FEXCore/Source/Interface/Core/Interpreter/ALUOps.cpp b/FEXCore/Source/Interface/Core/Interpreter/ALUOps.cpp deleted file mode 100644 index 1eea51b23..000000000 --- a/FEXCore/Source/Interface/Core/Interpreter/ALUOps.cpp +++ /dev/null @@ -1,1264 +0,0 @@ -// SPDX-License-Identifier: MIT -/* -$info$ -tags: backend|interpreter -$end_info$ -*/ - -#include "Interface/Core/Interpreter/InterpreterClass.h" -#include "Interface/Core/Interpreter/InterpreterOps.h" -#include "Interface/Core/Interpreter/InterpreterDefines.h" - -#include - -#include - -namespace FEXCore::CPU { -#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) - -DEF_OP(TruncElementPair) { - auto Op = IROp->C(); - - switch (IROp->Size) { - case 4: { - uint64_t *Src = GetSrc(Data->SSAData, Op->Pair); - uint64_t Result{}; - Result = Src[0] & ~0U; - Result |= Src[1] << 32; - GD = Result; - break; - } - default: LOGMAN_MSG_A_FMT("Unhandled Truncation size: {}", IROp->Size); break; - } -} - -DEF_OP(Constant) { - auto Op = IROp->C(); - GD = Op->Constant; -} - -DEF_OP(EntrypointOffset) { - auto Op = IROp->C(); - uint64_t Mask = ~0ULL; - uint8_t OpSize = IROp->Size; - if (OpSize == 4) { - Mask = 0xFFFF'FFFFULL; - } - - GD = (Data->CurrentEntry + Op->Offset) & Mask; -} - -DEF_OP(InlineConstant) { - //nop -} - -DEF_OP(InlineEntrypointOffset) { - //nop -} - -DEF_OP(CycleCounter) { -#ifdef DEBUG_CYCLES - GD = 0; -#else - timespec time; - clock_gettime(CLOCK_REALTIME, &time); - GD = time.tv_nsec + time.tv_sec * 1000000000; -#endif -} - -#define GRS(Node) (IROp->Size <= 4 ? GetReg(Node) : GetReg(Node)) - -DEF_OP(Add) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - auto *Src1 = GetSrc(Data->SSAData, Op->Src1); - auto *Src2 = GetSrc(Data->SSAData, Op->Src2); - const auto Func = [](auto a, auto b) { return a + b; }; - - switch (OpSize) { - DO_OP(4, uint32_t, Func) - DO_OP(8, uint64_t, Func) - default: LOGMAN_MSG_A_FMT("Unknown size: {}", OpSize); break; - } -} - -DEF_OP(AddNZCV) { - auto Op = IROp->C(); - const uint8_t OpSize = Op->Size; - - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Src1); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Src2); - - // Results returned in Arm64 NZCV format - // N = Sign bit - // Z = Is Zero - // C = Carry occured (Unsigned result can't fit within resulting register) - // V = Overflow occured (Signed result can't fit in to resulting register) - uint32_t NZCV{}; - switch (OpSize) { - case 4: { - uint32_t Result = Src1 + Src2; - int32_t ResultSigned{}; - if ((Result >> (sizeof(uint32_t) * 8 - 1)) & 1) { - NZCV |= 1U << 31; - } - if (Result == 0) { - NZCV |= 1U << 30; - } - if (__builtin_uadd_overflow(Src1, Src2, &Result)) { - NZCV |= 1U << 29; - } - if (__builtin_sadd_overflow(Src1, Src2, &ResultSigned)) { - NZCV |= 1U << 28; - } - break; - } - case 8: { - uint64_t Result = Src1 + Src2; - int64_t ResultSigned{}; - if ((Result >> (sizeof(uint64_t) * 8 - 1)) & 1) { - NZCV |= 1U << 31; - } - if (Result == 0) { - NZCV |= 1U << 30; - } - if (__builtin_uaddl_overflow(Src1, Src2, &Result)) { - NZCV |= 1U << 29; - } - if (__builtin_saddl_overflow(Src1, Src2, &ResultSigned)) { - NZCV |= 1U << 28; - } - break; - } - default: LOGMAN_MSG_A_FMT("Unknown {} Size: {}\n", __func__, OpSize); break; - } - GD = NZCV; -} - -DEF_OP(TestNZ) { - auto Op = IROp->C(); - const uint8_t OpSize = Op->Size; - - const uint64_t Src = *GetSrc(Data->SSAData, Op->Src1); - // Results returned in Arm64 NZCV format - // N = Sign bit - // Z = Is Zero - // CV = 00 - uint32_t NZCV{}; - switch (OpSize) { - case 4: - if ((Src >> (sizeof(uint32_t) * 8 - 1)) & 1) { - NZCV |= 1U << 31; - } - if (static_cast(Src) == 0) { - NZCV |= 1U << 30; - } - break; - case 8: - if ((Src >> (sizeof(uint64_t) * 8 - 1)) & 1) { - NZCV |= 1U << 31; - } - if (Src == 0) { - NZCV |= 1U << 30; - } - break; - default: LOGMAN_MSG_A_FMT("Unknown {} Size: {}\n", __func__, OpSize); break; - } - GD = NZCV; -} - -DEF_OP(Sub) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Src1); - void *Src2 = GetSrc(Data->SSAData, Op->Src2); - const auto Func = [](auto a, auto b) { return a - b; }; - - switch (OpSize) { - DO_OP(4, uint32_t, Func) - DO_OP(8, uint64_t, Func) - default: LOGMAN_MSG_A_FMT("Unknown size: {}", OpSize); break; - } -} - -DEF_OP(SubNZCV) { - auto Op = IROp->C(); - const uint8_t OpSize = Op->Size; - - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Src1); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Src2); - - // Results returned in Arm64 NZCV format - // N = Sign bit - // Z = Is Zero - // C = Carry occured (Unsigned result can't fit within resulting register) - // V = Overflow occured (Signed result can't fit in to resulting register) - uint32_t NZCV{}; - switch (OpSize) { - case 4: { - uint32_t Result = Src1 - Src2; - int32_t ResultSigned{}; - if ((Result >> (sizeof(uint32_t) * 8 - 1)) & 1) { - NZCV |= 1U << 31; - } - if (Result == 0) { - NZCV |= 1U << 30; - } - if (__builtin_usub_overflow(Src1, Src2, &Result) ^ !(Op->InvertCarry)) { - NZCV |= 1U << 29; - } - if (__builtin_ssub_overflow(Src1, Src2, &ResultSigned)) { - NZCV |= 1U << 28; - } - break; - } - case 8: { - uint64_t Result = Src1 - Src2; - int64_t ResultSigned{}; - if ((Result >> (sizeof(uint64_t) * 8 - 1)) & 1) { - NZCV |= 1U << 31; - } - if (Result == 0) { - NZCV |= 1U << 30; - } - if (__builtin_usubl_overflow(Src1, Src2, &Result) ^ !(Op->InvertCarry)) { - NZCV |= 1U << 29; - } - if (__builtin_ssubl_overflow(Src1, Src2, &ResultSigned)) { - NZCV |= 1U << 28; - } - break; - } - default: LOGMAN_MSG_A_FMT("Unknown {} Size: {}\n", __func__, OpSize); break; - } - GD = NZCV; -} - -DEF_OP(Neg) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const uint64_t Src = *GetSrc(Data->SSAData, Op->Src); - switch (OpSize) { - case 4: - GD = -static_cast(Src); - break; - case 8: - GD = -static_cast(Src); - break; - default: LOGMAN_MSG_A_FMT("Unknown NEG Size: {}\n", OpSize); break; - } -} - -DEF_OP(Abs) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const int64_t Src = *GetSrc(Data->SSAData, Op->Src); - switch (OpSize) { - case 4: - GD = std::abs(static_cast(Src)); - break; - case 8: - GD = std::abs(static_cast(Src)); - break; - default: LOGMAN_MSG_A_FMT("Unknown Abs Size: {}\n", OpSize); break; - } -} - -DEF_OP(Mul) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Src1); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Src2); - - switch (OpSize) { - case 4: - GD = static_cast(static_cast(Src1)) * static_cast(static_cast(Src2)); - break; - case 8: - GD = static_cast(Src1) * static_cast(Src2); - break; - case 16: { - __int128_t Tmp = static_cast<__int128_t>(static_cast(Src1)) * static_cast<__int128_t>(static_cast(Src2)); - memcpy(GDP, &Tmp, 16); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown Mul Size: {}\n", OpSize); break; - } -} - -DEF_OP(UMul) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Src1); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Src2); - - switch (OpSize) { - case 4: - GD = static_cast(Src1) * static_cast(Src2); - break; - case 8: - GD = static_cast(Src1) * static_cast(Src2); - break; - case 16: { - __uint128_t Tmp = static_cast<__uint128_t>(static_cast(Src1)) * static_cast<__uint128_t>(static_cast(Src2)); - memcpy(GDP, &Tmp, 16); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown UMul Size: {}\n", OpSize); break; - } -} - -DEF_OP(Div) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Src1); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Src2); - - switch (OpSize) { - case 1: - GD = static_cast(static_cast(Src1)) / static_cast(static_cast(Src2)); - break; - case 2: - GD = static_cast(static_cast(Src1)) / static_cast(static_cast(Src2)); - break; - case 4: - GD = static_cast(static_cast(Src1)) / static_cast(static_cast(Src2)); - break; - case 8: - GD = static_cast(Src1) / static_cast(Src2); - break; - case 16: { - __int128_t Tmp = *GetSrc<__int128_t*>(Data->SSAData, Op->Src1) / *GetSrc<__int128_t*>(Data->SSAData, Op->Src2); - memcpy(GDP, &Tmp, 16); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown Mul Size: {}\n", OpSize); break; - } -} - -DEF_OP(UDiv) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Src1); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Src2); - - switch (OpSize) { - case 1: - GD = static_cast(static_cast(Src1)) / static_cast(static_cast(Src2)); - break; - case 2: - GD = static_cast(static_cast(Src1)) / static_cast(static_cast(Src2)); - break; - case 4: - GD = static_cast(static_cast(Src1)) / static_cast(static_cast(Src2)); - break; - case 8: - GD = static_cast(Src1) / static_cast(Src2); - break; - case 16: { - __uint128_t Tmp = *GetSrc<__uint128_t*>(Data->SSAData, Op->Src1) / *GetSrc<__uint128_t*>(Data->SSAData, Op->Src2); - memcpy(GDP, &Tmp, 16); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown Mul Size: {}\n", OpSize); break; - } -} - -DEF_OP(Rem) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Src1); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Src2); - - switch (OpSize) { - case 1: - GD = static_cast(static_cast(Src1)) % static_cast(static_cast(Src2)); - break; - case 2: - GD = static_cast(static_cast(Src1)) % static_cast(static_cast(Src2)); - break; - case 4: - GD = static_cast(static_cast(Src1)) % static_cast(static_cast(Src2)); - break; - case 8: - GD = static_cast(Src1) % static_cast(Src2); - break; - case 16: { - __int128_t Tmp = *GetSrc<__int128_t*>(Data->SSAData, Op->Src1) % *GetSrc<__int128_t*>(Data->SSAData, Op->Src2); - memcpy(GDP, &Tmp, 16); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown Mul Size: {}\n", OpSize); break; - } -} - -DEF_OP(URem) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Src1); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Src2); - - switch (OpSize) { - case 1: - GD = static_cast(static_cast(Src1)) % static_cast(static_cast(Src2)); - break; - case 2: - GD = static_cast(static_cast(Src1)) % static_cast(static_cast(Src2)); - break; - case 4: - GD = static_cast(static_cast(Src1)) % static_cast(static_cast(Src2)); - break; - case 8: - GD = static_cast(Src1) % static_cast(Src2); - break; - case 16: { - __uint128_t Tmp = *GetSrc<__uint128_t*>(Data->SSAData, Op->Src1) % *GetSrc<__uint128_t*>(Data->SSAData, Op->Src2); - memcpy(GDP, &Tmp, 16); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown Mul Size: {}\n", OpSize); break; - } -} - -DEF_OP(MulH) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Src1); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Src2); - - switch (OpSize) { - case 4: { - int64_t Tmp = static_cast(static_cast(Src1)) * static_cast(static_cast(Src2)); - GD = Tmp >> 32; - break; - } - case 8: { - __int128_t Tmp = static_cast<__int128_t>(static_cast(Src1)) * static_cast<__int128_t>(static_cast(Src2)); - GD = Tmp >> 64; - break; - } - default: LOGMAN_MSG_A_FMT("Unknown MulH Size: {}\n", OpSize); break; - } -} - -DEF_OP(UMulH) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Src1); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Src2); - switch (OpSize) { - case 4: - GD = static_cast(Src1) * static_cast(Src2); - GD >>= 32; - break; - case 8: { - __uint128_t Tmp = static_cast<__uint128_t>(Src1) * static_cast<__uint128_t>(Src2); - GD = Tmp >> 64; - break; - } - case 16: { - // XXX: This is incorrect - __uint128_t Tmp = static_cast<__uint128_t>(Src1) * static_cast<__uint128_t>(Src2); - GD = Tmp >> 64; - break; - } - default: LOGMAN_MSG_A_FMT("Unknown UMulH Size: {}\n", OpSize); break; - } -} - -DEF_OP(Or) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Src1); - void *Src2 = GetSrc(Data->SSAData, Op->Src2); - const auto Func = [](auto a, auto b) { return a | b; }; - - switch (OpSize) { - DO_OP(1, uint8_t, Func) - DO_OP(2, uint16_t, Func) - DO_OP(4, uint32_t, Func) - DO_OP(8, uint64_t, Func) - DO_OP(16, __uint128_t, Func) - default: LOGMAN_MSG_A_FMT("Unknown size: {}", OpSize); break; - } -} - -DEF_OP(Orlshl) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Src1); - void *Src2 = GetSrc(Data->SSAData, Op->Src2); - const auto BitShift = Op->BitShift; - const auto Func = [BitShift](auto a, auto b) { return a | (b << BitShift); }; - - switch (OpSize) { - DO_OP(1, uint8_t, Func) - DO_OP(2, uint16_t, Func) - DO_OP(4, uint32_t, Func) - DO_OP(8, uint64_t, Func) - DO_OP(16, __uint128_t, Func) - default: LOGMAN_MSG_A_FMT("Unknown size: {}", OpSize); break; - } -} - -DEF_OP(Orlshr) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Src1); - void *Src2 = GetSrc(Data->SSAData, Op->Src2); - const auto BitShift = Op->BitShift; - const auto Func = [BitShift](auto a, auto b) { return a | (b >> BitShift); }; - - switch (OpSize) { - DO_OP(1, uint8_t, Func) - DO_OP(2, uint16_t, Func) - DO_OP(4, uint32_t, Func) - DO_OP(8, uint64_t, Func) - DO_OP(16, __uint128_t, Func) - default: LOGMAN_MSG_A_FMT("Unknown size: {}", OpSize); break; - } -} - -DEF_OP(And) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Src1); - void *Src2 = GetSrc(Data->SSAData, Op->Src2); - const auto Func = [](auto a, auto b) { return a & b; }; - - switch (OpSize) { - DO_OP(1, uint8_t, Func) - DO_OP(2, uint16_t, Func) - DO_OP(4, uint32_t, Func) - DO_OP(8, uint64_t, Func) - default: LOGMAN_MSG_A_FMT("Unknown size: {}", OpSize); break; - } -} - -DEF_OP(Andn) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Src1); - void *Src2 = GetSrc(Data->SSAData, Op->Src2); - constexpr auto Func = [](auto a, auto b) { - using Type = decltype(a); - return static_cast(a & static_cast(~b)); - }; - - switch (OpSize) { - DO_OP(1, uint8_t, Func) - DO_OP(2, uint16_t, Func) - DO_OP(4, uint32_t, Func) - DO_OP(8, uint64_t, Func) - default: LOGMAN_MSG_A_FMT("Unknown size: {}", OpSize); break; - } -} - -DEF_OP(Xor) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Src1); - void *Src2 = GetSrc(Data->SSAData, Op->Src2); - const auto Func = [](auto a, auto b) { return a ^ b; }; - - switch (OpSize) { - DO_OP(1, uint8_t, Func) - DO_OP(2, uint16_t, Func) - DO_OP(4, uint32_t, Func) - DO_OP(8, uint64_t, Func) - default: LOGMAN_MSG_A_FMT("Unknown size: {}", OpSize); break; - } -} - -DEF_OP(Lshl) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Src1); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Src2); - const uint8_t Mask = OpSize * 8 - 1; - switch (OpSize) { - case 4: - GD = static_cast(Src1) << (Src2 & Mask); - break; - case 8: - GD = static_cast(Src1) << (Src2 & Mask); - break; - default: LOGMAN_MSG_A_FMT("Unknown LSHL Size: {}\n", OpSize); break; - } -} - -DEF_OP(Lshr) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Src1); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Src2); - const uint8_t Mask = OpSize * 8 - 1; - switch (OpSize) { - case 4: - GD = static_cast(Src1) >> (Src2 & Mask); - break; - case 8: - GD = static_cast(Src1) >> (Src2 & Mask); - break; - default: LOGMAN_MSG_A_FMT("Unknown LSHR Size: {}\n", OpSize); break; - } -} - -DEF_OP(Ashr) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Src1); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Src2); - const uint8_t Mask = OpSize * 8 - 1; - switch (OpSize) { - case 4: - GD = (uint32_t)(static_cast(Src1) >> (Src2 & Mask)); - break; - case 8: - GD = (uint64_t)(static_cast(Src1) >> (Src2 & Mask)); - break; - default: LOGMAN_MSG_A_FMT("Unknown ASHR Size: {}\n", OpSize); break; - } -} - -DEF_OP(Ror) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Src1); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Src2); - const auto Ror = [] (auto In, auto R) { - const auto RotateMask = sizeof(In) * 8 - 1; - R &= RotateMask; - return (In >> R) | (In << (sizeof(In) * 8 - R)); - }; - - switch (OpSize) { - case 4: - GD = Ror(static_cast(Src1), static_cast(Src2)); - break; - case 8: { - GD = Ror(static_cast(Src1), static_cast(Src2)); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown ROR Size: {}\n", OpSize); break; - } -} - -DEF_OP(Extr) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Upper); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Lower); - const auto Extr = [] (auto Src1, auto Src2, uint8_t lsb) -> decltype(Src1) { - __uint128_t Result{}; - Result = Src1; - Result <<= sizeof(Src1) * 8; - Result |= Src2; - Result >>= lsb; - return Result; - }; - - switch (OpSize) { - case 4: - GD = Extr(static_cast(Src1), static_cast(Src2), Op->LSB); - break; - case 8: { - GD = Extr(static_cast(Src1), static_cast(Src2), Op->LSB); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown EXTR Size: {}\n", OpSize); break; - } -} - -DEF_OP(PDep) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - if (OpSize != 4 && OpSize != 8) { - LOGMAN_MSG_A_FMT("Unknown PDep Size: {}\n", OpSize); - return; - } - - const uint64_t Input = OpSize == 4 ? *GetSrc(Data->SSAData, Op->Input) - : *GetSrc(Data->SSAData, Op->Input); - uint64_t Mask = OpSize == 4 ? *GetSrc(Data->SSAData, Op->Mask) - : *GetSrc(Data->SSAData, Op->Mask); - - uint64_t Result = 0; - for (uint64_t Index = 0; Mask > 0; Index++) { - const uint64_t Offset = std::countr_zero(Mask); - Mask &= Mask - 1; - Result |= ((Input >> Index) & 1) << Offset; - } - - GD = Result; -} - -DEF_OP(PExt) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - if (OpSize != 4 && OpSize != 8) { - LOGMAN_MSG_A_FMT("Unknown PExt Size: {}\n", OpSize); - return; - } - - const uint64_t Input = OpSize == 4 ? *GetSrc(Data->SSAData, Op->Input) - : *GetSrc(Data->SSAData, Op->Input); - uint64_t Mask = OpSize == 4 ? *GetSrc(Data->SSAData, Op->Mask) - : *GetSrc(Data->SSAData, Op->Mask); - - uint64_t Result = 0; - for (uint64_t Offset = 0; Mask > 0; Offset++) { - const uint64_t Index = std::countr_zero(Mask); - Mask &= Mask - 1; - Result |= ((Input >> Index) & 1) << Offset; - } - - GD = Result; -} - -DEF_OP(LDiv) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - // Each source is OpSize in size - // So you can have up to a 128bit divide from x86-64 - switch (OpSize) { - case 2: { - const uint16_t SrcLow = *GetSrc(Data->SSAData, Op->Lower); - const uint16_t SrcHigh = *GetSrc(Data->SSAData, Op->Upper); - const int16_t Divisor = *GetSrc(Data->SSAData, Op->Divisor); - const int32_t Source = (static_cast(SrcHigh) << 16) | SrcLow; - const int32_t Res = Source / Divisor; - - // We only store the lower bits of the result - GD = static_cast(Res); - break; - } - case 4: { - const uint32_t SrcLow = *GetSrc(Data->SSAData, Op->Lower); - const uint32_t SrcHigh = *GetSrc(Data->SSAData, Op->Upper); - const int32_t Divisor = *GetSrc(Data->SSAData, Op->Divisor); - const int64_t Source = (static_cast(SrcHigh) << 32) | SrcLow; - const int64_t Res = Source / Divisor; - - // We only store the lower bits of the result - GD = static_cast(Res); - break; - } - case 8: { - const uint64_t SrcLow = *GetSrc(Data->SSAData, Op->Lower); - const uint64_t SrcHigh = *GetSrc(Data->SSAData, Op->Upper); - const int64_t Divisor = *GetSrc(Data->SSAData, Op->Divisor); - const __int128_t Source = (static_cast<__int128_t>(SrcHigh) << 64) | SrcLow; - const __int128_t Res = Source / Divisor; - - // We only store the lower bits of the result - memcpy(GDP, &Res, OpSize); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown LDIV Size: {}", OpSize); break; - } -} - -DEF_OP(LUDiv) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - // Each source is OpSize in size - // So you can have up to a 128bit divide from x86-64 - switch (OpSize) { - case 2: { - const uint16_t SrcLow = *GetSrc(Data->SSAData, Op->Lower); - const uint16_t SrcHigh = *GetSrc(Data->SSAData, Op->Upper); - const uint16_t Divisor = *GetSrc(Data->SSAData, Op->Divisor); - const uint32_t Source = (static_cast(SrcHigh) << 16) | SrcLow; - const uint32_t Res = Source / Divisor; - - // We only store the lower bits of the result - GD = static_cast(Res); - break; - } - case 4: { - const uint32_t SrcLow = *GetSrc(Data->SSAData, Op->Lower); - const uint32_t SrcHigh = *GetSrc(Data->SSAData, Op->Upper); - const uint32_t Divisor = *GetSrc(Data->SSAData, Op->Divisor); - const uint64_t Source = (static_cast(SrcHigh) << 32) | SrcLow; - const uint64_t Res = Source / Divisor; - - // We only store the lower bits of the result - GD = static_cast(Res); - break; - } - case 8: { - const uint64_t SrcLow = *GetSrc(Data->SSAData, Op->Lower); - const uint64_t SrcHigh = *GetSrc(Data->SSAData, Op->Upper); - const uint64_t Divisor = *GetSrc(Data->SSAData, Op->Divisor); - const __uint128_t Source = (static_cast<__uint128_t>(SrcHigh) << 64) | SrcLow; - const __uint128_t Res = Source / Divisor; - - // We only store the lower bits of the result - memcpy(GDP, &Res, OpSize); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown LUDIV Size: {}", OpSize); break; - } -} - -DEF_OP(LRem) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - // Each source is OpSize in size - // So you can have up to a 128bit Remainder from x86-64 - switch (OpSize) { - case 2: { - const uint16_t SrcLow = *GetSrc(Data->SSAData, Op->Lower); - const uint16_t SrcHigh = *GetSrc(Data->SSAData, Op->Upper); - const int16_t Divisor = *GetSrc(Data->SSAData, Op->Divisor); - const int32_t Source = (static_cast(SrcHigh) << 16) | SrcLow; - const int32_t Res = Source % Divisor; - - // We only store the lower bits of the result - GD = static_cast(Res); - break; - } - case 4: { - const uint32_t SrcLow = *GetSrc(Data->SSAData, Op->Lower); - const uint32_t SrcHigh = *GetSrc(Data->SSAData, Op->Upper); - const int32_t Divisor = *GetSrc(Data->SSAData, Op->Divisor); - const int64_t Source = (static_cast(SrcHigh) << 32) | SrcLow; - const int64_t Res = Source % Divisor; - - // We only store the lower bits of the result - GD = static_cast(Res); - break; - } - case 8: { - const uint64_t SrcLow = *GetSrc(Data->SSAData, Op->Lower); - const uint64_t SrcHigh = *GetSrc(Data->SSAData, Op->Upper); - const int64_t Divisor = *GetSrc(Data->SSAData, Op->Divisor); - const __int128_t Source = (static_cast<__int128_t>(SrcHigh) << 64) | SrcLow; - const __int128_t Res = Source % Divisor; - // We only store the lower bits of the result - memcpy(GDP, &Res, OpSize); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown LREM Size: {}", OpSize); break; - } -} - -DEF_OP(LURem) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - // Each source is OpSize in size - // So you can have up to a 128bit Remainder from x86-64 - switch (OpSize) { - case 2: { - const uint16_t SrcLow = *GetSrc(Data->SSAData, Op->Lower); - const uint16_t SrcHigh = *GetSrc(Data->SSAData, Op->Upper); - const uint16_t Divisor = *GetSrc(Data->SSAData, Op->Divisor); - const uint32_t Source = (static_cast(SrcHigh) << 16) | SrcLow; - const uint32_t Res = Source % Divisor; - - // We only store the lower bits of the result - GD = static_cast(Res); - break; - } - case 4: { - const uint32_t SrcLow = *GetSrc(Data->SSAData, Op->Lower); - const uint32_t SrcHigh = *GetSrc(Data->SSAData, Op->Upper); - const uint32_t Divisor = *GetSrc(Data->SSAData, Op->Divisor); - const uint64_t Source = (static_cast(SrcHigh) << 32) | SrcLow; - const uint64_t Res = Source % Divisor; - - // We only store the lower bits of the result - GD = static_cast(Res); - break; - } - case 8: { - const uint64_t SrcLow = *GetSrc(Data->SSAData, Op->Lower); - const uint64_t SrcHigh = *GetSrc(Data->SSAData, Op->Upper); - const uint64_t Divisor = *GetSrc(Data->SSAData, Op->Divisor); - const __uint128_t Source = (static_cast<__uint128_t>(SrcHigh) << 64) | SrcLow; - const __uint128_t Res = Source % Divisor; - // We only store the lower bits of the result - memcpy(GDP, &Res, OpSize); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown LUREM Size: {}", OpSize); break; - } -} - -DEF_OP(Not) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const uint64_t Src = *GetSrc(Data->SSAData, Op->Src); - const uint64_t mask[9]= { 0, 0xFF, 0xFFFF, 0, 0xFFFFFFFF, 0, 0, 0, 0xFFFFFFFFFFFFFFFFULL }; - const uint64_t Mask = mask[OpSize]; - GD = (~Src) & Mask; -} - -DEF_OP(Popcount) { - auto Op = IROp->C(); - const uint64_t Src = *GetSrc(Data->SSAData, Op->Src); - GD = std::popcount(Src); -} - -DEF_OP(FindLSB) { - auto Op = IROp->C(); - const uint64_t Src = *GetSrc(Data->SSAData, Op->Src); - const uint64_t Result = FindFirstSetBit(Src); - GD = Result - 1; -} - -DEF_OP(FindMSB) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - switch (OpSize) { - case 1: GD = (OpSize * 8 - std::countl_zero(*GetSrc(Data->SSAData, Op->Src))) - 1; break; - case 2: GD = (OpSize * 8 - std::countl_zero(*GetSrc(Data->SSAData, Op->Src))) - 1; break; - case 4: GD = (OpSize * 8 - std::countl_zero(*GetSrc(Data->SSAData, Op->Src))) - 1; break; - case 8: GD = (OpSize * 8 - std::countl_zero(*GetSrc(Data->SSAData, Op->Src))) - 1; break; - default: LOGMAN_MSG_A_FMT("Unknown FindMSB size: {}", OpSize); break; - } -} - -DEF_OP(FindTrailingZeroes) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - switch (OpSize) { - case 1: { - const auto Src = *GetSrc(Data->SSAData, Op->Src); - GD = std::countr_zero(Src); - break; - } - case 2: { - const auto Src = *GetSrc(Data->SSAData, Op->Src); - GD = std::countr_zero(Src); - break; - } - case 4: { - const auto Src = *GetSrc(Data->SSAData, Op->Src); - GD = std::countr_zero(Src); - break; - } - case 8: { - const auto Src = *GetSrc(Data->SSAData, Op->Src); - GD = std::countr_zero(Src); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown size: {}", OpSize); break; - } -} - -DEF_OP(CountLeadingZeroes) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - switch (OpSize) { - case 1: { - const auto Src = *GetSrc(Data->SSAData, Op->Src); - GD = std::countl_zero(Src); - break; - } - case 2: { - const auto Src = *GetSrc(Data->SSAData, Op->Src); - GD = std::countl_zero(Src); - break; - } - case 4: { - const auto Src = *GetSrc(Data->SSAData, Op->Src); - GD = std::countl_zero(Src); - break; - } - case 8: { - const auto Src = *GetSrc(Data->SSAData, Op->Src); - GD = std::countl_zero(Src); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown size: {}", OpSize); break; - } -} - -DEF_OP(Rev) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - switch (OpSize) { - case 2: GD = BSwap16(*GetSrc(Data->SSAData, Op->Src)); break; - case 4: GD = BSwap32(*GetSrc(Data->SSAData, Op->Src)); break; - case 8: GD = BSwap64(*GetSrc(Data->SSAData, Op->Src)); break; - default: LOGMAN_MSG_A_FMT("Unknown REV size: {}", OpSize); break; - } -} - -DEF_OP(Bfi) { - auto Op = IROp->C(); - uint64_t SourceMask = (1ULL << Op->Width) - 1; - if (Op->Width == 64) { - SourceMask = ~0ULL; - } - const uint64_t DestMask = ~(SourceMask << Op->lsb); - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Dest); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Src); - const uint64_t Res = (Src1 & DestMask) | ((Src2 & SourceMask) << Op->lsb); - GD = Res; -} - -DEF_OP(Bfxil) { - auto Op = IROp->C(); - uint64_t SourceMask = (1ULL << Op->Width) - 1; - if (Op->Width == 64) { - SourceMask = ~0ULL; - } - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Dest); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Src); - const uint64_t Res = (Src1 & ~SourceMask) | ((Src2 >> Op->lsb) & SourceMask); - GD = Res; -} - -DEF_OP(Bfe) { - auto Op = IROp->C(); - - LOGMAN_THROW_AA_FMT(IROp->Size <= 8, "OpSize is too large for BFE: {}", IROp->Size); - uint64_t SourceMask = (1ULL << Op->Width) - 1; - if (Op->Width == 64) { - SourceMask = ~0ULL; - } - SourceMask <<= Op->lsb; - const uint64_t Src = *GetSrc(Data->SSAData, Op->Src); - GD = (Src & SourceMask) >> Op->lsb; -} - -DEF_OP(Sbfe) { - auto Op = IROp->C(); - - LOGMAN_THROW_AA_FMT(IROp->Size <= 8, "OpSize is too large for SBFE: {}", IROp->Size); - int64_t Src = *GetSrc(Data->SSAData, Op->Src); - const uint64_t ShiftLeftAmount = (64 - (Op->Width + Op->lsb)); - const uint64_t ShiftRightAmount = ShiftLeftAmount + Op->lsb; - Src <<= ShiftLeftAmount; - Src >>= ShiftRightAmount; - GD = Src; -} - -DEF_OP(Select) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - uint64_t ArgTrue; - uint64_t ArgFalse; - if (OpSize == 4) { - ArgTrue = *GetSrc(Data->SSAData, Op->TrueVal); - ArgFalse = *GetSrc(Data->SSAData, Op->FalseVal); - } else { - ArgTrue = *GetSrc(Data->SSAData, Op->TrueVal); - ArgFalse = *GetSrc(Data->SSAData, Op->FalseVal); - } - - bool CompResult; - - if (Op->CompareSize == 4) { - const auto Src1 = *GetSrc(Data->SSAData, Op->Cmp1); - const auto Src2 = *GetSrc(Data->SSAData, Op->Cmp2); - CompResult = IsConditionTrue(Op->Cond.Val, Src1, Src2); - } - else if (Op->CompareSize == 8) { - const auto Src1 = *GetSrc(Data->SSAData, Op->Cmp1); - const auto Src2 = *GetSrc(Data->SSAData, Op->Cmp2); - CompResult = IsConditionTrue(Op->Cond.Val, Src1, Src2); - } - else if (Op->CompareSize == 16) { - const auto Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Cmp1); - const auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Cmp2); - CompResult = IsConditionTrue<__uint128_t, __int128_t, double>(Op->Cond.Val, Src1, Src2); - } - else { - LOGMAN_MSG_A_FMT("Unknown select size: {}", Op->CompareSize); - FEX_UNREACHABLE; - } - - GD = CompResult ? ArgTrue : ArgFalse; -} - -DEF_OP(VExtractToGPR) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - constexpr auto AVXRegSize = Core::CPUState::XMM_AVX_REG_SIZE; - constexpr auto SSERegSize = Core::CPUState::XMM_SSE_REG_SIZE; - constexpr auto SSEBitSize = SSERegSize * 8; - - const auto ElementSize = Op->Header.ElementSize; - const auto ElementSizeBits = ElementSize * 8; - const auto Shift = ElementSizeBits * Op->Index; - - const uint32_t SourceSize = GetOpSize(Data->CurrentIR, Op->Vector); - - LOGMAN_THROW_AA_FMT(OpSize <= AVXRegSize, - "OpSize is too large for VExtractToGPR: {}", OpSize); - - if (SourceSize >= SSERegSize) { - __uint128_t SourceMask = (1ULL << ElementSizeBits) - 1; - if (ElementSize == 8) { - SourceMask = ~0ULL; - } - - const auto Src = *GetSrc(Data->SSAData, Op->Vector); - - const auto GetResult = [&] { - if (Shift >= SSEBitSize) { - const auto NormalizedShift = Shift - SSEBitSize; - return (Src.Upper >> NormalizedShift) & SourceMask; - } else { - return (Src.Lower >> Shift) & SourceMask; - } - }; - - const auto Result = GetResult(); - memcpy(GDP, &Result, ElementSize); - } - else { - uint64_t SourceMask = (1ULL << ElementSizeBits) - 1; - if (ElementSize == 8) { - SourceMask = ~0ULL; - } - - const uint64_t Src = *GetSrc(Data->SSAData, Op->Vector); - const uint64_t Result = (Src >> Shift) & SourceMask; - GD = Result; - } -} - -DEF_OP(Float_ToGPR_ZS) { - auto Op = IROp->C(); - const uint16_t Conv = (IROp->Size << 8) | Op->SrcElementSize; - switch (Conv) { - case 0x0804: { // int64_t <- float - const int64_t Dst = (int64_t)std::trunc(*GetSrc(Data->SSAData, Op->Scalar)); - memcpy(GDP, &Dst, IROp->Size); - break; - } - case 0x0808: { // int64_t <- double - const int64_t Dst = (int64_t)std::trunc(*GetSrc(Data->SSAData, Op->Scalar)); - memcpy(GDP, &Dst, IROp->Size); - break; - } - case 0x0404: { // int32_t <- float - const int32_t Dst = (int32_t)std::trunc(*GetSrc(Data->SSAData, Op->Scalar)); - memcpy(GDP, &Dst, IROp->Size); - break; - } - case 0x0408: { // int32_t <- double - const int32_t Dst = (int32_t)std::trunc(*GetSrc(Data->SSAData, Op->Scalar)); - memcpy(GDP, &Dst, IROp->Size); - break; - } - } -} - -DEF_OP(Float_ToGPR_S) { - auto Op = IROp->C(); - const uint16_t Conv = (IROp->Size << 8) | Op->SrcElementSize; - switch (Conv) { - case 0x0804: { // int64_t <- float - const int64_t Dst = (int64_t)std::nearbyint(*GetSrc(Data->SSAData, Op->Scalar)); - memcpy(GDP, &Dst, IROp->Size); - break; - } - case 0x0808: { // int64_t <- double - const int64_t Dst = (int64_t)std::nearbyint(*GetSrc(Data->SSAData, Op->Scalar)); - memcpy(GDP, &Dst, IROp->Size); - break; - } - case 0x0404: { // int32_t <- float - const int32_t Dst = (int32_t)std::nearbyint(*GetSrc(Data->SSAData, Op->Scalar)); - memcpy(GDP, &Dst, IROp->Size); - break; - } - case 0x0408: { // int32_t <- double - const int32_t Dst = (int32_t)std::nearbyint(*GetSrc(Data->SSAData, Op->Scalar)); - memcpy(GDP, &Dst, IROp->Size); - break; - } - } -} - -DEF_OP(FCmp) { - auto Op = IROp->C(); - uint32_t ResultFlags{}; - if (Op->ElementSize == 4) { - const float Src1 = *GetSrc(Data->SSAData, Op->Scalar1); - const float Src2 = *GetSrc(Data->SSAData, Op->Scalar2); - const bool Unordered = std::isnan(Src1) || std::isnan(Src2); - if (Op->Flags & (1 << IR::FCMP_FLAG_LT)) { - if (Unordered || (Src1 < Src2)) { - ResultFlags |= (1 << IR::FCMP_FLAG_LT); - } - } - if (Op->Flags & (1 << IR::FCMP_FLAG_UNORDERED)) { - if (Unordered) { - ResultFlags |= (1 << IR::FCMP_FLAG_UNORDERED); - } - } - if (Op->Flags & (1 << IR::FCMP_FLAG_EQ)) { - if (Unordered || (Src1 == Src2)) { - ResultFlags |= (1 << IR::FCMP_FLAG_EQ); - } - } - } - else { - const double Src1 = *GetSrc(Data->SSAData, Op->Scalar1); - const double Src2 = *GetSrc(Data->SSAData, Op->Scalar2); - const bool Unordered = std::isnan(Src1) || std::isnan(Src2); - if (Op->Flags & (1 << IR::FCMP_FLAG_LT)) { - if (Unordered || (Src1 < Src2)) { - ResultFlags |= (1 << IR::FCMP_FLAG_LT); - } - } - if (Op->Flags & (1 << IR::FCMP_FLAG_UNORDERED)) { - if (Unordered) { - ResultFlags |= (1 << IR::FCMP_FLAG_UNORDERED); - } - } - if (Op->Flags & (1 << IR::FCMP_FLAG_EQ)) { - if (Unordered || (Src1 == Src2)) { - ResultFlags |= (1 << IR::FCMP_FLAG_EQ); - } - } - } - - GD = ResultFlags; -} - -#undef DEF_OP - -} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/Interpreter/AtomicOps.cpp b/FEXCore/Source/Interface/Core/Interpreter/AtomicOps.cpp deleted file mode 100644 index 2efdc4ca7..000000000 --- a/FEXCore/Source/Interface/Core/Interpreter/AtomicOps.cpp +++ /dev/null @@ -1,792 +0,0 @@ -// SPDX-License-Identifier: MIT -/* -$info$ -tags: backend|interpreter -$end_info$ -*/ - -#include "Interface/Core/Interpreter/InterpreterClass.h" -#include "Interface/Core/Interpreter/InterpreterOps.h" -#include "Interface/Core/Interpreter/InterpreterDefines.h" - -#include - -#include - -namespace FEXCore::CPU { - -#ifdef _M_X86_64 -uint8_t AtomicFetchNeg(uint8_t *Addr) { - using Type = uint8_t; - std::atomic *MemData = reinterpret_cast*>(Addr); - Type Expected = MemData->load(); - Type Desired = -Expected; - do { - Desired = -Expected; - } while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst)); - - return Expected; -} - -uint16_t AtomicFetchNeg(uint16_t *Addr) { - using Type = uint16_t; - std::atomic *MemData = reinterpret_cast*>(Addr); - Type Expected = MemData->load(); - Type Desired = -Expected; - do { - Desired = -Expected; - } while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst)); - - return Expected; -} - -uint32_t AtomicFetchNeg(uint32_t *Addr) { - using Type = uint32_t; - std::atomic *MemData = reinterpret_cast*>(Addr); - Type Expected = MemData->load(); - Type Desired = -Expected; - do { - Desired = -Expected; - } while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst)); - - return Expected; -} - -uint64_t AtomicFetchNeg(uint64_t *Addr) { - using Type = uint64_t; - std::atomic *MemData = reinterpret_cast*>(Addr); - Type Expected = MemData->load(); - Type Desired = -Expected; - do { - Desired = -Expected; - } while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst)); - - return Expected; -} - -template -T AtomicCompareAndSwap(T expected, T desired, T *addr) -{ - std::atomic *MemData = reinterpret_cast*>(addr); - - T Src1 = expected; - T Src2 = desired; - - T Expected = Src1; - bool Result = MemData->compare_exchange_strong(Expected, Src2); - - return Result ? Src1 : Expected; -} - -template uint8_t AtomicCompareAndSwap(uint8_t expected, uint8_t desired, uint8_t *addr); -template uint16_t AtomicCompareAndSwap(uint16_t expected, uint16_t desired, uint16_t *addr); -template uint32_t AtomicCompareAndSwap(uint32_t expected, uint32_t desired, uint32_t *addr); -template uint64_t AtomicCompareAndSwap(uint64_t expected, uint64_t desired, uint64_t *addr); - -#else -// Needs to match what the AArch64 JIT and unaligned signal handler expects -uint8_t AtomicFetchNeg(uint8_t *Addr) { - using Type = uint8_t; - Type Result{}; - Type Tmp{}; - Type TmpStatus{}; - - __asm__ volatile( - R"( - 1: - ldaxrb %w[Result], [%[Memory]]; - neg %w[Tmp], %w[Result]; - stlxrb %w[TmpStatus], %w[Tmp], [%[Memory]]; - cbnz %w[TmpStatus], 1b; - )" - : [Result] "=r" (Result) - , [Tmp] "=r" (Tmp) - , [TmpStatus] "=r" (TmpStatus) - , [Memory] "+r" (Addr) - :: "memory" - ); - return Result; -} - -uint16_t AtomicFetchNeg(uint16_t *Addr) { - using Type = uint16_t; - Type Result{}; - Type Tmp{}; - Type TmpStatus{}; - - __asm__ volatile( - R"( - 1: - ldaxrh %w[Result], [%[Memory]]; - neg %w[Tmp], %w[Result]; - stlxrh %w[TmpStatus], %w[Tmp], [%[Memory]]; - cbnz %w[TmpStatus], 1b; - )" - : [Result] "=r" (Result) - , [Tmp] "=r" (Tmp) - , [TmpStatus] "=r" (TmpStatus) - , [Memory] "+r" (Addr) - :: "memory" - ); - return Result; -} - -uint32_t AtomicFetchNeg(uint32_t *Addr) { - using Type = uint32_t; - Type Result{}; - Type Tmp{}; - Type TmpStatus{}; - - __asm__ volatile( - R"( - 1: - ldaxr %w[Result], [%[Memory]]; - neg %w[Tmp], %w[Result]; - stlxr %w[TmpStatus], %w[Tmp], [%[Memory]]; - cbnz %w[TmpStatus], 1b; - )" - : [Result] "=r" (Result) - , [Tmp] "=r" (Tmp) - , [TmpStatus] "=r" (TmpStatus) - , [Memory] "+r" (Addr) - :: "memory" - ); - return Result; -} - -uint64_t AtomicFetchNeg(uint64_t *Addr) { - using Type = uint64_t; - Type Result{}; - Type Tmp{}; - Type TmpStatus{}; - - __asm__ volatile( - R"( - 1: - ldaxr %[Result], [%[Memory]]; - neg %[Tmp], %[Result]; - stlxr %w[TmpStatus], %[Tmp], [%[Memory]]; - cbnz %w[TmpStatus], 1b; - )" - : [Result] "=r" (Result) - , [Tmp] "=r" (Tmp) - , [TmpStatus] "=r" (TmpStatus) - , [Memory] "+r" (Addr) - :: "memory" - ); - return Result; -} - -template<> -uint8_t AtomicCompareAndSwap(uint8_t expected, uint8_t desired, uint8_t *addr) { - using Type = uint8_t; - //force Result to r9 (scratch register) or clang spills to stack - register Type Result asm("r9"){}; - Type Tmp{}; - Type Tmp2{}; - __asm__ volatile( - R"( - 1: - ldaxrb %w[Tmp], [%[Memory]]; - cmp %w[Tmp], %w[Expected], uxtb; - b.ne 2f; - stlxrb %w[Tmp2], %w[Desired], [%[Memory]]; - cbnz %w[Tmp2], 1b; - mov %w[Result], %w[Expected]; - b 3f; - 2: - mov %w[Result], %w[Tmp]; - clrex; - 3: - )" - : [Tmp] "=r" (Tmp) - , [Tmp2] "=r" (Tmp2) - , [Desired] "+r" (desired) - , [Expected] "+r" (expected) - , [Result] "=r" (Result) - , [Memory] "+r" (addr) - :: "memory" - ); - return Result; -} - -template<> -uint16_t AtomicCompareAndSwap(uint16_t expected, uint16_t desired, uint16_t *addr) { - using Type = uint16_t; - //force Result to r9 (scratch register) or clang spills to stack - register Type Result asm("r9"){}; - Type Tmp{}; - Type Tmp2{}; - __asm__ volatile( - R"( - 1: - ldaxrh %w[Tmp], [%[Memory]]; - cmp %w[Tmp], %w[Expected], uxth; - b.ne 2f; - stlxrh %w[Tmp2], %w[Desired], [%[Memory]]; - cbnz %w[Tmp2], 1b; - mov %w[Result], %w[Expected]; - b 3f; - 2: - mov %w[Result], %w[Tmp]; - clrex; - 3: - )" - : [Tmp] "=r" (Tmp) - , [Tmp2] "=r" (Tmp2) - , [Desired] "+r" (desired) - , [Expected] "+r" (expected) - , [Result] "=r" (Result) - , [Memory] "+r" (addr) - :: "memory" - ); - return Result; -} - -template<> -uint32_t AtomicCompareAndSwap(uint32_t expected, uint32_t desired, uint32_t *addr) { - using Type = uint32_t; - //force Result to r9 (scratch register) or clang spills to stack - register Type Result asm("r9"){}; - Type Tmp{}; - Type Tmp2{}; - __asm__ volatile( - R"( - 1: - ldaxr %w[Tmp], [%[Memory]]; - cmp %w[Tmp], %w[Expected]; - b.ne 2f; - stlxr %w[Tmp2], %w[Desired], [%[Memory]]; - cbnz %w[Tmp2], 1b; - mov %w[Result], %w[Expected]; - b 3f; - 2: - mov %w[Result], %w[Tmp]; - clrex; - 3: - )" - : [Tmp] "=r" (Tmp) - , [Tmp2] "=r" (Tmp2) - , [Desired] "+r" (desired) - , [Expected] "+r" (expected) - , [Result] "=r" (Result) - , [Memory] "+r" (addr) - :: "memory" - ); - return Result; -} - -template<> -uint64_t AtomicCompareAndSwap(uint64_t expected, uint64_t desired, uint64_t *addr) { - using Type = uint64_t; - //force Result to r9 (scratch register) or clang spills to stack - register Type Result asm("r9"){}; - Type Tmp{}; - Type Tmp2{}; - __asm__ volatile( - R"( - 1: - ldaxr %[Tmp], [%[Memory]]; - cmp %[Tmp], %[Expected]; - b.ne 2f; - stlxr %w[Tmp2], %[Desired], [%[Memory]]; - cbnz %w[Tmp2], 1b; - mov %[Result], %[Expected]; - b 3f; - 2: - mov %[Result], %[Tmp]; - clrex; - 3: - )" - : [Tmp] "=r" (Tmp) - , [Tmp2] "=r" (Tmp2) - , [Desired] "+r" (desired) - , [Expected] "+r" (expected) - , [Result] "=r" (Result) - , [Memory] "+r" (addr) - :: "memory" - ); - return Result; -} - -#endif - -#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) -DEF_OP(CASPair) { - auto Op = IROp->C(); - - // Size is the size of each pair element - switch (IROp->ElementSize) { - case 4: { - GD = AtomicCompareAndSwap( - *GetSrc(Data->SSAData, Op->Expected), - *GetSrc(Data->SSAData, Op->Desired), - *GetSrc(Data->SSAData, Op->Addr) - ); - break; - } - case 8: { - std::atomic<__uint128_t> *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - - __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Expected); - __uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Desired); - - __uint128_t Expected = Src1; - bool Result = MemData->compare_exchange_strong(Expected, Src2); - memcpy(GDP, Result ? &Src1 : &Expected, 16); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown CAS size: {}", IROp->ElementSize); break; - } -} - -DEF_OP(CAS) { - auto Op = IROp->C(); - uint8_t OpSize = IROp->Size; - - switch (OpSize) { - case 1: { - GD = AtomicCompareAndSwap( - *GetSrc(Data->SSAData, Op->Expected), - *GetSrc(Data->SSAData, Op->Desired), - *GetSrc(Data->SSAData, Op->Addr) - ); - break; - } - case 2: { - GD = AtomicCompareAndSwap( - *GetSrc(Data->SSAData, Op->Expected), - *GetSrc(Data->SSAData, Op->Desired), - *GetSrc(Data->SSAData, Op->Addr) - ); - break; - } - case 4: { - GD = AtomicCompareAndSwap( - *GetSrc(Data->SSAData, Op->Expected), - *GetSrc(Data->SSAData, Op->Desired), - *GetSrc(Data->SSAData, Op->Addr) - ); - break; - } - case 8: { - GD = AtomicCompareAndSwap( - *GetSrc(Data->SSAData, Op->Expected), - *GetSrc(Data->SSAData, Op->Desired), - *GetSrc(Data->SSAData, Op->Addr) - ); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown CAS size: {}", OpSize); break; - } -} - -DEF_OP(AtomicAdd) { - auto Op = IROp->C(); - switch (IROp->Size) { - case 1: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint8_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData += Src; - break; - } - case 2: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint16_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData += Src; - break; - } - case 4: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint32_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData += Src; - break; - } - case 8: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint64_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData += Src; - break; - } - default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size); - } -} - -DEF_OP(AtomicSub) { - auto Op = IROp->C(); - switch (IROp->Size) { - case 1: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint8_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData -= Src; - break; - } - case 2: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint16_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData -= Src; - break; - } - case 4: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint32_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData -= Src; - break; - } - case 8: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint64_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData -= Src; - break; - } - default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size); - } -} - -DEF_OP(AtomicAnd) { - auto Op = IROp->C(); - switch (IROp->Size) { - case 1: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint8_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData &= Src; - break; - } - case 2: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint16_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData &= Src; - break; - } - case 4: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint32_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData &= Src; - break; - } - case 8: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint64_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData &= Src; - break; - } - default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size); - } -} - -DEF_OP(AtomicOr) { - auto Op = IROp->C(); - switch (IROp->Size) { - case 1: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint8_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData |= Src; - break; - } - case 2: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint16_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData |= Src; - break; - } - case 4: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint32_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData |= Src; - break; - } - case 8: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint64_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData |= Src; - break; - } - default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size); - } -} - -DEF_OP(AtomicXor) { - auto Op = IROp->C(); - switch (IROp->Size) { - case 1: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint8_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData ^= Src; - break; - } - case 2: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint16_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData ^= Src; - break; - } - case 4: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint32_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData ^= Src; - break; - } - case 8: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint64_t Src = *GetSrc(Data->SSAData, Op->Value); - *MemData ^= Src; - break; - } - default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size); - } -} - -DEF_OP(AtomicSwap) { - auto Op = IROp->C(); - switch (IROp->Size) { - case 1: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint8_t Src = *GetSrc(Data->SSAData, Op->Value); - uint8_t Previous = MemData->exchange(Src); - GD = Previous; - break; - } - case 2: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint16_t Src = *GetSrc(Data->SSAData, Op->Value); - uint16_t Previous = MemData->exchange(Src); - GD = Previous; - break; - } - case 4: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint32_t Src = *GetSrc(Data->SSAData, Op->Value); - uint32_t Previous = MemData->exchange(Src); - GD = Previous; - break; - } - case 8: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint64_t Src = *GetSrc(Data->SSAData, Op->Value); - uint64_t Previous = MemData->exchange(Src); - GD = Previous; - break; - } - default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size); - } -} - -DEF_OP(AtomicFetchAdd) { - auto Op = IROp->C(); - switch (IROp->Size) { - case 1: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint8_t Src = *GetSrc(Data->SSAData, Op->Value); - uint8_t Previous = MemData->fetch_add(Src); - GD = Previous; - break; - } - case 2: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint16_t Src = *GetSrc(Data->SSAData, Op->Value); - uint16_t Previous = MemData->fetch_add(Src); - GD = Previous; - break; - } - case 4: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint32_t Src = *GetSrc(Data->SSAData, Op->Value); - uint32_t Previous = MemData->fetch_add(Src); - GD = Previous; - break; - } - case 8: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint64_t Src = *GetSrc(Data->SSAData, Op->Value); - uint64_t Previous = MemData->fetch_add(Src); - GD = Previous; - break; - } - default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size); - } -} - -DEF_OP(AtomicFetchSub) { - auto Op = IROp->C(); - switch (IROp->Size) { - case 1: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint8_t Src = *GetSrc(Data->SSAData, Op->Value); - uint8_t Previous = MemData->fetch_sub(Src); - GD = Previous; - break; - } - case 2: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint16_t Src = *GetSrc(Data->SSAData, Op->Value); - uint16_t Previous = MemData->fetch_sub(Src); - GD = Previous; - break; - } - case 4: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint32_t Src = *GetSrc(Data->SSAData, Op->Value); - uint32_t Previous = MemData->fetch_sub(Src); - GD = Previous; - break; - } - case 8: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint64_t Src = *GetSrc(Data->SSAData, Op->Value); - uint64_t Previous = MemData->fetch_sub(Src); - GD = Previous; - break; - } - default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size); - } -} - -DEF_OP(AtomicFetchAnd) { - auto Op = IROp->C(); - switch (IROp->Size) { - case 1: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint8_t Src = *GetSrc(Data->SSAData, Op->Value); - uint8_t Previous = MemData->fetch_and(Src); - GD = Previous; - break; - } - case 2: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint16_t Src = *GetSrc(Data->SSAData, Op->Value); - uint16_t Previous = MemData->fetch_and(Src); - GD = Previous; - break; - } - case 4: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint32_t Src = *GetSrc(Data->SSAData, Op->Value); - uint32_t Previous = MemData->fetch_and(Src); - GD = Previous; - break; - } - case 8: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint64_t Src = *GetSrc(Data->SSAData, Op->Value); - uint64_t Previous = MemData->fetch_and(Src); - GD = Previous; - break; - } - default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size); - } -} - -DEF_OP(AtomicFetchOr) { - auto Op = IROp->C(); - switch (IROp->Size) { - case 1: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint8_t Src = *GetSrc(Data->SSAData, Op->Value); - uint8_t Previous = MemData->fetch_or(Src); - GD = Previous; - break; - } - case 2: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint16_t Src = *GetSrc(Data->SSAData, Op->Value); - uint16_t Previous = MemData->fetch_or(Src); - GD = Previous; - break; - } - case 4: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint32_t Src = *GetSrc(Data->SSAData, Op->Value); - uint32_t Previous = MemData->fetch_or(Src); - GD = Previous; - break; - } - case 8: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint64_t Src = *GetSrc(Data->SSAData, Op->Value); - uint64_t Previous = MemData->fetch_or(Src); - GD = Previous; - break; - } - default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size); - } -} - -DEF_OP(AtomicFetchXor) { - auto Op = IROp->C(); - switch (IROp->Size) { - case 1: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint8_t Src = *GetSrc(Data->SSAData, Op->Value); - uint8_t Previous = MemData->fetch_xor(Src); - GD = Previous; - break; - } - case 2: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint16_t Src = *GetSrc(Data->SSAData, Op->Value); - uint16_t Previous = MemData->fetch_xor(Src); - GD = Previous; - break; - } - case 4: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint32_t Src = *GetSrc(Data->SSAData, Op->Value); - uint32_t Previous = MemData->fetch_xor(Src); - GD = Previous; - break; - } - case 8: { - std::atomic *MemData = *GetSrc **>(Data->SSAData, Op->Addr); - uint64_t Src = *GetSrc(Data->SSAData, Op->Value); - uint64_t Previous = MemData->fetch_xor(Src); - GD = Previous; - break; - } - default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size); - } -} - -DEF_OP(AtomicFetchNeg) { - auto Op = IROp->C(); - switch (IROp->Size) { - case 1: { - using Type = uint8_t; - GD = AtomicFetchNeg(*GetSrc(Data->SSAData, Op->Addr)); - break; - } - case 2: { - using Type = uint16_t; - GD = AtomicFetchNeg(*GetSrc(Data->SSAData, Op->Addr)); - break; - } - case 4: { - using Type = uint32_t; - GD = AtomicFetchNeg(*GetSrc(Data->SSAData, Op->Addr)); - break; - } - case 8: { - using Type = uint64_t; - GD = AtomicFetchNeg(*GetSrc(Data->SSAData, Op->Addr)); - break; - } - default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size); - } -} -DEF_OP(TelemetrySetValue) { -#ifndef FEX_DISABLE_TELEMETRY - auto Op = IROp->C(); - uint64_t Src = *GetSrc(Data->SSAData, Op->Value); - - auto TelemetryPtr = reinterpret_cast*>(Data->State->CurrentFrame->Pointers.Common.TelemetryValueAddresses[Op->TelemetryValueIndex]); - uint64_t Set{}; - if (Src != 0) { - Set = 1; - } - - *TelemetryPtr |= Set; -#endif -} - -#undef DEF_OP - -} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/Interpreter/BranchOps.cpp b/FEXCore/Source/Interface/Core/Interpreter/BranchOps.cpp deleted file mode 100644 index 7d4e590f3..000000000 --- a/FEXCore/Source/Interface/Core/Interpreter/BranchOps.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// SPDX-License-Identifier: MIT -/* -$info$ -tags: backend|interpreter -$end_info$ -*/ - -#include "Interface/Context/Context.h" -#include "Interface/Core/Interpreter/InterpreterClass.h" -#include "Interface/Core/Interpreter/InterpreterOps.h" -#include "Interface/Core/Interpreter/InterpreterDefines.h" -#include "Interface/HLE/Thunks/Thunks.h" - -#include -#include - -#include -#include - -namespace FEXCore::CPU { -#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) - -DEF_OP(CallbackReturn) { - Data->State->CurrentFrame->Pointers.Interpreter.CallbackReturn(Data->State, Data->StackEntry); -} - -DEF_OP(ExitFunction) { - auto Op = IROp->C(); - uint8_t OpSize = IROp->Size; - - uintptr_t* ContextPtr = reinterpret_cast(Data->State->CurrentFrame); - - void *ContextData = reinterpret_cast(ContextPtr); - void *Src = GetSrc(Data->SSAData, Op->NewRIP); - - memcpy(ContextData, Src, OpSize); - - Data->BlockResults.Quit = true; -} - -DEF_OP(Jump) { - auto Op = IROp->C(); - const uintptr_t ListBegin = Data->CurrentIR->GetListData(); - const uintptr_t DataBegin = Data->CurrentIR->GetData(); - - Data->BlockIterator = IR::NodeIterator(ListBegin, DataBegin, Op->TargetBlock); - Data->BlockResults.Redo = true; -} - -DEF_OP(CondJump) { - auto Op = IROp->C(); - const uintptr_t ListBegin = Data->CurrentIR->GetListData(); - const uintptr_t DataBegin = Data->CurrentIR->GetData(); - - bool CompResult; - - const uint64_t Src1 = *GetSrc(Data->SSAData, Op->Cmp1); - const uint64_t Src2 = *GetSrc(Data->SSAData, Op->Cmp2); - - if (Op->CompareSize == 4) - CompResult = IsConditionTrue(Op->Cond.Val, Src1, Src2); - else - CompResult = IsConditionTrue(Op->Cond.Val, Src1, Src2); - - if (CompResult) { - Data->BlockIterator = IR::NodeIterator(ListBegin, DataBegin, Op->TrueBlock); - } - else { - Data->BlockIterator = IR::NodeIterator(ListBegin, DataBegin, Op->FalseBlock); - } - Data->BlockResults.Redo = true; -} - -DEF_OP(Syscall) { - auto Op = IROp->C(); - - FEXCore::HLE::SyscallArguments Args; - for (size_t j = 0; j < FEXCore::HLE::SyscallArguments::MAX_ARGS; ++j) { - if (Op->Header.Args[j].IsInvalid()) break; - Args.Argument[j] = *GetSrc(Data->SSAData, Op->Header.Args[j]); - } - - uint64_t Res = FEXCore::Context::HandleSyscall(static_cast(Data->State->CTX)->SyscallHandler, Data->State->CurrentFrame, &Args); - GD = Res; -} - -DEF_OP(InlineSyscall) { - auto Op = IROp->C(); - - FEXCore::HLE::SyscallArguments Args; - for (size_t j = 0; j < FEXCore::HLE::SyscallArguments::MAX_ARGS; ++j) { - if (Op->Header.Args[j].IsInvalid()) break; - Args.Argument[j] = *GetSrc(Data->SSAData, Op->Header.Args[j]); - } - - // We don't want the errno handling but I also don't want to write inline ASM atm - uint64_t Res = syscall( - Op->HostSyscallNumber, - Args.Argument[0], - Args.Argument[1], - Args.Argument[2], - Args.Argument[3], - Args.Argument[4], - Args.Argument[5], - Args.Argument[6] - ); - - if (Res == -1) { - Res = -errno; - } - - GD = Res; -} - -DEF_OP(Thunk) { - auto Op = IROp->C(); - - auto thunkFn = static_cast(Data->State->CTX)->ThunkHandler->LookupThunk(Op->ThunkNameHash); - thunkFn(*GetSrc(Data->SSAData, Op->ArgPtr)); -} - -DEF_OP(ValidateCode) { - auto Op = IROp->C(); - - auto CodePtr = Data->CurrentEntry + Op->Offset; - if (memcmp((void*)CodePtr, &Op->CodeOriginalLow, Op->CodeLength) != 0) { - GD = 1; - } else { - GD = 0; - } -} - -DEF_OP(ThreadRemoveCodeEntry) { - static_cast(Data->State->CTX)->ThreadRemoveCodeEntryFromJit(Data->State->CurrentFrame, Data->CurrentEntry); -} - -DEF_OP(CPUID) { - auto Op = IROp->C(); - uint64_t *DstPtr = GetDest(Data->SSAData, Node); - const uint64_t Arg = *GetSrc(Data->SSAData, Op->Function); - const uint64_t Leaf = *GetSrc(Data->SSAData, Op->Leaf); - - auto Results = Data->State->CTX->RunCPUIDFunction(Arg, Leaf); - memcpy(DstPtr, &Results, sizeof(uint32_t) * 4); -} - -DEF_OP(XGETBV) { - auto Op = IROp->C(); - uint32_t *DstPtr = GetDest(Data->SSAData, Node); - const uint32_t Function = *GetSrc(Data->SSAData, Op->Function); - - auto Results = Data->State->CTX->RunXCRFunction(Function); - memcpy(DstPtr, &Results, sizeof(uint32_t) * 2); -} - -#undef DEF_OP - -} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/Interpreter/ConversionOps.cpp b/FEXCore/Source/Interface/Core/Interpreter/ConversionOps.cpp deleted file mode 100644 index a71ad8862..000000000 --- a/FEXCore/Source/Interface/Core/Interpreter/ConversionOps.cpp +++ /dev/null @@ -1,279 +0,0 @@ -// SPDX-License-Identifier: MIT -/* -$info$ -tags: backend|interpreter -$end_info$ -*/ - -#include "Interface/Core/Interpreter/InterpreterClass.h" -#include "Interface/Core/Interpreter/InterpreterOps.h" -#include "Interface/Core/Interpreter/InterpreterDefines.h" - -#include - -namespace FEXCore::CPU { -#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) -DEF_OP(VInsGPR) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - const auto ElementSize = Op->Header.ElementSize; - const auto ElementSizeBits = ElementSize * 8; - constexpr auto SSEBitSize = Core::CPUState::XMM_SSE_REG_SIZE * 8; - - const uint64_t Offset = Op->DestIdx * ElementSizeBits; - const auto InUpperLane = Offset >= SSEBitSize; - - __uint128_t Mask = (1ULL << ElementSizeBits) - 1; - if (ElementSize == 8) { - Mask = ~0ULL; - } - - const auto Src1 = *GetSrc(Data->SSAData, Op->DestVector); - const auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Src); - - const auto Scalar = Src2 & Mask; - const auto ScaledOffset = InUpperLane ? Offset - SSEBitSize - : Offset; - - // Now shift into place and set all bits but - // the ones where we're going to insert our value. - Mask <<= ScaledOffset; - Mask = ~Mask; - - const auto Dst = [&] { - if (InUpperLane) { - return InterpVector256{ - .Lower = Src1.Lower, - .Upper = (Src1.Upper & Mask) | (Scalar << ScaledOffset), - }; - } else { - return InterpVector256{ - .Lower = (Src1.Lower & Mask) | (Scalar << ScaledOffset), - .Upper = Src1.Upper, - }; - } - }(); - - memcpy(GDP, &Dst, OpSize); -} - -DEF_OP(VCastFromGPR) { - auto Op = IROp->C(); - memcpy(GDP, GetSrc(Data->SSAData, Op->Src), Op->Header.ElementSize); -} - -DEF_OP(VDupFromGPR) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - const auto ElementSize = IROp->ElementSize; - const auto NumElements = OpSize / IROp->ElementSize; - - TempVectorDataArray Tmp{}; - - const auto *Src = GetSrc(Data->SSAData, Op->Src); - for (size_t i = 0; i < NumElements; i++) { - memcpy(&Tmp[i * ElementSize], Src, ElementSize); - } - - memcpy(GDP, Tmp.data(), sizeof(Tmp)); -} - -DEF_OP(Float_FromGPR_S) { - auto Op = IROp->C(); - - const uint16_t Conv = (Op->Header.ElementSize << 8) | Op->SrcElementSize; - switch (Conv) { - case 0x0404: { // Float <- int32_t - const float Dst = (float)*GetSrc(Data->SSAData, Op->Src); - memcpy(GDP, &Dst, Op->Header.ElementSize); - break; - } - case 0x0408: { // Float <- int64_t - const float Dst = (float)*GetSrc(Data->SSAData, Op->Src); - memcpy(GDP, &Dst, Op->Header.ElementSize); - break; - } - case 0x0804: { // Double <- int32_t - const double Dst = (double)*GetSrc(Data->SSAData, Op->Src); - memcpy(GDP, &Dst, Op->Header.ElementSize); - break; - } - case 0x0808: { // Double <- int64_t - const double Dst = (double)*GetSrc(Data->SSAData, Op->Src); - memcpy(GDP, &Dst, Op->Header.ElementSize); - break; - } - } -} - -DEF_OP(Float_FToF) { - auto Op = IROp->C(); - const uint16_t Conv = (Op->Header.ElementSize << 8) | Op->SrcElementSize; - switch (Conv) { - case 0x0804: { // Double <- Float - const double Dst = (double)*GetSrc(Data->SSAData, Op->Scalar); - memcpy(GDP, &Dst, 8); - break; - } - case 0x0408: { // Float <- Double - const float Dst = (float)*GetSrc(Data->SSAData, Op->Scalar); - memcpy(GDP, &Dst, 4); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown FCVT sizes: 0x{:x}", Conv); - } -} - -DEF_OP(Vector_SToF) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp{}; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize == ElementSize ? 1 : OpSize / ElementSize; - - const auto Func = [](auto a, auto min, auto max) { return a; }; - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP(4, float, int32_t, Func, 0, 0) - DO_VECTOR_1SRC_2TYPE_OP(8, double, int64_t, Func, 0, 0) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(Vector_FToZS) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp{}; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize == ElementSize ? 1 : OpSize / ElementSize; - - const auto Func = [](auto a, auto min, auto max) { return std::trunc(a); }; - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP(4, int32_t, float, Func, 0, 0) - DO_VECTOR_1SRC_2TYPE_OP(8, int64_t, double, Func, 0, 0) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(Vector_FToS) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp{}; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize == ElementSize ? 1 : OpSize / ElementSize; - - const auto Func = [](auto a, auto min, auto max) { return std::nearbyint(a); }; - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP(4, int32_t, float, Func, 0, 0) - DO_VECTOR_1SRC_2TYPE_OP(8, int64_t, double, Func, 0, 0) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(Vector_FToF) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp{}; - - const uint16_t ElementSize = Op->Header.ElementSize; - const uint16_t Conv = (ElementSize << 8) | Op->SrcElementSize; - - const auto Func = [](auto a, auto min, auto max) { return a; }; - switch (Conv) { - case 0x0804: { // Double <- float - // Only the lower elements from the source - // This uses half the source elements - uint8_t Elements = OpSize / 8; - DO_VECTOR_1SRC_2TYPE_OP_NOSIZE(double, float, Func, 0, 0) - break; - } - case 0x0408: { // Float <- Double - // Little bit tricky here - // Sometimes is used to convert from a 128bit vector register - // in to a 64bit vector register with different sized elements - // eg: %5 i32v2 = Vector_FToF %4 i128, #0x8 - uint8_t Elements = OpSize == 8 ? 2 : OpSize / Op->SrcElementSize; - DO_VECTOR_1SRC_2TYPE_OP_NOSIZE(float, double, Func, 0, 0) - break; - } - default: - LOGMAN_MSG_A_FMT("Unknown Conversion Type : 0x{:04x}", Conv); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(Vector_FToI) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp{}; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func_Nearest = [](auto a) { return std::rint(a); }; - const auto Func_Neg = [](auto a) { return std::floor(a); }; - const auto Func_Pos = [](auto a) { return std::ceil(a); }; - const auto Func_Trunc = [](auto a) { return std::trunc(a); }; - const auto Func_Host = [](auto a) { return std::rint(a); }; - - switch (Op->Round) { - case FEXCore::IR::Round_Nearest.Val: - switch (ElementSize) { - DO_VECTOR_1SRC_OP(4, float, Func_Nearest) - DO_VECTOR_1SRC_OP(8, double, Func_Nearest) - } - break; - case FEXCore::IR::Round_Negative_Infinity.Val: - switch (ElementSize) { - DO_VECTOR_1SRC_OP(4, float, Func_Neg) - DO_VECTOR_1SRC_OP(8, double, Func_Neg) - } - break; - case FEXCore::IR::Round_Positive_Infinity.Val: - switch (ElementSize) { - DO_VECTOR_1SRC_OP(4, float, Func_Pos) - DO_VECTOR_1SRC_OP(8, double, Func_Pos) - } - break; - case FEXCore::IR::Round_Towards_Zero.Val: - switch (ElementSize) { - DO_VECTOR_1SRC_OP(4, float, Func_Trunc) - DO_VECTOR_1SRC_OP(8, double, Func_Trunc) - } - break; - case FEXCore::IR::Round_Host.Val: - switch (ElementSize) { - DO_VECTOR_1SRC_OP(4, float, Func_Host) - DO_VECTOR_1SRC_OP(8, double, Func_Host) - } - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -#undef DEF_OP - -} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/Interpreter/EncryptionOps.cpp b/FEXCore/Source/Interface/Core/Interpreter/EncryptionOps.cpp deleted file mode 100644 index 935f83164..000000000 --- a/FEXCore/Source/Interface/Core/Interpreter/EncryptionOps.cpp +++ /dev/null @@ -1,557 +0,0 @@ -// SPDX-License-Identifier: MIT -/* -$info$ -tags: backend|interpreter -$end_info$ -*/ - -#include "Interface/Core/Interpreter/InterpreterClass.h" -#include "Interface/Core/Interpreter/InterpreterOps.h" -#include "Interface/Core/Interpreter/InterpreterDefines.h" - -#include - -namespace AES { - static __uint128_t InvShiftRows(uint8_t *State) { - uint8_t Shifted[16] = { - State[0], State[13], State[10], State[7], - State[4], State[1], State[14], State[11], - State[8], State[5], State[2], State[15], - State[12], State[9], State[6], State[3], - }; - __uint128_t Res{}; - memcpy(&Res, Shifted, 16); - return Res; - } - - static __uint128_t InvSubBytes(uint8_t *State) { - // 16x16 matrix table - static const uint8_t InvSubstitutionTable[256] = { - 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, - 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, - 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, - 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, - 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, - 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, - 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, - 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, - 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, - 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, - 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, - 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, - 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, - 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, - 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, - 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d, - }; - - // Uses a byte substitution table with a constant set of values - // Needs to do a table look up - uint8_t Substituted[16]; - for (size_t i = 0; i < 16; ++i) { - Substituted[i] = InvSubstitutionTable[State[i]]; - } - - __uint128_t Res{}; - memcpy(&Res, Substituted, 16); - return Res; - } - - static __uint128_t ShiftRows(uint8_t *State) { - uint8_t Shifted[16] = { - State[0], State[5], State[10], State[15], - State[4], State[9], State[14], State[3], - State[8], State[13], State[2], State[7], - State[12], State[1], State[6], State[11], - }; - __uint128_t Res{}; - memcpy(&Res, Shifted, 16); - return Res; - } - - static __uint128_t SubBytes(uint8_t *State, size_t Bytes) { - // 16x16 matrix table - static const uint8_t SubstitutionTable[256] = { - 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, - 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, - 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, - 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, - 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, - 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, - 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, - 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, - 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, - 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, - 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, - 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, - 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, - 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, - 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, - 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16, - }; - // Uses a byte substitution table with a constant set of values - // Needs to do a table look up - uint8_t Substituted[16]; - Bytes = std::min(Bytes, (size_t)16); - for (size_t i = 0; i < Bytes; ++i) { - Substituted[i] = SubstitutionTable[State[i]]; - } - - __uint128_t Res{}; - memcpy(&Res, Substituted, Bytes); - return Res; - } - - static uint8_t FFMul02(uint8_t in) { - static const uint8_t FFMul02[256] = { - 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, - 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, - 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, - 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, - 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, - 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, - 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, - 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, - 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05, - 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25, - 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, - 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65, - 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85, - 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, - 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5, - 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5, - }; - return FFMul02[in]; - } - - static uint8_t FFMul03(uint8_t in) { - static const uint8_t FFMul03[256] = { - 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, - 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, - 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, - 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, - 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, - 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, - 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, - 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, - 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a, - 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba, - 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea, - 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda, - 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a, - 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, - 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a, - 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a, - }; - return FFMul03[in]; - } - - static __uint128_t MixColumns(uint8_t *State) { - uint8_t In0[16] = { - State[0], State[4], State[8], State[12], - State[1], State[5], State[9], State[13], - State[2], State[6], State[10], State[14], - State[3], State[7], State[11], State[15], - }; - - uint8_t Out0[4]{}; - uint8_t Out1[4]{}; - uint8_t Out2[4]{}; - uint8_t Out3[4]{}; - - for (size_t i = 0; i < 4; ++i) { - Out0[i] = FFMul02(In0[0 + i]) ^ FFMul03(In0[4 + i]) ^ In0[8 + i] ^ In0[12 + i]; - Out1[i] = In0[0 + i] ^ FFMul02(In0[4 + i]) ^ FFMul03(In0[8 + i]) ^ In0[12 + i]; - Out2[i] = In0[0 + i] ^ In0[4 + i] ^ FFMul02(In0[8 + i]) ^ FFMul03(In0[12 + i]); - Out3[i] = FFMul03(In0[0 + i]) ^ In0[4 + i] ^ In0[8 + i] ^ FFMul02(In0[12 + i]); - } - - uint8_t OutArray[16] = { - Out0[0], Out1[0], Out2[0], Out3[0], - Out0[1], Out1[1], Out2[1], Out3[1], - Out0[2], Out1[2], Out2[2], Out3[2], - Out0[3], Out1[3], Out2[3], Out3[3], - }; - __uint128_t Res{}; - memcpy(&Res, OutArray, 16); - return Res; - } - - static uint8_t FFMul09(uint8_t in) { - static const uint8_t FFMul09[256] = { - 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, - 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, - 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, - 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc, - 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, - 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91, - 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a, - 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, - 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b, - 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b, - 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, - 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, - 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed, - 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, - 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6, - 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46, - }; - return FFMul09[in]; - } - - static uint8_t FFMul0B(uint8_t in) { - static const uint8_t FFMul0B[256] = { - 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, - 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, - 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12, - 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2, - 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, - 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f, - 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4, - 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, - 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e, - 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e, - 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5, - 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55, - 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, - 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, - 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13, - 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3, - }; - return FFMul0B[in]; - } - - static uint8_t FFMul0D(uint8_t in) { - static const uint8_t FFMul0D[256] = { - 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, - 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, - 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0, - 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20, - 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, - 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, - 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d, - 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, - 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, - 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, - 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, - 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa, - 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, - 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, - 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47, - 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97, - }; - - return FFMul0D[in]; - } - - static uint8_t FFMul0E(uint8_t in) { - static const uint8_t FFMul0E[256] = { - 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, - 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, - 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81, - 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61, - 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, - 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, - 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c, - 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, - 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b, - 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb, - 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, - 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, - 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6, - 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, - 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, - 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d, - }; - - return FFMul0E[in]; - } - - static __uint128_t InvMixColumns(uint8_t *State) { - uint8_t In0[16] = { - State[0], State[4], State[8], State[12], - State[1], State[5], State[9], State[13], - State[2], State[6], State[10], State[14], - State[3], State[7], State[11], State[15], - }; - - uint8_t Out0[4]{}; - uint8_t Out1[4]{}; - uint8_t Out2[4]{}; - uint8_t Out3[4]{}; - - for (size_t i = 0; i < 4; ++i) { - Out0[i] = FFMul0E(In0[0 + i]) ^ FFMul0B(In0[4 + i]) ^ FFMul0D(In0[8 + i]) ^ FFMul09(In0[12 + i]); - Out1[i] = FFMul09(In0[0 + i]) ^ FFMul0E(In0[4 + i]) ^ FFMul0B(In0[8 + i]) ^ FFMul0D(In0[12 + i]); - Out2[i] = FFMul0D(In0[0 + i]) ^ FFMul09(In0[4 + i]) ^ FFMul0E(In0[8 + i]) ^ FFMul0B(In0[12 + i]); - Out3[i] = FFMul0B(In0[0 + i]) ^ FFMul0D(In0[4 + i]) ^ FFMul09(In0[8 + i]) ^ FFMul0E(In0[12 + i]); - } - - uint8_t OutArray[16] = { - Out0[0], Out1[0], Out2[0], Out3[0], - Out0[1], Out1[1], Out2[1], Out3[1], - Out0[2], Out1[2], Out2[2], Out3[2], - Out0[3], Out1[3], Out2[3], Out3[3], - }; - __uint128_t Res{}; - memcpy(&Res, OutArray, 16); - return Res; - } -} - -namespace CRC32 { - // CRC32 per byte lookup table. - constexpr std::array CRC32CTable = []() consteval { - std::array Table{}; - - // Clang 11.x doesn't support bitreverse as a consteval - // constexpr uint32_t Polynomial = 0x1EDC6F41; - constexpr uint32_t PolynomialRev = 0x82F63B78; //__builtin_bitreverse32(Polynomial); - - for (size_t Char = 0; Char < std::size(Table); ++Char) { - uint32_t CurrentChar = Char; - for (size_t i = 0; i < 8; ++i) { - if (CurrentChar & 1) { - CurrentChar = (CurrentChar >> 1) ^ PolynomialRev; - } - else { - CurrentChar >>= 1; - } - } - Table[Char] = CurrentChar; - } - - return Table; - }(); - - uint32_t crc32cb(uint32_t Accumulator, uint8_t data) { - Accumulator = CRC32CTable[(uint8_t)Accumulator ^ data] ^ Accumulator >> 8; - return Accumulator; - } - - uint32_t crc32ch(uint32_t Accumulator, uint16_t data) { - Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 0) & 0xFF)] ^ Accumulator >> 8; - Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 8) & 0xFF)] ^ Accumulator >> 8; - return Accumulator; - } - - uint32_t crc32cw(uint32_t Accumulator, uint32_t data) { - Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 0) & 0xFF)] ^ Accumulator >> 8; - Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 8) & 0xFF)] ^ Accumulator >> 8; - Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 16) & 0xFF)] ^ Accumulator >> 8; - Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 24) & 0xFF)] ^ Accumulator >> 8; - return Accumulator; - } - - uint32_t crc32cx(uint32_t Accumulator, uint64_t data) { - Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 0) & 0xFF)] ^ Accumulator >> 8; - Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 8) & 0xFF)] ^ Accumulator >> 8; - Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 16) & 0xFF)] ^ Accumulator >> 8; - Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 24) & 0xFF)] ^ Accumulator >> 8; - Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 32) & 0xFF)] ^ Accumulator >> 8; - Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 40) & 0xFF)] ^ Accumulator >> 8; - Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 48) & 0xFF)] ^ Accumulator >> 8; - Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 56) & 0xFF)] ^ Accumulator >> 8; - return Accumulator; - } -} - -namespace FEXCore::CPU { -#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) - -DEF_OP(AESImc) { - auto Op = IROp->C(); - auto Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Vector); - - // Pseudo-code - // Dst = InvMixColumns(STATE) - __uint128_t Tmp{}; - Tmp = AES::InvMixColumns(reinterpret_cast(&Src1)); - memcpy(GDP, &Tmp, sizeof(Tmp)); -} - -DEF_OP(AESEnc) { - auto Op = IROp->C(); - auto Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->State); - auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Key); - - // Pseudo-code - // STATE = Src1 - // RoundKey = Src2 - // STATE = ShiftRows(STATE) - // STATE = SubBytes(STATE) - // STATE = MixColumns(STATE) - // Dst = STATE XOR RoundKey - __uint128_t Tmp{}; - Tmp = AES::ShiftRows(reinterpret_cast(&Src1)); - Tmp = AES::SubBytes(reinterpret_cast(&Tmp), 16); - Tmp = AES::MixColumns(reinterpret_cast(&Tmp)); - Tmp = Tmp ^ Src2; - memcpy(GDP, &Tmp, sizeof(Tmp)); -} - -DEF_OP(AESEncLast) { - auto Op = IROp->C(); - auto Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->State); - auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Key); - - // Pseudo-code - // STATE = Src1 - // RoundKey = Src2 - // STATE = ShiftRows(STATE) - // STATE = SubBytes(STATE) - // Dst = STATE XOR RoundKey - __uint128_t Tmp{}; - Tmp = AES::ShiftRows(reinterpret_cast(&Src1)); - Tmp = AES::SubBytes(reinterpret_cast(&Tmp), 16); - Tmp = Tmp ^ Src2; - memcpy(GDP, &Tmp, sizeof(Tmp)); -} - -DEF_OP(AESDec) { - auto Op = IROp->C(); - auto Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->State); - auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Key); - - // Pseudo-code - // STATE = Src1 - // RoundKey = Src2 - // STATE = InvShiftRows(STATE) - // STATE = InvSubBytes(STATE) - // STATE = InvMixColumns(STATE) - // Dst = STATE XOR RoundKey - __uint128_t Tmp{}; - Tmp = AES::InvShiftRows(reinterpret_cast(&Src1)); - Tmp = AES::InvSubBytes(reinterpret_cast(&Tmp)); - Tmp = AES::InvMixColumns(reinterpret_cast(&Tmp)); - Tmp = Tmp ^ Src2; - memcpy(GDP, &Tmp, sizeof(Tmp)); -} - -DEF_OP(AESDecLast) { - auto Op = IROp->C(); - auto Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->State); - auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Key); - - // Pseudo-code - // STATE = Src1 - // RoundKey = Src2 - // STATE = InvShiftRows(STATE) - // STATE = InvSubBytes(STATE) - // Dst = STATE XOR RoundKey - __uint128_t Tmp{}; - Tmp = AES::InvShiftRows(reinterpret_cast(&Src1)); - Tmp = AES::InvSubBytes(reinterpret_cast(&Tmp)); - Tmp = Tmp ^ Src2; - memcpy(GDP, &Tmp, sizeof(Tmp)); -} - -DEF_OP(AESKeyGenAssist) { - auto Op = IROp->C(); - const uint8_t *Src1 = GetSrc(Data->SSAData, Op->Src); - - // Pseudo-code - // X3 = Src1[127:96] - // X2 = Src1[95:64] - // X1 = Src1[63:32] - // X0 = Src1[31:30] - // RCON = (Zext)rcon - // Dest[31:0] = SubWord(X1) - // Dest[63:32] = RotWord(SubWord(X1)) XOR RCON - // Dest[95:64] = SubWord(X3) - // Dest[127:96] = RotWord(SubWord(X3)) XOR RCON - __uint128_t Tmp{}; - uint32_t X1{}; - uint32_t X3{}; - memcpy(&X1, &Src1[4], 4); - memcpy(&X3, &Src1[12], 4); - uint32_t SubWord_X1 = AES::SubBytes(reinterpret_cast(&X1), 4); - uint32_t SubWord_X3 = AES::SubBytes(reinterpret_cast(&X3), 4); - - auto Ror = [] (auto In, auto R) { - auto RotateMask = sizeof(In) * 8 - 1; - R &= RotateMask; - return (In >> R) | (In << (sizeof(In) * 8 - R)); - }; - - uint32_t Rot_X1 = Ror(SubWord_X1, 8); - uint32_t Rot_X3 = Ror(SubWord_X3, 8); - - Tmp = Rot_X3 ^ Op->RCON; - Tmp <<= 32; - Tmp |= SubWord_X3; - Tmp <<= 32; - Tmp |= Rot_X1 ^ Op->RCON; - Tmp <<= 32; - Tmp |= SubWord_X1; - memcpy(GDP, &Tmp, sizeof(Tmp)); -} - -DEF_OP(CRC32) { - auto Op = IROp->C(); - uint32_t Src1 = *GetSrc(Data->SSAData, Op->Src1); - uint8_t *Src2 = GetSrc(Data->SSAData, Op->Src2); - uint32_t Tmp{}; - - switch (Op->SrcSize) { - case 1: - Tmp = CRC32::crc32cb(Src1, *(uint8_t*)Src2); - break; - case 2: - Tmp = CRC32::crc32ch(Src1, *(uint16_t*)Src2); - break; - case 4: - Tmp = CRC32::crc32cw(Src1, *(uint32_t*)Src2); - break; - case 8: - Tmp = CRC32::crc32cx(Src1, *(uint64_t*)Src2); - break; - default: - LOGMAN_MSG_A_FMT("Unknown CRC32C size: {}", Op->SrcSize); - break; - - } - memcpy(GDP, &Tmp, sizeof(Tmp)); -} - -DEF_OP(PCLMUL) { - auto Op = IROp->C(); - - const auto Selector = Op->Selector; - auto* Dst = GetDest(Data->SSAData, Node); - auto* Src1 = GetSrc(Data->SSAData, Op->Src1); - auto* Src2 = GetSrc(Data->SSAData, Op->Src2); - - const uint64_t TMP1 = (Selector & 0x01) == 0 ? Src1[0] : Src1[1]; - const uint64_t TMP2 = (Selector & 0x10) == 0 ? Src2[0] : Src2[1]; - - const auto make_lo = [](uint64_t lhs, uint64_t rhs) { - uint64_t result = 0; - - for (size_t i = 0; i < 64; i++) { - if ((lhs & (1ULL << i)) != 0) { - result ^= rhs << i; - } - } - - return result; - }; - const auto make_hi = [](uint64_t lhs, uint64_t rhs) { - uint64_t result = 0; - - for (size_t i = 1; i < 64; i++) { - if ((lhs & (1ULL << i)) != 0) { - result ^= rhs >> (64 - i); - } - } - - return result; - }; - - Dst[0] = make_lo(TMP1, TMP2); - Dst[1] = make_hi(TMP1, TMP2); -} - -#undef DEF_OP - -} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/Interpreter/F80Ops.cpp b/FEXCore/Source/Interface/Core/Interpreter/F80Ops.cpp deleted file mode 100644 index 45f1bcd45..000000000 --- a/FEXCore/Source/Interface/Core/Interpreter/F80Ops.cpp +++ /dev/null @@ -1,349 +0,0 @@ -// SPDX-License-Identifier: MIT -/* -$info$ -tags: backend|interpreter -$end_info$ -*/ - -#include "Interface/Core/Interpreter/InterpreterClass.h" -#include "Interface/Core/Interpreter/InterpreterOps.h" -#include "Interface/Core/Interpreter/InterpreterDefines.h" - -#include "Interface/Core/Interpreter/Fallbacks/F80Fallbacks.h" - -#include - -namespace FEXCore::CPU { -#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) -DEF_OP(F80ADD) { - auto Op = IROp->C(); - const auto Src1 = *GetSrc(Data->SSAData, Op->X80Src1); - const auto Src2 = *GetSrc(Data->SSAData, Op->X80Src2); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80SUB) { - auto Op = IROp->C(); - const auto Src1 = *GetSrc(Data->SSAData, Op->X80Src1); - const auto Src2 = *GetSrc(Data->SSAData, Op->X80Src2); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80MUL) { - auto Op = IROp->C(); - const auto Src1 = *GetSrc(Data->SSAData, Op->X80Src1); - const auto Src2 = *GetSrc(Data->SSAData, Op->X80Src2); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80DIV) { - auto Op = IROp->C(); - const auto Src1 = *GetSrc(Data->SSAData, Op->X80Src1); - const auto Src2 = *GetSrc(Data->SSAData, Op->X80Src2); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80FYL2X) { - auto Op = IROp->C(); - const auto Src1 = *GetSrc(Data->SSAData, Op->X80Src1); - const auto Src2 = *GetSrc(Data->SSAData, Op->X80Src2); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80ATAN) { - auto Op = IROp->C(); - const auto Src1 = *GetSrc(Data->SSAData, Op->X80Src1); - const auto Src2 = *GetSrc(Data->SSAData, Op->X80Src2); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80FPREM1) { - auto Op = IROp->C(); - const auto Src1 = *GetSrc(Data->SSAData, Op->X80Src1); - const auto Src2 = *GetSrc(Data->SSAData, Op->X80Src2); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80FPREM) { - auto Op = IROp->C(); - const auto Src1 = *GetSrc(Data->SSAData, Op->X80Src1); - const auto Src2 = *GetSrc(Data->SSAData, Op->X80Src2); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80SCALE) { - auto Op = IROp->C(); - const auto Src1 = *GetSrc(Data->SSAData, Op->X80Src1); - const auto Src2 = *GetSrc(Data->SSAData, Op->X80Src2); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80CVT) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const auto Src = *GetSrc(Data->SSAData, Op->X80Src); - - switch (OpSize) { - case 4: { - const auto Tmp = CPU::OpHandlers::handle4(Data->State->CurrentFrame->State.FCW, Src); - memcpy(GDP, &Tmp, OpSize); - break; - } - case 8: { - const auto Tmp = CPU::OpHandlers::handle8(Data->State->CurrentFrame->State.FCW, Src); - memcpy(GDP, &Tmp, OpSize); - break; - } - default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize); - } -} - -DEF_OP(F80CVTINT) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const auto Src = *GetSrc(Data->SSAData, Op->X80Src); - - switch (OpSize) { - case 2: { - int16_t Tmp = (Op->Truncate? FEXCore::CPU::OpHandlers::handle2t : FEXCore::CPU::OpHandlers::handle2)(Data->State->CurrentFrame->State.FCW, Src); - memcpy(GDP, &Tmp, sizeof(Tmp)); - break; - } - case 4: { - int32_t Tmp = (Op->Truncate? FEXCore::CPU::OpHandlers::handle4t : FEXCore::CPU::OpHandlers::handle4)(Data->State->CurrentFrame->State.FCW, Src); - memcpy(GDP, &Tmp, sizeof(Tmp)); - break; - } - case 8: { - int64_t Tmp = (Op->Truncate? FEXCore::CPU::OpHandlers::handle8t : FEXCore::CPU::OpHandlers::handle8)(Data->State->CurrentFrame->State.FCW, Src); - memcpy(GDP, &Tmp, sizeof(Tmp)); - break; - } - default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize); - } -} - -DEF_OP(F80CVTTO) { - auto Op = IROp->C(); - - switch (Op->SrcSize) { - case 4: { - float Src = *GetSrc(Data->SSAData, Op->X80Src); - const auto Tmp = CPU::OpHandlers::handle4(Data->State->CurrentFrame->State.FCW, Src); - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); - break; - } - case 8: { - double Src = *GetSrc(Data->SSAData, Op->X80Src); - const auto Tmp = CPU::OpHandlers::handle8(Data->State->CurrentFrame->State.FCW, Src); - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); - break; - } - default: LogMan::Msg::DFmt("Unhandled size: {}", Op->SrcSize); - } -} - -DEF_OP(F80CVTTOINT) { - auto Op = IROp->C(); - - switch (Op->SrcSize) { - case 2: { - int16_t Src = *GetSrc(Data->SSAData, Op->Src); - const auto Tmp = CPU::OpHandlers::handle2(Data->State->CurrentFrame->State.FCW, Src); - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); - break; - } - case 4: { - int32_t Src = *GetSrc(Data->SSAData, Op->Src); - const auto Tmp = CPU::OpHandlers::handle4(Data->State->CurrentFrame->State.FCW, Src); - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); - break; - } - default: LogMan::Msg::DFmt("Unhandled size: {}", Op->SrcSize); - } -} - -DEF_OP(F80ROUND) { - auto Op = IROp->C(); - const auto Src = *GetSrc(Data->SSAData, Op->X80Src); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80F2XM1) { - auto Op = IROp->C(); - const auto Src = *GetSrc(Data->SSAData, Op->X80Src); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80TAN) { - auto Op = IROp->C(); - const auto Src = *GetSrc(Data->SSAData, Op->X80Src); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80SQRT) { - auto Op = IROp->C(); - const auto Src = *GetSrc(Data->SSAData, Op->X80Src); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80SIN) { - auto Op = IROp->C(); - const auto Src = *GetSrc(Data->SSAData, Op->X80Src); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80COS) { - auto Op = IROp->C(); - const auto Src = *GetSrc(Data->SSAData, Op->X80Src); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80XTRACT_EXP) { - auto Op = IROp->C(); - const auto Src = *GetSrc(Data->SSAData, Op->X80Src); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80XTRACT_SIG) { - auto Op = IROp->C(); - const auto Src = *GetSrc(Data->SSAData, Op->X80Src); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src); - - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80CMP) { - auto Op = IROp->C(); - const auto Src1 = *GetSrc(Data->SSAData, Op->X80Src1); - const auto Src2 = *GetSrc(Data->SSAData, Op->X80Src2); - const auto ResultFlags = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2); - - GD = ResultFlags; -} - -DEF_OP(F80BCDLOAD) { - auto Op = IROp->C(); - const auto Src = *GetSrc(Data->SSAData, Op->X80Src); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src); - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F80BCDSTORE) { - auto Op = IROp->C(); - const auto Src = *GetSrc(Data->SSAData, Op->X80Src); - const auto Tmp = CPU::OpHandlers::handle(Data->State->CurrentFrame->State.FCW, Src); - memcpy(GDP, &Tmp, sizeof(X80SoftFloat)); -} - -DEF_OP(F64SIN) { - auto Op = IROp->C(); - const double Src = *GetSrc(Data->SSAData, Op->Src); - const double Tmp = sin(Src); - memcpy(GDP, &Tmp, sizeof(double)); -} - -DEF_OP(F64COS) { - auto Op = IROp->C(); - const double Src = *GetSrc(Data->SSAData, Op->Src); - const double Tmp = cos(Src); - memcpy(GDP, &Tmp, sizeof(double)); -} - -DEF_OP(F64TAN) { - auto Op = IROp->C(); - const double Src = *GetSrc(Data->SSAData, Op->Src); - const double Tmp = tan(Src); - memcpy(GDP, &Tmp, sizeof(double)); -} - -DEF_OP(F64F2XM1) { - auto Op = IROp->C(); - const double Src = *GetSrc(Data->SSAData, Op->Src); - const double Tmp = exp2(Src) - 1.0; - memcpy(GDP, &Tmp, sizeof(double)); -} - -DEF_OP(F64ATAN) { - auto Op = IROp->C(); - const double Src1 = *GetSrc(Data->SSAData, Op->Src1); - const double Src2 = *GetSrc(Data->SSAData, Op->Src2); - const double Tmp = atan2(Src1, Src2); - - memcpy(GDP, &Tmp, sizeof(double)); -} - -DEF_OP(F64FPREM) { - auto Op = IROp->C(); - const double Src1 = *GetSrc(Data->SSAData, Op->Src1); - const double Src2 = *GetSrc(Data->SSAData, Op->Src2); - const double Tmp = fmod(Src1, Src2); - - memcpy(GDP, &Tmp, sizeof(double)); -} - -DEF_OP(F64FPREM1) { - auto Op = IROp->C(); - const double Src1 = *GetSrc(Data->SSAData, Op->Src1); - const double Src2 = *GetSrc(Data->SSAData, Op->Src2); - const double Tmp = remainder(Src1, Src2); - - memcpy(GDP, &Tmp, sizeof(double)); -} - -DEF_OP(F64FYL2X) { - auto Op = IROp->C(); - const double Src1 = *GetSrc(Data->SSAData, Op->Src); - const double Src2 = *GetSrc(Data->SSAData, Op->Src2); - const double Tmp = Src2 * log2(Src1); - - memcpy(GDP, &Tmp, sizeof(double)); -} - -DEF_OP(F64SCALE) { - auto Op = IROp->C(); - const double Src1 = *GetSrc(Data->SSAData, Op->Src1); - const double Src2 = *GetSrc(Data->SSAData, Op->Src2); - const double trunc = (double)(int64_t)(Src2); //truncate - const double Tmp = Src1 * exp2(trunc); - - memcpy(GDP, &Tmp, sizeof(double)); -} - -#undef DEF_OP - -} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/Interpreter/FlagOps.cpp b/FEXCore/Source/Interface/Core/Interpreter/FlagOps.cpp deleted file mode 100644 index 9fd5c6a68..000000000 --- a/FEXCore/Source/Interface/Core/Interpreter/FlagOps.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// SPDX-License-Identifier: MIT -/* -$info$ -tags: backend|interpreter -$end_info$ -*/ - -#include "Interface/Core/Interpreter/InterpreterClass.h" -#include "Interface/Core/Interpreter/InterpreterOps.h" -#include "Interface/Core/Interpreter/InterpreterDefines.h" - -#include - -namespace FEXCore::CPU { -#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) -DEF_OP(GetHostFlag) { - auto Op = IROp->C(); - GD = (*GetSrc(Data->SSAData, Op->Value) >> Op->Flag) & 1; -} -#undef DEF_OP - -} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/Interpreter/InterpreterOps.cpp b/FEXCore/Source/Interface/Core/Interpreter/InterpreterOps.cpp deleted file mode 100644 index 7582c3f68..000000000 --- a/FEXCore/Source/Interface/Core/Interpreter/InterpreterOps.cpp +++ /dev/null @@ -1,424 +0,0 @@ -// SPDX-License-Identifier: MIT -#include "Interface/Context/Context.h" -#include "Interface/Core/CPUID.h" -#include "InterpreterDefines.h" -#include "InterpreterOps.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Interface/HLE/Thunks/Thunks.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace FEXCore::CPU { - -using OpHandler = void (*)(IR::IROp_Header *IROp, InterpreterOps::IROpData *Data, IR::NodeID Node); -using OpHandlerArray = std::array; - -constexpr OpHandlerArray InterpreterOpHandlers = [] { - OpHandlerArray Handlers{}; - for (auto& Entry : Handlers) { - Entry = &InterpreterOps::Op_Unhandled; - } - - #define REGISTER_OP(op, x) Handlers[IR::IROps::OP_##op] = &InterpreterOps::Op_##x - - // ALU ops - REGISTER_OP(TRUNCELEMENTPAIR, TruncElementPair); - REGISTER_OP(CONSTANT, Constant); - REGISTER_OP(ENTRYPOINTOFFSET, EntrypointOffset); - REGISTER_OP(INLINECONSTANT, InlineConstant); - REGISTER_OP(INLINEENTRYPOINTOFFSET, InlineEntrypointOffset); - REGISTER_OP(CYCLECOUNTER, CycleCounter); - REGISTER_OP(ADD, Add); - REGISTER_OP(ADDNZCV, AddNZCV); - REGISTER_OP(TESTNZ, TestNZ); - REGISTER_OP(SUB, Sub); - REGISTER_OP(SUBNZCV, SubNZCV); - REGISTER_OP(NEG, Neg); - REGISTER_OP(ABS, Abs); - REGISTER_OP(MUL, Mul); - REGISTER_OP(UMUL, UMul); - REGISTER_OP(DIV, Div); - REGISTER_OP(UDIV, UDiv); - REGISTER_OP(REM, Rem); - REGISTER_OP(UREM, URem); - REGISTER_OP(MULH, MulH); - REGISTER_OP(UMULH, UMulH); - REGISTER_OP(OR, Or); - REGISTER_OP(ORLSHL, Orlshl); - REGISTER_OP(ORLSHR, Orlshr); - REGISTER_OP(AND, And); - REGISTER_OP(ANDN, Andn); - REGISTER_OP(XOR, Xor); - REGISTER_OP(LSHL, Lshl); - REGISTER_OP(LSHR, Lshr); - REGISTER_OP(ASHR, Ashr); - REGISTER_OP(ROR, Ror); - REGISTER_OP(EXTR, Extr); - REGISTER_OP(PDEP, PDep); - REGISTER_OP(PEXT, PExt); - REGISTER_OP(LDIV, LDiv); - REGISTER_OP(LUDIV, LUDiv); - REGISTER_OP(LREM, LRem); - REGISTER_OP(LUREM, LURem); - REGISTER_OP(NOT, Not); - REGISTER_OP(POPCOUNT, Popcount); - REGISTER_OP(FINDLSB, FindLSB); - REGISTER_OP(FINDMSB, FindMSB); - REGISTER_OP(FINDTRAILINGZEROES, FindTrailingZeroes); - REGISTER_OP(COUNTLEADINGZEROES, CountLeadingZeroes); - REGISTER_OP(REV, Rev); - REGISTER_OP(BFI, Bfi); - REGISTER_OP(BFXIL, Bfxil); - REGISTER_OP(BFE, Bfe); - REGISTER_OP(SBFE, Sbfe); - REGISTER_OP(SELECT, Select); - REGISTER_OP(VEXTRACTTOGPR, VExtractToGPR); - REGISTER_OP(FLOAT_TOGPR_ZS, Float_ToGPR_ZS); - REGISTER_OP(FLOAT_TOGPR_S, Float_ToGPR_S); - REGISTER_OP(FCMP, FCmp); - - // Atomic ops - REGISTER_OP(CASPAIR, CASPair); - REGISTER_OP(CAS, CAS); - REGISTER_OP(ATOMICADD, AtomicAdd); - REGISTER_OP(ATOMICSUB, AtomicSub); - REGISTER_OP(ATOMICAND, AtomicAnd); - REGISTER_OP(ATOMICOR, AtomicOr); - REGISTER_OP(ATOMICXOR, AtomicXor); - REGISTER_OP(ATOMICSWAP, AtomicSwap); - REGISTER_OP(ATOMICFETCHADD, AtomicFetchAdd); - REGISTER_OP(ATOMICFETCHSUB, AtomicFetchSub); - REGISTER_OP(ATOMICFETCHAND, AtomicFetchAnd); - REGISTER_OP(ATOMICFETCHOR, AtomicFetchOr); - REGISTER_OP(ATOMICFETCHXOR, AtomicFetchXor); - REGISTER_OP(ATOMICFETCHNEG, AtomicFetchNeg); - REGISTER_OP(TELEMETRYSETVALUE, TelemetrySetValue); - - // Branch ops - REGISTER_OP(CALLBACKRETURN, CallbackReturn); - REGISTER_OP(EXITFUNCTION, ExitFunction); - REGISTER_OP(JUMP, Jump); - REGISTER_OP(CONDJUMP, CondJump); - REGISTER_OP(SYSCALL, Syscall); - REGISTER_OP(INLINESYSCALL, InlineSyscall); - REGISTER_OP(THUNK, Thunk); - REGISTER_OP(VALIDATECODE, ValidateCode); - REGISTER_OP(THREADREMOVECODEENTRY, ThreadRemoveCodeEntry); - REGISTER_OP(CPUID, CPUID); - REGISTER_OP(XGETBV, XGETBV); - - // Conversion ops - REGISTER_OP(VINSGPR, VInsGPR); - REGISTER_OP(VCASTFROMGPR, VCastFromGPR); - REGISTER_OP(VDUPFROMGPR, VDupFromGPR); - REGISTER_OP(FLOAT_FROMGPR_S, Float_FromGPR_S); - REGISTER_OP(FLOAT_FTOF, Float_FToF); - REGISTER_OP(VECTOR_STOF, Vector_SToF); - REGISTER_OP(VECTOR_FTOZS, Vector_FToZS); - REGISTER_OP(VECTOR_FTOS, Vector_FToS); - REGISTER_OP(VECTOR_FTOF, Vector_FToF); - REGISTER_OP(VECTOR_FTOI, Vector_FToI); - - // Flag ops - REGISTER_OP(GETHOSTFLAG, GetHostFlag); - - // Memory ops - REGISTER_OP(LOADCONTEXT, LoadContext); - REGISTER_OP(STORECONTEXT, StoreContext); - REGISTER_OP(LOADREGISTER, LoadRegister); - REGISTER_OP(STOREREGISTER, StoreRegister); - REGISTER_OP(LOADCONTEXTINDEXED, LoadContextIndexed); - REGISTER_OP(STORECONTEXTINDEXED, StoreContextIndexed); - REGISTER_OP(SPILLREGISTER, SpillRegister); - REGISTER_OP(FILLREGISTER, FillRegister); - REGISTER_OP(LOADFLAG, LoadFlag); - REGISTER_OP(STOREFLAG, StoreFlag); - REGISTER_OP(LOADMEM, LoadMem); - REGISTER_OP(STOREMEM, StoreMem); - REGISTER_OP(LOADMEMTSO, LoadMem); - REGISTER_OP(STOREMEMTSO, StoreMem); - REGISTER_OP(VLOADVECTORMASKED, VLoadVectorMasked); - REGISTER_OP(VSTOREVECTORMASKED, VStoreVectorMasked); - REGISTER_OP(VLOADVECTORELEMENT, VLoadVectorElement); - REGISTER_OP(VSTOREVECTORELEMENT, VStoreVectorElement); - REGISTER_OP(VBROADCASTFROMMEM, VBroadcastFromMem); - REGISTER_OP(PUSH, Push); - REGISTER_OP(MEMSET, MemSet); - REGISTER_OP(MEMCPY, MemCpy); - REGISTER_OP(CACHELINECLEAR, CacheLineClear); - REGISTER_OP(CACHELINECLEAN, CacheLineClean); - REGISTER_OP(CACHELINEZERO, CacheLineZero); - - // Misc ops - REGISTER_OP(DUMMY, NoOp); - REGISTER_OP(IRHEADER, NoOp); - REGISTER_OP(CODEBLOCK, NoOp); - REGISTER_OP(BEGINBLOCK, NoOp); - REGISTER_OP(ENDBLOCK, NoOp); - REGISTER_OP(GUESTOPCODE, NoOp); - REGISTER_OP(FENCE, Fence); - REGISTER_OP(BREAK, Break); - REGISTER_OP(PRINT, Print); - REGISTER_OP(GETROUNDINGMODE, GetRoundingMode); - REGISTER_OP(SETROUNDINGMODE, SetRoundingMode); - REGISTER_OP(INVALIDATEFLAGS, NoOp); - REGISTER_OP(PROCESSORID, ProcessorID); - REGISTER_OP(RDRAND, RDRAND); - REGISTER_OP(YIELD, Yield); - - // Move ops - REGISTER_OP(EXTRACTELEMENTPAIR, ExtractElementPair); - REGISTER_OP(CREATEELEMENTPAIR, CreateElementPair); - - // Vector ops - REGISTER_OP(VECTORZERO, VectorZero); - REGISTER_OP(VECTORIMM, VectorImm); - REGISTER_OP(LOADNAMEDVECTORCONSTANT, LoadNamedVectorConstant); - REGISTER_OP(LOADNAMEDVECTORINDEXEDCONSTANT, LoadNamedVectorIndexedConstant); - REGISTER_OP(VMOV, VMov); - REGISTER_OP(VAND, VAnd); - REGISTER_OP(VBIC, VBic); - REGISTER_OP(VOR, VOr); - REGISTER_OP(VXOR, VXor); - REGISTER_OP(VADD, VAdd); - REGISTER_OP(VSUB, VSub); - REGISTER_OP(VUQADD, VUQAdd); - REGISTER_OP(VUQSUB, VUQSub); - REGISTER_OP(VSQADD, VSQAdd); - REGISTER_OP(VSQSUB, VSQSub); - REGISTER_OP(VADDP, VAddP); - REGISTER_OP(VADDV, VAddV); - REGISTER_OP(VUMINV, VUMinV); - REGISTER_OP(VURAVG, VURAvg); - REGISTER_OP(VABS, VAbs); - REGISTER_OP(VPOPCOUNT, VPopcount); - REGISTER_OP(VFADD, VFAdd); - REGISTER_OP(VFADDP, VFAddP); - REGISTER_OP(VFSUB, VFSub); - REGISTER_OP(VFMUL, VFMul); - REGISTER_OP(VFDIV, VFDiv); - REGISTER_OP(VFMIN, VFMin); - REGISTER_OP(VFMAX, VFMax); - REGISTER_OP(VFRECP, VFRecp); - REGISTER_OP(VFSQRT, VFSqrt); - REGISTER_OP(VFRSQRT, VFRSqrt); - REGISTER_OP(VNEG, VNeg); - REGISTER_OP(VFNEG, VFNeg); - REGISTER_OP(VNOT, VNot); - REGISTER_OP(VUMIN, VUMin); - REGISTER_OP(VSMIN, VSMin); - REGISTER_OP(VUMAX, VUMax); - REGISTER_OP(VSMAX, VSMax); - REGISTER_OP(VZIP, VZip); - REGISTER_OP(VZIP2, VZip); - REGISTER_OP(VUNZIP, VUnZip); - REGISTER_OP(VUNZIP2, VUnZip); - REGISTER_OP(VTRN, VTrn); - REGISTER_OP(VTRN2, VTrn); - REGISTER_OP(VBSL, VBSL); - REGISTER_OP(VCMPEQ, VCMPEQ); - REGISTER_OP(VCMPEQZ, VCMPEQZ); - REGISTER_OP(VCMPGT, VCMPGT); - REGISTER_OP(VCMPGTZ, VCMPGTZ); - REGISTER_OP(VCMPLTZ, VCMPLTZ); - REGISTER_OP(VFCMPEQ, VFCMPEQ); - REGISTER_OP(VFCMPNEQ, VFCMPNEQ); - REGISTER_OP(VFCMPLT, VFCMPLT); - REGISTER_OP(VFCMPGT, VFCMPGT); - REGISTER_OP(VFCMPLE, VFCMPLE); - REGISTER_OP(VFCMPORD, VFCMPORD); - REGISTER_OP(VFCMPUNO, VFCMPUNO); - REGISTER_OP(VUSHL, VUShl); - REGISTER_OP(VUSHR, VUShr); - REGISTER_OP(VSSHR, VSShr); - REGISTER_OP(VUSHLS, VUShlS); - REGISTER_OP(VUSHRS, VUShrS); - REGISTER_OP(VSSHRS, VSShrS); - REGISTER_OP(VUSHLSWIDE, VUShlSWide); - REGISTER_OP(VUSHRSWIDE, VUShrSWide); - REGISTER_OP(VSSHRSWIDE, VSShrSWide); - REGISTER_OP(VINSELEMENT, VInsElement); - REGISTER_OP(VDUPELEMENT, VDupElement); - REGISTER_OP(VEXTR, VExtr); - REGISTER_OP(VUSHRI, VUShrI); - REGISTER_OP(VSSHRI, VSShrI); - REGISTER_OP(VSHLI, VShlI); - REGISTER_OP(VUSHRNI, VUShrNI); - REGISTER_OP(VUSHRNI2, VUShrNI2); - REGISTER_OP(VSXTL, VSXTL); - REGISTER_OP(VSXTL2, VSXTL2); - REGISTER_OP(VUXTL, VUXTL); - REGISTER_OP(VUXTL2, VUXTL2); - REGISTER_OP(VSQXTN, VSQXTN); - REGISTER_OP(VSQXTN2, VSQXTN2); - REGISTER_OP(VSQXTNPAIR, VSQXTNPair); - REGISTER_OP(VSQXTUN, VSQXTUN); - REGISTER_OP(VSQXTUN2, VSQXTUN2); - REGISTER_OP(VSQXTUNPAIR, VSQXTUNPair); - REGISTER_OP(VUMUL, VUMul); - REGISTER_OP(VSMUL, VSMul); - REGISTER_OP(VUMULL, VUMull); - REGISTER_OP(VSMULL, VSMull); - REGISTER_OP(VUMULL2, VUMull2); - REGISTER_OP(VSMULL2, VSMull2); - REGISTER_OP(VUMULH, VUMulH); - REGISTER_OP(VSMULH, VSMulH); - REGISTER_OP(VUABDL, VUABDL); - REGISTER_OP(VUABDL2, VUABDL2); - REGISTER_OP(VTBL1, VTBL1); - REGISTER_OP(VTBL2, VTBL2); - REGISTER_OP(VREV32, VRev32); - REGISTER_OP(VREV64, VRev64); - REGISTER_OP(VPCMPESTRX, VPCMPESTRX); - REGISTER_OP(VPCMPISTRX, VPCMPISTRX); - REGISTER_OP(VFCADD, VFCADD); - - // Encryption ops - REGISTER_OP(VAESIMC, AESImc); - REGISTER_OP(VAESENC, AESEnc); - REGISTER_OP(VAESENCLAST, AESEncLast); - REGISTER_OP(VAESDEC, AESDec); - REGISTER_OP(VAESDECLAST, AESDecLast); - REGISTER_OP(VAESKEYGENASSIST, AESKeyGenAssist); - REGISTER_OP(CRC32, CRC32); - REGISTER_OP(PCLMUL, PCLMUL); - - // F80 ops - REGISTER_OP(F80ADD, F80ADD); - REGISTER_OP(F80SUB, F80SUB); - REGISTER_OP(F80MUL, F80MUL); - REGISTER_OP(F80DIV, F80DIV); - REGISTER_OP(F80FYL2X, F80FYL2X); - REGISTER_OP(F80ATAN, F80ATAN); - REGISTER_OP(F80FPREM1, F80FPREM1); - REGISTER_OP(F80FPREM, F80FPREM); - REGISTER_OP(F80SCALE, F80SCALE); - REGISTER_OP(F80CVT, F80CVT); - REGISTER_OP(F80CVTINT, F80CVTINT); - REGISTER_OP(F80CVTTO, F80CVTTO); - REGISTER_OP(F80CVTTOINT, F80CVTTOINT); - REGISTER_OP(F80ROUND, F80ROUND); - REGISTER_OP(F80F2XM1, F80F2XM1); - REGISTER_OP(F80TAN, F80TAN); - REGISTER_OP(F80SQRT, F80SQRT); - REGISTER_OP(F80SIN, F80SIN); - REGISTER_OP(F80COS, F80COS); - REGISTER_OP(F80XTRACT_EXP, F80XTRACT_EXP); - REGISTER_OP(F80XTRACT_SIG, F80XTRACT_SIG); - REGISTER_OP(F80CMP, F80CMP); - REGISTER_OP(F80BCDLOAD, F80BCDLOAD); - REGISTER_OP(F80BCDSTORE, F80BCDSTORE); - - // F64 ops - REGISTER_OP(F64SIN, F64SIN); - REGISTER_OP(F64COS, F64COS); - REGISTER_OP(F64TAN, F64TAN); - REGISTER_OP(F64F2XM1, F64F2XM1); - REGISTER_OP(F64ATAN, F64ATAN); - REGISTER_OP(F64FPREM, F64FPREM); - REGISTER_OP(F64FPREM1, F64FPREM1); - REGISTER_OP(F64FYL2X, F64FYL2X); - REGISTER_OP(F64SCALE, F64SCALE); - - return Handlers; -}(); - -void InterpreterOps::Op_Unhandled(FEXCore::IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) { - LOGMAN_MSG_A_FMT("Unhandled IR Op: {}", FEXCore::IR::GetName(IROp->Op)); -} - -void InterpreterOps::Op_NoOp(FEXCore::IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) { -} - -void InterpreterOps::InterpretIR(FEXCore::Core::CpuStateFrame *Frame, FEXCore::IR::IRListView const *CurrentIR) { - volatile void *StackEntry = alloca(0); - - const uintptr_t ListSize = CurrentIR->GetSSACount(); - - static_assert(sizeof(FEXCore::IR::OrderedNode) == 16); - - auto BlockEnd = CurrentIR->GetBlocks().end(); - - // SSA data elements must be able to accommodate data that would - // fit inside the largest vector size (otherwise vector operations - // go kaboom, and we don't want that). - const size_t SSADataSize = ListSize * MaxInterpeterVectorSize; - - InterpreterOps::IROpData OpData{ - .State = Frame->Thread, - .CurrentEntry = Frame->State.rip, - .CurrentIR = CurrentIR, - .StackEntry = StackEntry, - .SSAData = alloca(SSADataSize), - .BlockResults = {}, - .BlockIterator = CurrentIR->GetBlocks().begin(), - }; - - // Clear all SSAData entries to zero. Required for Zero-extend semantics - memset(OpData.SSAData, 0, SSADataSize); - - while (1) { - using namespace FEXCore::IR; - auto [BlockNode, BlockHeader] = OpData.BlockIterator(); - auto BlockIROp = BlockHeader->CW(); - LOGMAN_THROW_AA_FMT(BlockIROp->Header.Op == IR::OP_CODEBLOCK, "IR type failed to be a code block"); - - // Reset the block results per block - memset(&OpData.BlockResults, 0, sizeof(OpData.BlockResults)); - - auto CodeBegin = CurrentIR->at(BlockIROp->Begin); - auto CodeLast = CurrentIR->at(BlockIROp->Last); - - for (auto [CodeNode, IROp] : CurrentIR->GetCode(BlockNode)) { - const auto ID = CurrentIR->GetID(CodeNode); - const uint32_t Op = IROp->Op; - - // Execute handler - OpHandler Handler = InterpreterOpHandlers[Op]; - - Handler(IROp, &OpData, ID); - - if (OpData.BlockResults.Quit || - OpData.BlockResults.Redo || - CodeBegin == CodeLast) { - break; - } - - ++CodeBegin; - } - - // Iterator will have been set, go again - if (OpData.BlockResults.Redo) { - continue; - } - - // If we have set to early exit or at the end block then leave - if (OpData.BlockResults.Quit || ++OpData.BlockIterator == BlockEnd) { - break; - } - } -} - -} diff --git a/FEXCore/Source/Interface/Core/Interpreter/MemoryOps.cpp b/FEXCore/Source/Interface/Core/Interpreter/MemoryOps.cpp deleted file mode 100644 index bf134fb60..000000000 --- a/FEXCore/Source/Interface/Core/Interpreter/MemoryOps.cpp +++ /dev/null @@ -1,859 +0,0 @@ -// SPDX-License-Identifier: MIT -/* -$info$ -tags: backend|interpreter -$end_info$ -*/ - -#include "Interface/Core/CPUID.h" -#include "Interface/Core/Interpreter/InterpreterClass.h" -#include "Interface/Core/Interpreter/InterpreterOps.h" -#include "Interface/Core/Interpreter/InterpreterDefines.h" - -#include - -namespace FEXCore::CPU { -static inline void CacheLineFlush(char *Addr) { -#ifdef _M_X86_64 - __asm volatile ( - "clflush (%[Addr]);" - :: [Addr] "r" (Addr) - : "memory"); -#else - __builtin___clear_cache(Addr, Addr+64); -#endif -} - -static inline void CacheLineClean(char *Addr) { -#ifdef _M_X86_64 - __asm volatile ( - "clwb (%[Addr]);" - :: [Addr] "r" (Addr) - : "memory"); -#elif _M_ARM_64 - __asm volatile ( - "dc cvac, %[Addr]" - :: [Addr] "r" (Addr) - : "memory"); -#else - LOGMAN_THROW_A_FMT("Unsupported architecture with cacheline clean"); -#endif -} - -#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) -DEF_OP(LoadContext) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - const auto ContextPtr = reinterpret_cast(Data->State->CurrentFrame); - const auto Src = ContextPtr + Op->Offset; - - #define LOAD_CTX(x, y) \ - case x: { \ - y const *MemData = reinterpret_cast(Src); \ - GD = *MemData; \ - break; \ - } - - switch (OpSize) { - LOAD_CTX(1, uint8_t) - LOAD_CTX(2, uint16_t) - LOAD_CTX(4, uint32_t) - LOAD_CTX(8, uint64_t) - case 16: - case 32: { - void const *MemData = reinterpret_cast(Src); - memcpy(GDP, MemData, OpSize); - break; - } - default: - LOGMAN_MSG_A_FMT("Unhandled LoadContext size: {}", OpSize); - break; - } - #undef LOAD_CTX -} - -DEF_OP(StoreContext) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - const auto ContextPtr = reinterpret_cast(Data->State->CurrentFrame); - const auto Dst = ContextPtr + Op->Offset; - - void *MemData = reinterpret_cast(Dst); - void *Src = GetSrc(Data->SSAData, Op->Value); - memcpy(MemData, Src, OpSize); -} - -DEF_OP(LoadRegister) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - const auto ContextPtr = reinterpret_cast(Data->State->CurrentFrame); - const auto Src = ContextPtr + Op->Offset; - - #define LOAD_CTX(x, y) \ - case x: { \ - y const *MemData = reinterpret_cast(Src); \ - GD = *MemData; \ - break; \ - } - - switch (OpSize) { - LOAD_CTX(1, uint8_t) - LOAD_CTX(2, uint16_t) - LOAD_CTX(4, uint32_t) - LOAD_CTX(8, uint64_t) - case 16: - case 32: { - void const *MemData = reinterpret_cast(Src); - memcpy(GDP, MemData, OpSize); - break; - } - default: - LOGMAN_MSG_A_FMT("Unhandled LoadContext size: {}", OpSize); - break; - } - #undef LOAD_CTX -} - -DEF_OP(StoreRegister) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - const auto ContextPtr = reinterpret_cast(Data->State->CurrentFrame); - const auto Dst = ContextPtr + Op->Offset; - - void *MemData = reinterpret_cast(Dst); - void *Src = GetSrc(Data->SSAData, Op->Value); - memcpy(MemData, Src, OpSize); -} - -DEF_OP(LoadContextIndexed) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - const auto Index = *GetSrc(Data->SSAData, Op->Index); - - const auto ContextPtr = reinterpret_cast(Data->State->CurrentFrame); - const auto Src = ContextPtr + Op->BaseOffset + (Index * Op->Stride); - - #define LOAD_CTX(x, y) \ - case x: { \ - y const *MemData = reinterpret_cast(Src); \ - GD = *MemData; \ - break; \ - } - - switch (OpSize) { - LOAD_CTX(1, uint8_t) - LOAD_CTX(2, uint16_t) - LOAD_CTX(4, uint32_t) - LOAD_CTX(8, uint64_t) - case 16: - case 32: { - void const *MemData = reinterpret_cast(Src); - memcpy(GDP, MemData, OpSize); - break; - } - default: - LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed size: {}", OpSize); - break; - } - #undef LOAD_CTX -} - -DEF_OP(StoreContextIndexed) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - const auto Index = *GetSrc(Data->SSAData, Op->Index); - - const auto ContextPtr = reinterpret_cast(Data->State->CurrentFrame); - const auto Dst = ContextPtr + Op->BaseOffset + (Index * Op->Stride); - - void *MemData = reinterpret_cast(Dst); - void *Src = GetSrc(Data->SSAData, Op->Value); - memcpy(MemData, Src, OpSize); -} - -DEF_OP(SpillRegister) { - LOGMAN_MSG_A_FMT("Unimplemented"); -} - -DEF_OP(FillRegister) { - LOGMAN_MSG_A_FMT("Unimplemented"); -} - -DEF_OP(LoadFlag) { - auto Op = IROp->C(); - - uintptr_t ContextPtr = reinterpret_cast(Data->State->CurrentFrame); - ContextPtr += offsetof(FEXCore::Core::CPUState, flags[0]); - ContextPtr += Op->Flag; - - if (Op->Flag == 24 /* NZCV */) { - uint32_t const *MemData = reinterpret_cast(ContextPtr); - GD = *MemData; - } else { - uint8_t const *MemData = reinterpret_cast(ContextPtr); - GD = *MemData; - } -} - -DEF_OP(StoreFlag) { - auto Op = IROp->C(); - uint32_t Arg = *GetSrc(Data->SSAData, Op->Value); - - uintptr_t ContextPtr = reinterpret_cast(Data->State->CurrentFrame); - ContextPtr += offsetof(FEXCore::Core::CPUState, flags[0]); - ContextPtr += Op->Flag; - - if (Op->Flag == 24 /* NZCV */) { - uint32_t *MemData = reinterpret_cast(ContextPtr); - *MemData = Arg; - } else { - uint8_t *MemData = reinterpret_cast(ContextPtr); - *MemData = Arg; - } -} - -DEF_OP(LoadMem) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - uint8_t const *MemData = *GetSrc(Data->SSAData, Op->Addr); - - if (!Op->Offset.IsInvalid()) { - auto Offset = *GetSrc(Data->SSAData, Op->Offset) * Op->OffsetScale; - - switch(Op->OffsetType.Val) { - case IR::MEM_OFFSET_SXTX.Val: MemData += Offset; break; - case IR::MEM_OFFSET_UXTW.Val: MemData += (uint32_t)Offset; break; - case IR::MEM_OFFSET_SXTW.Val: MemData += (int32_t)Offset; break; - } - } - - memset(GDP, 0, Core::CPUState::XMM_AVX_REG_SIZE); - switch (OpSize) { - case 1: { - auto D = reinterpret_cast*>(MemData); - GD = D->load(); - break; - } - case 2: { - auto D = reinterpret_cast*>(MemData); - GD = D->load(); - break; - } - case 4: { - auto D = reinterpret_cast*>(MemData); - GD = D->load(); - break; - } - case 8: { - auto D = reinterpret_cast*>(MemData); - GD = D->load(); - break; - } - default: - memcpy(GDP, MemData, OpSize); - break; - } -} - -DEF_OP(StoreMem) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - uint8_t *MemData = *GetSrc(Data->SSAData, Op->Addr); - - if (!Op->Offset.IsInvalid()) { - auto Offset = *GetSrc(Data->SSAData, Op->Offset) * Op->OffsetScale; - - switch(Op->OffsetType.Val) { - case IR::MEM_OFFSET_SXTX.Val: MemData += Offset; break; - case IR::MEM_OFFSET_UXTW.Val: MemData += (uint32_t)Offset; break; - case IR::MEM_OFFSET_SXTW.Val: MemData += (int32_t)Offset; break; - } - } - switch (OpSize) { - case 1: { - reinterpret_cast*>(MemData)->store(*GetSrc(Data->SSAData, Op->Value)); - break; - } - case 2: { - reinterpret_cast*>(MemData)->store(*GetSrc(Data->SSAData, Op->Value)); - break; - } - case 4: { - reinterpret_cast*>(MemData)->store(*GetSrc(Data->SSAData, Op->Value)); - break; - } - case 8: { - reinterpret_cast*>(MemData)->store(*GetSrc(Data->SSAData, Op->Value)); - break; - } - - default: - memcpy(MemData, GetSrc(Data->SSAData, Op->Value), OpSize); - break; - } -} - -DEF_OP(VLoadVectorMasked) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - const auto ElementSize = IROp->ElementSize; - const auto NumElements = OpSize / ElementSize; - - const auto *MemData = *GetSrc(Data->SSAData, Op->Addr); - const auto *Mask = GetSrc(Data->SSAData, Op->Mask); - - const auto SetElements = [NumElements](void* Dst, const T* MaskValues, const T* MemoryData) { - const auto SignBit = 1ULL << ((sizeof(T) * 8) - 1); - for (size_t i = 0; i < NumElements; i++) { - if ((MaskValues[i] & SignBit) != 0) { - std::memcpy(static_cast(Dst) + (i * sizeof(T)), MemoryData + i, sizeof(T)); - } - } - }; - - if (!Op->Offset.IsInvalid()) { - auto Offset = *GetSrc(Data->SSAData, Op->Offset) * Op->OffsetScale; - - switch(Op->OffsetType.Val) { - case IR::MEM_OFFSET_SXTX.Val: MemData += Offset; break; - case IR::MEM_OFFSET_UXTW.Val: MemData += (uint32_t)Offset; break; - case IR::MEM_OFFSET_SXTW.Val: MemData += (int32_t)Offset; break; - } - } - - memset(GDP, 0, Core::CPUState::XMM_AVX_REG_SIZE); - switch (ElementSize) { - case 1: { - SetElements(GDP, Mask, MemData); - return; - } - case 2: { - SetElements(GDP, - reinterpret_cast(Mask), - reinterpret_cast(MemData)); - return; - } - case 4: { - SetElements(GDP, - reinterpret_cast(Mask), - reinterpret_cast(MemData)); - return; - } - case 8: { - SetElements(GDP, - reinterpret_cast(Mask), - reinterpret_cast(MemData)); - return; - } - default: - LOGMAN_MSG_A_FMT("Unhandled VLoadVectorMasked element size: {}", ElementSize); - return; - } -} - -DEF_OP(VStoreVectorMasked) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - const auto ElementSize = IROp->ElementSize; - const auto NumElements = OpSize / ElementSize; - - auto *Dst = *GetSrc(Data->SSAData, Op->Addr); - const auto *RegData = GetSrc(Data->SSAData, Op->Data); - const auto *Mask = GetSrc(Data->SSAData, Op->Mask); - - const auto SetElements = [NumElements](void* Dst, const T* MaskValues, const T* DataVals) { - const auto SignBit = 1ULL << ((sizeof(T) * 8) - 1); - for (size_t i = 0; i < NumElements; i++) { - if ((MaskValues[i] & SignBit) != 0) { - std::memcpy(static_cast(Dst) + (i * sizeof(T)), DataVals + i, sizeof(T)); - } - } - }; - - if (!Op->Offset.IsInvalid()) { - auto Offset = *GetSrc(Data->SSAData, Op->Offset) * Op->OffsetScale; - - switch(Op->OffsetType.Val) { - case IR::MEM_OFFSET_SXTX.Val: Dst += Offset; break; - case IR::MEM_OFFSET_UXTW.Val: Dst += (uint32_t)Offset; break; - case IR::MEM_OFFSET_SXTW.Val: Dst += (int32_t)Offset; break; - } - } - - switch (ElementSize) { - case 1: { - SetElements(Dst, Mask, RegData); - return; - } - case 2: { - SetElements(Dst, - reinterpret_cast(Mask), - reinterpret_cast(RegData)); - return; - } - case 4: { - SetElements(Dst, - reinterpret_cast(Mask), - reinterpret_cast(RegData)); - return; - } - case 8: { - SetElements(Dst, - reinterpret_cast(Mask), - reinterpret_cast(RegData)); - return; - } - default: - LOGMAN_MSG_A_FMT("Unhandled VStoreVectorMasked element size: {}", ElementSize); - return; - } -} - -DEF_OP(VLoadVectorElement) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - const auto ElementSize = IROp->ElementSize; - - auto *Mem = *GetSrc(Data->SSAData, Op->Addr); - const auto *DstSrc = GetSrc(Data->SSAData, Op->DstSrc); - - const auto SetElements = [](void* Dst, const T* MemPtr, const auto Index) { - std::memcpy(static_cast(Dst) + (Index * sizeof(T)), MemPtr, sizeof(T)); - }; - - // Copy the source data first. - memcpy(GDP, DstSrc, OpSize); - - switch (ElementSize) { - case 1: { - SetElements(GDP, - Mem, - Op->Index); - return; - } - case 2: { - SetElements(GDP, - reinterpret_cast(Mem), - Op->Index); - return; - } - case 4: { - SetElements(GDP, - reinterpret_cast(Mem), - Op->Index); - return; - } - case 8: { - SetElements(GDP, - reinterpret_cast(Mem), - Op->Index); - return; - } - default: - LOGMAN_MSG_A_FMT("Unhandled {} element size: {}", __func__, ElementSize); - return; - } -} - -DEF_OP(VStoreVectorElement) { - const auto Op = IROp->C(); - const auto ElementSize = IROp->ElementSize; - - auto *Mem = *GetSrc(Data->SSAData, Op->Addr); - const auto *Value = GetSrc(Data->SSAData, Op->Value); - - const auto StoreElements = [](void* MemPtr, const T* Src, const auto Index) { - std::memcpy(MemPtr, reinterpret_cast(Src) + (Index * sizeof(T)), sizeof(T)); - }; - - switch (ElementSize) { - case 1: { - StoreElements(Mem, - Value, - Op->Index); - return; - } - case 2: { - StoreElements(Mem, - reinterpret_cast(Value), - Op->Index); - return; - } - case 4: { - StoreElements(Mem, - reinterpret_cast(Value), - Op->Index); - return; - } - case 8: { - StoreElements(Mem, - reinterpret_cast(Value), - Op->Index); - return; - } - default: - LOGMAN_MSG_A_FMT("Unhandled {} element size: {}", __func__, ElementSize); - return; - } -} - -DEF_OP(VBroadcastFromMem) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - const auto ElementSize = IROp->ElementSize; - const auto NumElements = OpSize / ElementSize; - - const auto *MemData = *GetSrc(Data->SSAData, Op->Address); - - const auto BroadcastElement = [NumElements](void* Dst, const T* MemPtr) { - auto* DstU8 = static_cast(Dst); - - for (size_t i = 0; i < NumElements; i++) { - std::memcpy(DstU8 + (i * sizeof(T)), MemPtr, sizeof(T)); - } - }; - - switch (ElementSize) { - case 1: - BroadcastElement(GDP, MemData); - break; - case 2: - BroadcastElement(GDP, reinterpret_cast(MemData)); - break; - case 4: - BroadcastElement(GDP, reinterpret_cast(MemData)); - break; - case 8: - BroadcastElement(GDP, reinterpret_cast(MemData)); - break; - case 16: - BroadcastElement(GDP, reinterpret_cast(MemData)); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled VBroadcastFromMem element size: {}", ElementSize); - break; - } -} - -DEF_OP(Push) { - const auto Op = IROp->C(); - const auto ValueSize = Op->ValueSize; - - uint64_t MemData = *GetSrc(Data->SSAData, Op->Addr); - - switch (ValueSize) { - case 1: { - *reinterpret_cast(MemData - ValueSize) = *GetSrc(Data->SSAData, Op->Value); - break; - } - case 2: { - *reinterpret_cast(MemData - ValueSize) = *GetSrc(Data->SSAData, Op->Value); - break; - } - case 4: { - *reinterpret_cast(MemData - ValueSize) = *GetSrc(Data->SSAData, Op->Value); - break; - } - case 8: { - *reinterpret_cast(MemData - ValueSize) = *GetSrc(Data->SSAData, Op->Value); - break; - } - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ValueSize); - break; - } - - GD = MemData - ValueSize; -} - -DEF_OP(MemSet) { - const auto Op = IROp->C(); - const int32_t Size = Op->Size; - - char *MemData = *GetSrc(Data->SSAData, Op->Addr); - uint64_t MemPrefix{}; - if (!Op->Prefix.IsInvalid()) { - MemPrefix = *GetSrc(Data->SSAData, Op->Prefix); - } - - const auto Value = *GetSrc(Data->SSAData, Op->Value); - const auto Length = *GetSrc(Data->SSAData, Op->Length); - const auto Direction = *GetSrc(Data->SSAData, Op->Direction); - - auto MemSetElements = [](auto* Memory, uint64_t Value, size_t Length) { - for (size_t i = 0; i < Length; ++i) { - Memory[i] = Value; - } - }; - - auto MemSetElementsInverse = [](auto* Memory, uint64_t Value, size_t Length) { - for (size_t i = 0; i < Length; ++i) { - Memory[-i] = Value; - } - }; - - if (Direction == 0) { // Forward - if (Op->IsAtomic) { - switch (Size) { - case 1: - MemSetElements(reinterpret_cast*>(MemData + MemPrefix), Value, Length); - break; - case 2: - MemSetElements(reinterpret_cast*>(MemData + MemPrefix), Value, Length); - break; - case 4: - MemSetElements(reinterpret_cast*>(MemData + MemPrefix), Value, Length); - break; - case 8: - MemSetElements(reinterpret_cast*>(MemData + MemPrefix), Value, Length); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); - break; - } - } - else { - switch (Size) { - case 1: - MemSetElements(reinterpret_cast(MemData + MemPrefix), Value, Length); - break; - case 2: - MemSetElements(reinterpret_cast(MemData + MemPrefix), Value, Length); - break; - case 4: - MemSetElements(reinterpret_cast(MemData + MemPrefix), Value, Length); - break; - case 8: - MemSetElements(reinterpret_cast(MemData + MemPrefix), Value, Length); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); - break; - } - } - GD = reinterpret_cast(MemData + (Length * Size)); - } - else { // Backward - if (Op->IsAtomic) { - switch (Size) { - case 1: - MemSetElementsInverse(reinterpret_cast*>(MemData + MemPrefix), Value, Length); - break; - case 2: - MemSetElementsInverse(reinterpret_cast*>(MemData + MemPrefix), Value, Length); - break; - case 4: - MemSetElementsInverse(reinterpret_cast*>(MemData + MemPrefix), Value, Length); - break; - case 8: - MemSetElementsInverse(reinterpret_cast*>(MemData + MemPrefix), Value, Length); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); - break; - } - } - else { - switch (Size) { - case 1: - MemSetElementsInverse(reinterpret_cast(MemData + MemPrefix), Value, Length); - break; - case 2: - MemSetElementsInverse(reinterpret_cast(MemData + MemPrefix), Value, Length); - break; - case 4: - MemSetElementsInverse(reinterpret_cast(MemData + MemPrefix), Value, Length); - break; - case 8: - MemSetElementsInverse(reinterpret_cast(MemData + MemPrefix), Value, Length); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); - break; - } - } - GD = reinterpret_cast(MemData - (Length * Size)); - } -} - -DEF_OP(MemCpy) { - const auto Op = IROp->C(); - const int32_t Size = Op->Size; - - uint64_t *DstPtr = GetDest(Data->SSAData, Node); - - char *MemDataDest = *GetSrc(Data->SSAData, Op->AddrDest); - char *MemDataSrc = *GetSrc(Data->SSAData, Op->AddrSrc); - - uint64_t DestPrefix{}; - uint64_t SrcPrefix{}; - if (!Op->PrefixDest.IsInvalid()) { - DestPrefix = *GetSrc(Data->SSAData, Op->PrefixDest); - - } - if (!Op->PrefixSrc.IsInvalid()) { - SrcPrefix = *GetSrc(Data->SSAData, Op->PrefixSrc); - } - - const auto Length = *GetSrc(Data->SSAData, Op->Length); - const auto Direction = *GetSrc(Data->SSAData, Op->Direction); - - auto MemSetElementsAtomic = [](auto* MemDst, auto* MemSrc, size_t Length) { - for (size_t i = 0; i < Length; ++i) { - MemDst[i].store(MemSrc[i].load()); - } - }; - - auto MemSetElementsAtomicInverse = [](auto* MemDst, auto* MemSrc, size_t Length) { - for (size_t i = 0; i < Length; ++i) { - MemDst[-i].store(MemSrc[-i].load()); - } - }; - - auto MemSetElements = [](auto* MemDst, auto* MemSrc, size_t Length) { - for (size_t i = 0; i < Length; ++i) { - MemDst[i] = MemSrc[i]; - } - }; - - auto MemSetElementsInverse = [](auto* MemDst, auto* MemSrc, size_t Length) { - for (size_t i = 0; i < Length; ++i) { - MemDst[-i] = MemSrc[-i]; - } - }; - - if (Direction == 0) { // Forward - if (Op->IsAtomic) { - switch (Size) { - case 1: - MemSetElementsAtomic(reinterpret_cast*>(MemDataDest + DestPrefix), reinterpret_cast*>(MemDataSrc + SrcPrefix), Length); - break; - case 2: - MemSetElementsAtomic(reinterpret_cast*>(MemDataDest + DestPrefix), reinterpret_cast*>(MemDataSrc + SrcPrefix), Length); - break; - case 4: - MemSetElementsAtomic(reinterpret_cast*>(MemDataDest + DestPrefix), reinterpret_cast*>(MemDataSrc + SrcPrefix), Length); - break; - case 8: - MemSetElementsAtomic(reinterpret_cast*>(MemDataDest + DestPrefix), reinterpret_cast*>(MemDataSrc + SrcPrefix), Length); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); - break; - } - } - else { - switch (Size) { - case 1: - MemSetElements(reinterpret_cast(MemDataDest + DestPrefix), reinterpret_cast(MemDataSrc + SrcPrefix), Length); - break; - case 2: - MemSetElements(reinterpret_cast(MemDataDest + DestPrefix), reinterpret_cast(MemDataSrc + SrcPrefix), Length); - break; - case 4: - MemSetElements(reinterpret_cast(MemDataDest + DestPrefix), reinterpret_cast(MemDataSrc + SrcPrefix), Length); - break; - case 8: - MemSetElements(reinterpret_cast(MemDataDest + DestPrefix), reinterpret_cast(MemDataSrc + SrcPrefix), Length); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); - break; - } - } - DstPtr[0] = reinterpret_cast(MemDataDest + (Length * Size)); - DstPtr[1] = reinterpret_cast(MemDataSrc + (Length * Size)); - } - else { // Backward - if (Op->IsAtomic) { - switch (Size) { - case 1: - MemSetElementsAtomicInverse(reinterpret_cast*>(MemDataDest + DestPrefix), reinterpret_cast*>(MemDataSrc + SrcPrefix), Length); - break; - case 2: - MemSetElementsAtomicInverse(reinterpret_cast*>(MemDataDest + DestPrefix), reinterpret_cast*>(MemDataSrc + SrcPrefix), Length); - break; - case 4: - MemSetElementsAtomicInverse(reinterpret_cast*>(MemDataDest + DestPrefix), reinterpret_cast*>(MemDataSrc + SrcPrefix), Length); - break; - case 8: - MemSetElementsAtomicInverse(reinterpret_cast*>(MemDataDest + DestPrefix), reinterpret_cast*>(MemDataSrc + SrcPrefix), Length); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); - break; - } - } - else { - switch (Size) { - case 1: - MemSetElementsInverse(reinterpret_cast(MemDataDest + DestPrefix), reinterpret_cast(MemDataSrc + SrcPrefix), Length); - break; - case 2: - MemSetElementsInverse(reinterpret_cast(MemDataDest + DestPrefix), reinterpret_cast(MemDataSrc + SrcPrefix), Length); - break; - case 4: - MemSetElementsInverse(reinterpret_cast(MemDataDest + DestPrefix), reinterpret_cast(MemDataSrc + SrcPrefix), Length); - break; - case 8: - MemSetElementsInverse(reinterpret_cast(MemDataDest + DestPrefix), reinterpret_cast(MemDataSrc + SrcPrefix), Length); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); - break; - } - } - DstPtr[0] = reinterpret_cast(MemDataDest - (Length * Size)); - DstPtr[1] = reinterpret_cast(MemDataSrc - (Length * Size)); - } -} - -DEF_OP(CacheLineClear) { - auto Op = IROp->C(); - - char *MemData = *GetSrc(Data->SSAData, Op->Addr); - - // 64-byte cache line clear - CacheLineFlush(MemData); -} - -DEF_OP(CacheLineClean) { - auto Op = IROp->C(); - - char *MemData = *GetSrc(Data->SSAData, Op->Addr); - - // 64-byte cache line clear - CacheLineClean(MemData); -} - -DEF_OP(CacheLineZero) { - auto Op = IROp->C(); - - uintptr_t MemData = *GetSrc(Data->SSAData, Op->Addr); - - // Force cacheline alignment - MemData = MemData & ~(CPUIDEmu::CACHELINE_SIZE - 1); - - using DataType = uint64_t; - DataType *MemData64 = reinterpret_cast(MemData); - - // 64-byte cache line zero - for (size_t i = 0; i < (CPUIDEmu::CACHELINE_SIZE / sizeof(DataType)); ++i) { - MemData64[i] = 0; - } -} - -#undef DEF_OP -} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/Interpreter/MiscOps.cpp b/FEXCore/Source/Interface/Core/Interpreter/MiscOps.cpp deleted file mode 100644 index ff4e217ea..000000000 --- a/FEXCore/Source/Interface/Core/Interpreter/MiscOps.cpp +++ /dev/null @@ -1,175 +0,0 @@ -// SPDX-License-Identifier: MIT -/* -$info$ -tags: backend|interpreter -$end_info$ -*/ - -#include "Interface/Context/Context.h" - -#include "Interface/Core/Interpreter/InterpreterClass.h" -#include "Interface/Core/Interpreter/InterpreterOps.h" -#include "Interface/Core/Interpreter/InterpreterDefines.h" - -#include - -#include -#ifdef _M_X86_64 -#include -#endif -#include - -namespace FEXCore::CPU { - -#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) -DEF_OP(Fence) { - auto Op = IROp->C(); - switch (Op->Fence) { - case IR::Fence_Load.Val: - std::atomic_thread_fence(std::memory_order_acquire); - break; - case IR::Fence_LoadStore.Val: - std::atomic_thread_fence(std::memory_order_seq_cst); - break; - case IR::Fence_Store.Val: - std::atomic_thread_fence(std::memory_order_release); - break; - default: LOGMAN_MSG_A_FMT("Unknown Fence: {}", Op->Fence); break; - } -} - -DEF_OP(Break) { - auto Op = IROp->C(); - - Data->State->CurrentFrame->SynchronousFaultData.FaultToTopAndGeneratedException = 1; - Data->State->CurrentFrame->SynchronousFaultData.Signal = Op->Reason.Signal; - Data->State->CurrentFrame->SynchronousFaultData.TrapNo = Op->Reason.TrapNumber; - Data->State->CurrentFrame->SynchronousFaultData.err_code = Op->Reason.ErrorRegister; - Data->State->CurrentFrame->SynchronousFaultData.si_code = Op->Reason.si_code; - - switch (Op->Reason.Signal) { - case SIGILL: - FHU::Syscalls::tgkill(Data->State->ThreadManager.PID, Data->State->ThreadManager.TID, SIGILL); - break; - case SIGTRAP: - FHU::Syscalls::tgkill(Data->State->ThreadManager.PID, Data->State->ThreadManager.TID, SIGTRAP); - break; - case SIGSEGV: - FHU::Syscalls::tgkill(Data->State->ThreadManager.PID, Data->State->ThreadManager.TID, SIGSEGV); - break; - default: - FHU::Syscalls::tgkill(Data->State->ThreadManager.PID, Data->State->ThreadManager.TID, SIGTRAP); - break; - } -} - -DEF_OP(GetRoundingMode) { - uint32_t GuestRounding{}; -#ifdef _M_ARM_64 - uint64_t Tmp{}; - __asm(R"( - mrs %[Tmp], FPCR; - )" - : [Tmp] "=r" (Tmp)); - // Extract the rounding - // On ARM the ordering is different than on x86 - GuestRounding |= ((Tmp >> 24) & 1) ? IR::ROUND_MODE_FLUSH_TO_ZERO : 0; - uint8_t RoundingMode = (Tmp >> 22) & 0b11; - if (RoundingMode == 0) - GuestRounding |= IR::ROUND_MODE_NEAREST; - else if (RoundingMode == 1) - GuestRounding |= IR::ROUND_MODE_POSITIVE_INFINITY; - else if (RoundingMode == 2) - GuestRounding |= IR::ROUND_MODE_NEGATIVE_INFINITY; - else if (RoundingMode == 3) - GuestRounding |= IR::ROUND_MODE_TOWARDS_ZERO; -#else - GuestRounding = _mm_getcsr(); - - // Extract the rounding - GuestRounding = (GuestRounding >> 13) & 0b111; -#endif - memcpy(GDP, &GuestRounding, sizeof(GuestRounding)); -} - -DEF_OP(SetRoundingMode) { - auto Op = IROp->C(); - const auto GuestRounding = *GetSrc(Data->SSAData, Op->RoundMode); -#ifdef _M_ARM_64 - uint64_t HostRounding{}; - __asm volatile(R"( - mrs %[Tmp], FPCR; - )" - : [Tmp] "=r" (HostRounding)); - // Mask out the rounding - HostRounding &= ~(0b111 << 22); - - HostRounding |= (GuestRounding & IR::ROUND_MODE_FLUSH_TO_ZERO) ? (1U << 24) : 0; - - uint8_t RoundingMode = GuestRounding & 0b11; - if (RoundingMode == IR::ROUND_MODE_NEAREST) - HostRounding |= (0b00U << 22); - else if (RoundingMode == IR::ROUND_MODE_POSITIVE_INFINITY) - HostRounding |= (0b01U << 22); - else if (RoundingMode == IR::ROUND_MODE_NEGATIVE_INFINITY) - HostRounding |= (0b10U << 22); - else if (RoundingMode == IR::ROUND_MODE_TOWARDS_ZERO) - HostRounding |= (0b11U << 22); - - __asm volatile(R"( - msr FPCR, %[Tmp]; - )" - :: [Tmp] "r" (HostRounding)); -#else - uint32_t HostRounding = _mm_getcsr(); - - // Cut out the host rounding mode - HostRounding &= ~(0b111 << 13); - - // Insert our new rounding mode - HostRounding |= GuestRounding << 13; - _mm_setcsr(HostRounding); -#endif -} - -DEF_OP(Print) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - if (OpSize <= 8) { - const auto Src = *GetSrc(Data->SSAData, Op->Value); - LogMan::Msg::IFmt(">>>> Value in Arg: 0x{:x}, {}", Src, Src); - } - else if (OpSize == 16) { - const auto Src = *GetSrc<__uint128_t*>(Data->SSAData, Op->Value); - const uint64_t Src0 = Src; - const uint64_t Src1 = Src >> 64; - LogMan::Msg::IFmt(">>>> Value[0] in Arg: 0x{:x}, {}", Src0, Src0); - LogMan::Msg::IFmt(" Value[1] in Arg: 0x{:x}, {}", Src1, Src1); - } - else - LOGMAN_MSG_A_FMT("Unknown value size: {}", OpSize); -} - -DEF_OP(ProcessorID) { - uint32_t CPU, CPUNode; - FHU::Syscalls::getcpu(&CPU, &CPUNode); - GD = (CPUNode << 12) | CPU; -} - -DEF_OP(RDRAND) { - // We are ignoring Op->GetReseeded in the interpreter - uint64_t *DstPtr = GetDest(Data->SSAData, Node); - ssize_t Result = ::getrandom(&DstPtr[0], 8, 0); - - // Second result is if we managed to read a valid random number or not - DstPtr[1] = Result == 8 ? 1 : 0; -} - -DEF_OP(Yield) { - // Nop implementation -} - -#undef DEF_OP - -} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/Interpreter/MoveOps.cpp b/FEXCore/Source/Interface/Core/Interpreter/MoveOps.cpp deleted file mode 100644 index 5ad624c4f..000000000 --- a/FEXCore/Source/Interface/Core/Interpreter/MoveOps.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// SPDX-License-Identifier: MIT -/* -$info$ -tags: backend|interpreter -$end_info$ -*/ - -#include "Interface/Core/Interpreter/InterpreterClass.h" -#include "Interface/Core/Interpreter/InterpreterOps.h" -#include "Interface/Core/Interpreter/InterpreterDefines.h" - -#include - -namespace FEXCore::CPU { -#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) -DEF_OP(ExtractElementPair) { - auto Op = IROp->C(); - const auto Src = GetSrc(Data->SSAData, Op->Pair); - memcpy(GDP, - reinterpret_cast(Src + Op->Header.Size * Op->Element), Op->Header.Size); -} - -DEF_OP(CreateElementPair) { - auto Op = IROp->C(); - const void *Src_Lower = GetSrc(Data->SSAData, Op->Lower); - const void *Src_Upper = GetSrc(Data->SSAData, Op->Upper); - - uint8_t *Dst = GetDest(Data->SSAData, Node); - - memcpy(Dst, Src_Lower, IROp->ElementSize); - memcpy(Dst + IROp->ElementSize, Src_Upper, IROp->ElementSize); -} - -#undef DEF_OP - -} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp b/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp deleted file mode 100644 index d8c5ba85a..000000000 --- a/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp +++ /dev/null @@ -1,2646 +0,0 @@ -// SPDX-License-Identifier: MIT -/* -$info$ -tags: backend|interpreter -$end_info$ -*/ - -#include "Interface/Core/Interpreter/InterpreterClass.h" -#include "Interface/Core/Interpreter/InterpreterOps.h" -#include "Interface/Core/Interpreter/InterpreterDefines.h" - -#include "Interface/Core/Interpreter/Fallbacks/VectorFallbacks.h" - -#include -#include - -#include -#include -#include -#include -#include - -namespace FEXCore::CPU { -#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) -DEF_OP(VectorZero) { - uint8_t OpSize = IROp->Size; - memset(GDP, 0, OpSize); -} - -DEF_OP(VectorImm) { - auto Op = IROp->C(); - uint8_t OpSize = IROp->Size; - - TempVectorDataArray Tmp;; - - uint8_t Elements = OpSize / Op->Header.ElementSize; - uint8_t Imm = Op->Immediate; - - auto Func = [Imm]() { return Imm; }; - switch (Op->Header.ElementSize) { - DO_VECTOR_0SRC_OP(1, int8_t, Func) - DO_VECTOR_0SRC_OP(2, int16_t, Func) - DO_VECTOR_0SRC_OP(4, int32_t, Func) - DO_VECTOR_0SRC_OP(8, int64_t, Func) - default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(LoadNamedVectorConstant) { - auto Op = IROp->C(); - uint8_t OpSize = IROp->Size; - - switch (Op->Constant) { - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO: - memset(GDP, 0, OpSize); - return; - default: - // Intentionally doing nothing. - break; - } - - memcpy(GDP, reinterpret_cast(Data->State->CurrentFrame->Pointers.Common.NamedVectorConstantPointers[Op->Constant]), OpSize); -} - -DEF_OP(LoadNamedVectorIndexedConstant) { - auto Op = IROp->C(); - uint8_t OpSize = IROp->Size; - - memcpy(GDP, reinterpret_cast(Data->State->CurrentFrame->Pointers.Common.IndexedNamedVectorConstantPointers[Op->Constant] + Op->Index), OpSize); -} - -DEF_OP(VMov) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - LOGMAN_THROW_AA_FMT(OpSize <= Core::CPUState::XMM_AVX_REG_SIZE, - "Moves larger than 256-bit aren't supported"); - - const auto Src = *GetSrc(Data->SSAData, Op->Source); - - memcpy(GDP, &Src, OpSize); -} - -DEF_OP(VAnd) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; - - if (Is256Bit) { - const auto Src1 = *GetSrc(Data->SSAData, Op->Vector1); - const auto Src2 = *GetSrc(Data->SSAData, Op->Vector2); - - const auto Dst = InterpVector256{ - .Lower = Src1.Lower & Src2.Lower, - .Upper = Src1.Upper & Src2.Upper, - }; - - memcpy(GDP, &Dst, sizeof(Dst)); - } else { - const auto Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Vector1); - const auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Vector2); - - const auto Dst = Src1 & Src2; - memcpy(GDP, &Dst, sizeof(Dst)); - } -} - -DEF_OP(VBic) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; - - if (Is256Bit) { - const auto Src1 = *GetSrc(Data->SSAData, Op->Vector1); - const auto Src2 = *GetSrc(Data->SSAData, Op->Vector2); - - const auto Dst = InterpVector256{ - .Lower = Src1.Lower & ~Src2.Lower, - .Upper = Src1.Upper & ~Src2.Upper, - }; - - memcpy(GDP, &Dst, sizeof(Dst)); - } else { - const auto Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Vector1); - const auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Vector2); - - const auto Dst = Src1 & ~Src2; - memcpy(GDP, &Dst, sizeof(Dst)); - } -} - -DEF_OP(VOr) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; - - if (Is256Bit) { - const auto Src1 = *GetSrc(Data->SSAData, Op->Vector1); - const auto Src2 = *GetSrc(Data->SSAData, Op->Vector2); - - const auto Dst = InterpVector256{ - .Lower = Src1.Lower | Src2.Lower, - .Upper = Src1.Upper | Src2.Upper, - }; - - memcpy(GDP, &Dst, sizeof(Dst)); - } else { - const auto Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Vector1); - const auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Vector2); - - const auto Dst = Src1 | Src2; - memcpy(GDP, &Dst, sizeof(Dst)); - } -} - -DEF_OP(VXor) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; - - if (Is256Bit) { - const auto Src1 = *GetSrc(Data->SSAData, Op->Vector1); - const auto Src2 = *GetSrc(Data->SSAData, Op->Vector2); - - const auto Dst = InterpVector256{ - .Lower = Src1.Lower ^ Src2.Lower, - .Upper = Src1.Upper ^ Src2.Upper, - }; - - memcpy(GDP, &Dst, sizeof(Dst)); - } else { - const auto Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Vector1); - const auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Vector2); - - const auto Dst = Src1 ^ Src2; - memcpy(GDP, &Dst, sizeof(Dst)); - } -} - -DEF_OP(VAdd) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return a + b; }; - switch (ElementSize) { - DO_VECTOR_OP(1, uint8_t, Func) - DO_VECTOR_OP(2, uint16_t, Func) - DO_VECTOR_OP(4, uint32_t, Func) - DO_VECTOR_OP(8, uint64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSub) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return a - b; }; - switch (ElementSize) { - DO_VECTOR_OP(1, uint8_t, Func) - DO_VECTOR_OP(2, uint16_t, Func) - DO_VECTOR_OP(4, uint32_t, Func) - DO_VECTOR_OP(8, uint64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUQAdd) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { - decltype(a) res = a + b; - return res < a ? ~0U : res; - }; - switch (ElementSize) { - DO_VECTOR_OP(1, uint8_t, Func) - DO_VECTOR_OP(2, uint16_t, Func) - DO_VECTOR_OP(4, uint32_t, Func) - DO_VECTOR_OP(8, uint64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUQSub) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { - decltype(a) res = a - b; - return res > a ? 0U : res; - }; - switch (ElementSize) { - DO_VECTOR_OP(1, uint8_t, Func) - DO_VECTOR_OP(2, uint16_t, Func) - DO_VECTOR_OP(4, uint32_t, Func) - DO_VECTOR_OP(8, uint64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSQAdd) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { - static_assert(std::is_same_v); - using Type = decltype(a); - using Limits = std::numeric_limits; - - const Type res = a + b; - - if (a > 0) { - if (b > (Limits::max() - a)) { - return Limits::max(); - } - } - else if (b < (Limits::min() - a)) { - return Limits::min(); - } - - return res; - }; - switch (ElementSize) { - DO_VECTOR_OP(1, int8_t, Func) - DO_VECTOR_OP(2, int16_t, Func) - DO_VECTOR_OP(4, int32_t, Func) - DO_VECTOR_OP(8, int64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSQSub) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { - static_assert(std::is_same_v); - using Type = decltype(a); - using Limits = std::numeric_limits; - - const __int128_t res = a - b; - if (res < Limits::min()) { - return Limits::min(); - } - if (res > Limits::max()) { - return Limits::max(); - } - - return (Type)res; - }; - - switch (ElementSize) { - DO_VECTOR_OP(1, int8_t, Func) - DO_VECTOR_OP(2, int16_t, Func) - DO_VECTOR_OP(4, int32_t, Func) - DO_VECTOR_OP(8, int64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VAddP) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->VectorLower); - void *Src2 = GetSrc(Data->SSAData, Op->VectorUpper); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = (OpSize / ElementSize) / 2; - - const auto Func = [](auto a, auto b) { return a + b; }; - switch (ElementSize) { - DO_VECTOR_PAIR_OP(1, uint8_t, Func) - DO_VECTOR_PAIR_OP(2, uint16_t, Func) - DO_VECTOR_PAIR_OP(4, uint32_t, Func) - DO_VECTOR_PAIR_OP(8, uint64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VAddV) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto current, auto a) { return current + a; }; - switch (ElementSize) { - DO_VECTOR_REDUCE_1SRC_OP(1, int8_t, Func, 0) - DO_VECTOR_REDUCE_1SRC_OP(2, int16_t, Func, 0) - DO_VECTOR_REDUCE_1SRC_OP(4, int32_t, Func, 0) - DO_VECTOR_REDUCE_1SRC_OP(8, int64_t, Func, 0) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - return; - } - memcpy(GDP, Tmp.data(), ElementSize); -} - -DEF_OP(VUMinV) { - auto Op = IROp->C(); - const int8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto current, auto a) { return std::min(current, a); }; - switch (ElementSize) { - DO_VECTOR_REDUCE_1SRC_OP(1, uint8_t, Func, ~0) - DO_VECTOR_REDUCE_1SRC_OP(2, uint16_t, Func, ~0) - DO_VECTOR_REDUCE_1SRC_OP(4, uint32_t, Func, ~0U) - DO_VECTOR_REDUCE_1SRC_OP(8, uint64_t, Func, ~0ULL) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), ElementSize); -} - -DEF_OP(VURAvg) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return (a + b + 1) >> 1; }; - switch (ElementSize) { - DO_VECTOR_OP(1, uint8_t, Func) - DO_VECTOR_OP(2, uint16_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VAbs) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a) { return std::abs(a); }; - switch (ElementSize) { - DO_VECTOR_1SRC_OP(1, int8_t, Func) - DO_VECTOR_1SRC_OP(2, int16_t, Func) - DO_VECTOR_1SRC_OP(4, int32_t, Func) - DO_VECTOR_1SRC_OP(8, int64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VPopcount) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a) { return std::popcount(a); }; - switch (ElementSize) { - DO_VECTOR_1SRC_OP(1, uint8_t, Func) - DO_VECTOR_1SRC_OP(2, uint16_t, Func) - DO_VECTOR_1SRC_OP(4, uint32_t, Func) - DO_VECTOR_1SRC_OP(8, uint64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFAdd) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return a + b; }; - switch (ElementSize) { - DO_VECTOR_OP(4, float, Func) - DO_VECTOR_OP(8, double, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFAddP) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->VectorLower); - void *Src2 = GetSrc(Data->SSAData, Op->VectorUpper); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = (OpSize / ElementSize) / 2; - - const auto Func = [](auto a, auto b) { return a + b; }; - switch (ElementSize) { - DO_VECTOR_PAIR_OP(4, float, Func) - DO_VECTOR_PAIR_OP(8, double, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFSub) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return a - b; }; - switch (ElementSize) { - DO_VECTOR_OP(4, float, Func) - DO_VECTOR_OP(8, double, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFMul) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return a * b; }; - switch (ElementSize) { - DO_VECTOR_OP(4, float, Func) - DO_VECTOR_OP(8, double, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFDiv) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return a / b; }; - switch (ElementSize) { - DO_VECTOR_OP(4, float, Func) - DO_VECTOR_OP(8, double, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFMin) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return std::min(a, b); }; - switch (ElementSize) { - DO_VECTOR_OP(4, float, Func) - DO_VECTOR_OP(8, double, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFMax) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return std::max(a, b); }; - switch (ElementSize) { - DO_VECTOR_OP(4, float, Func) - DO_VECTOR_OP(8, double, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFRecp) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a) { return 1.0 / a; }; - switch (ElementSize) { - DO_VECTOR_1SRC_OP(4, float, Func) - DO_VECTOR_1SRC_OP(8, double, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFSqrt) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a) { return std::sqrt(a); }; - switch (ElementSize) { - DO_VECTOR_1SRC_OP(4, float, Func) - DO_VECTOR_1SRC_OP(8, double, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFRSqrt) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a) { return 1.0 / std::sqrt(a); }; - switch (ElementSize) { - DO_VECTOR_1SRC_OP(4, float, Func) - DO_VECTOR_1SRC_OP(8, double, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VNeg) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a) { return -a; }; - switch (Op->Header.ElementSize) { - DO_VECTOR_1SRC_OP(1, int8_t, Func) - DO_VECTOR_1SRC_OP(2, int16_t, Func) - DO_VECTOR_1SRC_OP(4, int32_t, Func) - DO_VECTOR_1SRC_OP(8, int64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFNeg) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a) { return -a; }; - switch (ElementSize) { - DO_VECTOR_1SRC_OP(4, float, Func) - DO_VECTOR_1SRC_OP(8, double, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VNot) { - const auto Op = IROp->C(); - const auto Src = *GetSrc(Data->SSAData, Op->Vector); - - const auto Dst = InterpVector256{ - .Lower = ~Src.Lower, - .Upper = ~Src.Upper, - }; - - memcpy(GDP, &Dst, sizeof(Dst)); -} - -DEF_OP(VUMin) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return std::min(a, b); }; - switch (ElementSize) { - DO_VECTOR_OP(1, uint8_t, Func) - DO_VECTOR_OP(2, uint16_t, Func) - DO_VECTOR_OP(4, uint32_t, Func) - DO_VECTOR_OP(8, uint64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSMin) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return std::min(a, b); }; - switch (ElementSize) { - DO_VECTOR_OP(1, int8_t, Func) - DO_VECTOR_OP(2, int16_t, Func) - DO_VECTOR_OP(4, int32_t, Func) - DO_VECTOR_OP(8, int64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUMax) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return std::max(a, b); }; - switch (ElementSize) { - DO_VECTOR_OP(1, uint8_t, Func) - DO_VECTOR_OP(2, uint16_t, Func) - DO_VECTOR_OP(4, uint32_t, Func) - DO_VECTOR_OP(8, uint64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSMax) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return std::max(a, b); }; - switch (ElementSize) { - DO_VECTOR_OP(1, int8_t, Func) - DO_VECTOR_OP(2, int16_t, Func) - DO_VECTOR_OP(4, int32_t, Func) - DO_VECTOR_OP(8, int64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VZip) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->VectorLower); - void *Src2 = GetSrc(Data->SSAData, Op->VectorUpper); - TempVectorDataArray Tmp; - const uint8_t ElementSize = Op->Header.ElementSize; - uint8_t Elements = OpSize / ElementSize; - const uint8_t BaseOffset = IROp->Op == IR::OP_VZIP2 ? (Elements / 2) : 0; - Elements >>= 1; - - switch (ElementSize) { - case 1: { - auto *Dst_d = reinterpret_cast(Tmp.data()); - auto *Src1_d = reinterpret_cast(Src1); - auto *Src2_d = reinterpret_cast(Src2); - for (unsigned i = 0; i < Elements; ++i) { - Dst_d[i*2] = Src1_d[BaseOffset + i]; - Dst_d[i*2+1] = Src2_d[BaseOffset + i]; - } - break; - } - case 2: { - auto *Dst_d = reinterpret_cast(Tmp.data()); - auto *Src1_d = reinterpret_cast(Src1); - auto *Src2_d = reinterpret_cast(Src2); - for (unsigned i = 0; i < Elements; ++i) { - Dst_d[i*2] = Src1_d[BaseOffset + i]; - Dst_d[i*2+1] = Src2_d[BaseOffset + i]; - } - break; - } - case 4: { - auto *Dst_d = reinterpret_cast(Tmp.data()); - auto *Src1_d = reinterpret_cast(Src1); - auto *Src2_d = reinterpret_cast(Src2); - for (unsigned i = 0; i < Elements; ++i) { - Dst_d[i*2] = Src1_d[BaseOffset + i]; - Dst_d[i*2+1] = Src2_d[BaseOffset + i]; - } - break; - } - case 8: { - auto *Dst_d = reinterpret_cast(Tmp.data()); - auto *Src1_d = reinterpret_cast(Src1); - auto *Src2_d = reinterpret_cast(Src2); - for (unsigned i = 0; i < Elements; ++i) { - Dst_d[i*2] = Src1_d[BaseOffset + i]; - Dst_d[i*2+1] = Src2_d[BaseOffset + i]; - } - break; - } - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VTrn) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->VectorLower); - void *Src2 = GetSrc(Data->SSAData, Op->VectorUpper); - TempVectorDataArray Tmp{}; - const uint8_t ElementSize = Op->Header.ElementSize; - uint8_t Elements = OpSize / ElementSize; - const uint8_t BaseOffset = IROp->Op == IR::OP_VTRN2 ? 1 : 0; - Elements >>= 1; - - switch (ElementSize) { - case 1: { - auto *Dst_d = reinterpret_cast(Tmp.data()); - auto *Src1_d = reinterpret_cast(Src1); - auto *Src2_d = reinterpret_cast(Src2); - for (unsigned i = 0; i < Elements; ++i) { - Dst_d[i*2] = Src1_d[i*2 + BaseOffset]; - Dst_d[i*2+1] = Src2_d[i*2 + BaseOffset]; - } - break; - } - case 2: { - auto *Dst_d = reinterpret_cast(Tmp.data()); - auto *Src1_d = reinterpret_cast(Src1); - auto *Src2_d = reinterpret_cast(Src2); - for (unsigned i = 0; i < Elements; ++i) { - Dst_d[i*2] = Src1_d[i*2 + BaseOffset]; - Dst_d[i*2+1] = Src2_d[i*2 + BaseOffset]; - } - break; - } - case 4: { - auto *Dst_d = reinterpret_cast(Tmp.data()); - auto *Src1_d = reinterpret_cast(Src1); - auto *Src2_d = reinterpret_cast(Src2); - for (unsigned i = 0; i < Elements; ++i) { - Dst_d[i*2] = Src1_d[i*2 + BaseOffset]; - Dst_d[i*2+1] = Src2_d[i*2 + BaseOffset]; - } - break; - } - case 8: { - auto *Dst_d = reinterpret_cast(Tmp.data()); - auto *Src1_d = reinterpret_cast(Src1); - auto *Src2_d = reinterpret_cast(Src2); - for (unsigned i = 0; i < Elements; ++i) { - Dst_d[i*2] = Src1_d[i*2 + BaseOffset]; - Dst_d[i*2+1] = Src2_d[i*2 + BaseOffset]; - } - break; - } - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUnZip) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->VectorLower); - void *Src2 = GetSrc(Data->SSAData, Op->VectorUpper); - TempVectorDataArray Tmp; - const uint8_t ElementSize = Op->Header.ElementSize; - uint8_t Elements = OpSize / ElementSize; - const unsigned Start = IROp->Op == IR::OP_VUNZIP ? 0 : 1; - Elements >>= 1; - - switch (ElementSize) { - case 1: { - auto *Dst_d = reinterpret_cast(Tmp.data()); - auto *Src1_d = reinterpret_cast(Src1); - auto *Src2_d = reinterpret_cast(Src2); - for (unsigned i = 0; i < Elements; ++i) { - Dst_d[i] = Src1_d[Start + (i * 2)]; - Dst_d[Elements+i] = Src2_d[Start + (i * 2)]; - } - break; - } - case 2: { - auto *Dst_d = reinterpret_cast(Tmp.data()); - auto *Src1_d = reinterpret_cast(Src1); - auto *Src2_d = reinterpret_cast(Src2); - for (unsigned i = 0; i < Elements; ++i) { - Dst_d[i] = Src1_d[Start + (i * 2)]; - Dst_d[Elements+i] = Src2_d[Start + (i * 2)]; - } - break; - } - case 4: { - auto *Dst_d = reinterpret_cast(Tmp.data()); - auto *Src1_d = reinterpret_cast(Src1); - auto *Src2_d = reinterpret_cast(Src2); - for (unsigned i = 0; i < Elements; ++i) { - Dst_d[i] = Src1_d[Start + (i * 2)]; - Dst_d[Elements+i] = Src2_d[Start + (i * 2)]; - } - break; - } - case 8: { - auto *Dst_d = reinterpret_cast(Tmp.data()); - auto *Src1_d = reinterpret_cast(Src1); - auto *Src2_d = reinterpret_cast(Src2); - for (unsigned i = 0; i < Elements; ++i) { - Dst_d[i] = Src1_d[Start + (i * 2)]; - Dst_d[Elements+i] = Src2_d[Start + (i * 2)]; - } - break; - } - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VBSL) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - const auto Src1 = *GetSrc(Data->SSAData, Op->VectorMask); - const auto Src2 = *GetSrc(Data->SSAData, Op->VectorTrue); - const auto Src3 = *GetSrc(Data->SSAData, Op->VectorFalse); - - const auto Tmp = InterpVector256{ - .Lower = (Src2.Lower & Src1.Lower) | (Src3.Lower & ~Src1.Lower), - .Upper = (Src2.Upper & Src1.Upper) | (Src3.Upper & ~Src1.Upper), - }; - - memset(GDP, 0, sizeof(InterpVector256)); - memcpy(GDP, &Tmp, OpSize); -} - -DEF_OP(VCMPEQ) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return a == b ? ~0ULL : 0; }; - switch (ElementSize) { - DO_VECTOR_OP(1, uint8_t, Func) - DO_VECTOR_OP(2, uint16_t, Func) - DO_VECTOR_OP(4, uint32_t, Func) - DO_VECTOR_OP(8, uint64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VCMPEQZ) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector); - uint8_t Src2[Core::CPUState::XMM_AVX_REG_SIZE]{}; - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return a == b ? ~0ULL : 0; }; - switch (ElementSize) { - DO_VECTOR_OP(1, uint8_t, Func) - DO_VECTOR_OP(2, uint16_t, Func) - DO_VECTOR_OP(4, uint32_t, Func) - DO_VECTOR_OP(8, uint64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VCMPGT) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return a > b ? ~0ULL : 0; }; - switch (ElementSize) { - DO_VECTOR_OP(1, int8_t, Func) - DO_VECTOR_OP(2, int16_t, Func) - DO_VECTOR_OP(4, int32_t, Func) - DO_VECTOR_OP(8, int64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VCMPGTZ) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector); - uint8_t Src2[Core::CPUState::XMM_AVX_REG_SIZE]{}; - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return a > b ? ~0ULL : 0; }; - switch (ElementSize) { - DO_VECTOR_OP(1, int8_t, Func) - DO_VECTOR_OP(2, int16_t, Func) - DO_VECTOR_OP(4, int32_t, Func) - DO_VECTOR_OP(8, int64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VCMPLTZ) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector); - uint8_t Src2[Core::CPUState::XMM_AVX_REG_SIZE]{}; - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [](auto a, auto b) { return a < b ? ~0ULL : 0; }; - switch (ElementSize) { - DO_VECTOR_OP(1, int8_t, Func) - DO_VECTOR_OP(2, int16_t, Func) - DO_VECTOR_OP(4, int32_t, Func) - DO_VECTOR_OP(8, int64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFCMPEQ) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - - const auto Func = [](auto a, auto b) { return a == b ? ~0ULL : 0; }; - - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto IsScalar = ElementSize == OpSize; - - if (IsScalar) { - switch (ElementSize) { - DO_SCALAR_COMPARE_OP(4, float, uint32_t, Func); - DO_SCALAR_COMPARE_OP(8, double, uint64_t, Func); - default: - LOGMAN_MSG_A_FMT("Unsupported elementSize: {}", ElementSize); - break; - } - } - else { - switch (ElementSize) { - DO_VECTOR_COMPARE_OP(4, float, uint32_t, Func); - DO_VECTOR_COMPARE_OP(8, double, uint64_t, Func); - default: - LOGMAN_MSG_A_FMT("Unsupported elementSize: {}", ElementSize); - break; - } - } - - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFCMPNEQ) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - - const auto Func = [](auto a, auto b) { return a != b ? ~0ULL : 0; }; - - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto IsScalar = ElementSize == OpSize; - - if (IsScalar) { - switch (ElementSize) { - DO_SCALAR_COMPARE_OP(4, float, uint32_t, Func); - DO_SCALAR_COMPARE_OP(8, double, uint64_t, Func); - default: - LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize); - break; - } - } - else { - switch (ElementSize) { - DO_VECTOR_COMPARE_OP(4, float, uint32_t, Func); - DO_VECTOR_COMPARE_OP(8, double, uint64_t, Func); - default: - LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize); - break; - } - } - - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFCMPLT) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - - const auto Func = [](auto a, auto b) { return a < b ? ~0ULL : 0; }; - - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto IsScalar = ElementSize == OpSize; - - if (IsScalar) { - switch (ElementSize) { - DO_SCALAR_COMPARE_OP(4, float, uint32_t, Func); - DO_SCALAR_COMPARE_OP(8, double, uint64_t, Func); - default: - LOGMAN_MSG_A_FMT("Unsupported elementSize: {}", ElementSize); - break; - } - } - else { - switch (ElementSize) { - DO_VECTOR_COMPARE_OP(4, float, uint32_t, Func); - DO_VECTOR_COMPARE_OP(8, double, uint64_t, Func); - default: - LOGMAN_MSG_A_FMT("Unsupported elementSize: {}", ElementSize); - break; - } - } - - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFCMPGT) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - - const auto Func = [](auto a, auto b) { return a > b ? ~0ULL : 0; }; - - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto IsScalar = ElementSize == OpSize; - - if (IsScalar) { - switch (ElementSize) { - DO_SCALAR_COMPARE_OP(4, float, uint32_t, Func); - DO_SCALAR_COMPARE_OP(8, double, uint64_t, Func); - default: - LOGMAN_MSG_A_FMT("Unsupported elementSize: {}", ElementSize); - break; - } - } - else { - switch (ElementSize) { - DO_VECTOR_COMPARE_OP(4, float, uint32_t, Func); - DO_VECTOR_COMPARE_OP(8, double, uint64_t, Func); - default: - LOGMAN_MSG_A_FMT("Unsupported elementSize: {}", ElementSize); - break; - } - } - - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFCMPLE) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - - const auto Func = [](auto a, auto b) { return a <= b ? ~0ULL : 0; }; - - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto IsScalar = ElementSize == OpSize; - - if (IsScalar) { - switch (ElementSize) { - DO_SCALAR_COMPARE_OP(4, float, uint32_t, Func); - DO_SCALAR_COMPARE_OP(8, double, uint64_t, Func); - default: - LOGMAN_MSG_A_FMT("Unsupported elementSize: {}", ElementSize); - break; - } - } - else { - switch (ElementSize) { - DO_VECTOR_COMPARE_OP(4, float, uint32_t, Func); - DO_VECTOR_COMPARE_OP(8, double, uint64_t, Func); - default: - LOGMAN_MSG_A_FMT("Unsupported elementSize: {}", ElementSize); - break; - } - } - - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFCMPORD) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - - const auto Func = [](auto a, auto b) { return (!std::isnan(a) && !std::isnan(b)) ? ~0ULL : 0; }; - - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto IsScalar = ElementSize == OpSize; - - if (IsScalar) { - switch (ElementSize) { - DO_SCALAR_COMPARE_OP(4, float, uint32_t, Func); - DO_SCALAR_COMPARE_OP(8, double, uint64_t, Func); - default: - LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize); - break; - } - } - else { - switch (ElementSize) { - DO_VECTOR_COMPARE_OP(4, float, uint32_t, Func); - DO_VECTOR_COMPARE_OP(8, double, uint64_t, Func); - default: - LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize); - break; - } - } - - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VFCMPUNO) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - - const auto Func = [](auto a, auto b) { return (std::isnan(a) || std::isnan(b)) ? ~0ULL : 0; }; - - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto IsScalar = ElementSize == OpSize; - - if (IsScalar) { - switch (ElementSize) { - DO_SCALAR_COMPARE_OP(4, float, uint32_t, Func); - DO_SCALAR_COMPARE_OP(8, double, uint64_t, Func); - default: - LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize); - break; - } - } - else { - switch (ElementSize) { - DO_VECTOR_COMPARE_OP(4, float, uint32_t, Func); - DO_VECTOR_COMPARE_OP(8, double, uint64_t, Func); - default: - LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize); - break; - } - } - - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUShl) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector); - void *Src2 = GetSrc(Data->SSAData, Op->ShiftVector); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto b) { return b >= (sizeof(a) * 8) ? 0 : a << b; }; - - switch (ElementSize) { - DO_VECTOR_OP(1, uint8_t, Func) - DO_VECTOR_OP(2, uint16_t, Func) - DO_VECTOR_OP(4, uint32_t, Func) - DO_VECTOR_OP(8, uint64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUShr) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector); - void *Src2 = GetSrc(Data->SSAData, Op->ShiftVector); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto b) { return b >= (sizeof(a) * 8) ? 0 : a >> b; }; - - switch (ElementSize) { - DO_VECTOR_OP(1, uint8_t, Func) - DO_VECTOR_OP(2, uint16_t, Func) - DO_VECTOR_OP(4, uint32_t, Func) - DO_VECTOR_OP(8, uint64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSShr) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector); - void *Src2 = GetSrc(Data->SSAData, Op->ShiftVector); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto b) { - return b >= (sizeof(a) * 8) ? (a >> (sizeof(a) * 8 - 1)) : a >> b; - }; - - switch (ElementSize) { - DO_VECTOR_OP(1, int8_t, Func) - DO_VECTOR_OP(2, int16_t, Func) - DO_VECTOR_OP(4, int32_t, Func) - DO_VECTOR_OP(8, int64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUShlS) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector); - void *Src2 = GetSrc(Data->SSAData, Op->ShiftScalar); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto b) { return b >= (sizeof(a) * 8) ? 0 : a << b; }; - - switch (ElementSize) { - DO_VECTOR_SCALAR_OP(1, uint8_t, Func) - DO_VECTOR_SCALAR_OP(2, uint16_t, Func) - DO_VECTOR_SCALAR_OP(4, uint32_t, Func) - DO_VECTOR_SCALAR_OP(8, uint64_t, Func) - DO_VECTOR_SCALAR_OP(16, __uint128_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUShrS) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector); - void *Src2 = GetSrc(Data->SSAData, Op->ShiftScalar); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto b) { return b >= (sizeof(a) * 8) ? 0 : a >> b; }; - - switch (ElementSize) { - DO_VECTOR_SCALAR_OP(1, uint8_t, Func) - DO_VECTOR_SCALAR_OP(2, uint16_t, Func) - DO_VECTOR_SCALAR_OP(4, uint32_t, Func) - DO_VECTOR_SCALAR_OP(8, uint64_t, Func) - DO_VECTOR_SCALAR_OP(16, __uint128_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSShrS) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector); - void *Src2 = GetSrc(Data->SSAData, Op->ShiftScalar); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto b) { - return b >= (sizeof(a) * 8) ? (a >> (sizeof(a) * 8 - 1)) : a >> b; - }; - - switch (ElementSize) { - DO_VECTOR_SCALAR_OP(1, int8_t, Func) - DO_VECTOR_SCALAR_OP(2, int16_t, Func) - DO_VECTOR_SCALAR_OP(4, int32_t, Func) - DO_VECTOR_SCALAR_OP(8, int64_t, Func) - DO_VECTOR_SCALAR_OP(16, __int128_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUShlSWide) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector); - uint64_t Src2 = *GetSrc(Data->SSAData, Op->ShiftScalar); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, uint64_t b) { return b >= (sizeof(a) * 8) ? 0 : a << b; }; - - switch (ElementSize) { - DO_VECTOR_SCALAR_WIDE_OP(1, uint8_t, Func) - DO_VECTOR_SCALAR_WIDE_OP(2, uint16_t, Func) - DO_VECTOR_SCALAR_WIDE_OP(4, uint32_t, Func) - DO_VECTOR_SCALAR_WIDE_OP(8, uint64_t, Func) - DO_VECTOR_SCALAR_WIDE_OP(16, __uint128_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUShrSWide) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector); - uint64_t Src2 = *GetSrc(Data->SSAData, Op->ShiftScalar); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, uint64_t b) { return b >= (sizeof(a) * 8) ? 0 : a >> b; }; - - switch (ElementSize) { - DO_VECTOR_SCALAR_WIDE_OP(1, uint8_t, Func) - DO_VECTOR_SCALAR_WIDE_OP(2, uint16_t, Func) - DO_VECTOR_SCALAR_WIDE_OP(4, uint32_t, Func) - DO_VECTOR_SCALAR_WIDE_OP(8, uint64_t, Func) - DO_VECTOR_SCALAR_WIDE_OP(16, __uint128_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSShrSWide) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector); - uint64_t Src2 = *GetSrc(Data->SSAData, Op->ShiftScalar); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, uint64_t b) { - return b >= (sizeof(a) * 8) ? (a >> (sizeof(a) * 8 - 1)) : a >> b; - }; - - switch (ElementSize) { - DO_VECTOR_SCALAR_WIDE_OP(1, int8_t, Func) - DO_VECTOR_SCALAR_WIDE_OP(2, int16_t, Func) - DO_VECTOR_SCALAR_WIDE_OP(4, int32_t, Func) - DO_VECTOR_SCALAR_WIDE_OP(8, int64_t, Func) - DO_VECTOR_SCALAR_WIDE_OP(16, __int128_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VInsElement) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - const auto ElementSize = Op->Header.ElementSize; - - void *Src1 = GetSrc(Data->SSAData, Op->DestVector); - void *Src2 = GetSrc(Data->SSAData, Op->SrcVector); - TempVectorDataArray Tmp; - - // Copy src1 in to dest - memcpy(Tmp.data(), Src1, OpSize); - switch (ElementSize) { - case 1: { - auto *Dst_d = reinterpret_cast(Tmp.data()); - auto *Src2_d = reinterpret_cast(Src2); - Dst_d[Op->DestIdx] = Src2_d[Op->SrcIdx]; - break; - } - case 2: { - auto *Dst_d = reinterpret_cast(Tmp.data()); - auto *Src2_d = reinterpret_cast(Src2); - Dst_d[Op->DestIdx] = Src2_d[Op->SrcIdx]; - break; - } - case 4: { - auto *Dst_d = reinterpret_cast(Tmp.data()); - auto *Src2_d = reinterpret_cast(Src2); - Dst_d[Op->DestIdx] = Src2_d[Op->SrcIdx]; - break; - } - case 8: { - auto *Dst_d = reinterpret_cast(Tmp.data()); - auto *Src2_d = reinterpret_cast(Src2); - Dst_d[Op->DestIdx] = Src2_d[Op->SrcIdx]; - break; - } - case 16: { - auto *Dst_d = reinterpret_cast<__uint128_t*>(Tmp.data()); - auto *Src2_d = reinterpret_cast<__uint128_t*>(Src2); - Dst_d[Op->DestIdx] = Src2_d[Op->SrcIdx]; - break; - } - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - }; - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VDupElement) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const uint64_t ElementSize = Op->Header.ElementSize; - const uint64_t ElementSizeBits = ElementSize * 8; - const uint8_t Elements = OpSize / ElementSize; - - constexpr auto AVXRegSize = Core::CPUState::XMM_AVX_REG_SIZE; - constexpr auto SSERegSize = Core::CPUState::XMM_SSE_REG_SIZE; - constexpr auto SSEBitSize = SSERegSize * 8; - - const auto Is128BitElement = ElementSizeBits == SSEBitSize; - const auto Is256Bit = OpSize == AVXRegSize; - - LOGMAN_THROW_AA_FMT(OpSize <= AVXRegSize, - "OpSize is too large for VDupElement: {}", OpSize); - - if (OpSize >= SSERegSize) { - __uint128_t SourceMask = (1ULL << ElementSizeBits) - 1; - if (ElementSize == 8) { - SourceMask = ~0ULL; - } - - const auto GetResult = [&]() -> __uint128_t { - const auto Src = *GetSrc(Data->SSAData, Op->Vector); - uint64_t Shift = ElementSizeBits * Op->Index; - - if (Is128BitElement) { - if (Shift == 0) { - return Src.Lower; - } else { - return Src.Upper; - } - } else { - // Normalize shift to act on upper uint128_t - if (Is256Bit && Shift >= SSEBitSize) { - Shift -= SSEBitSize; - return (Src.Upper >> Shift) & SourceMask; - } else { - return (Src.Lower >> Shift) & SourceMask; - } - } - }; - - const __uint128_t Result = GetResult(); - for (size_t i = 0; i < Elements; ++i) { - auto* Dst = static_cast(GDP) + (ElementSize * i); - memcpy(Dst, &Result, ElementSize); - } - } else { - const uint64_t Shift = ElementSizeBits * Op->Index; - uint64_t SourceMask = (1ULL << ElementSizeBits) - 1; - if (ElementSize == 8) { - SourceMask = ~0ULL; - } - - const uint64_t Src = *GetSrc(Data->SSAData, Op->Vector); - const uint64_t Result = (Src >> Shift) & SourceMask; - for (size_t i = 0; i < Elements; ++i) { - auto* Dst = static_cast(GDP) + (ElementSize * i); - memcpy(Dst, &Result, ElementSize); - } - } -} - -DEF_OP(VExtr) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - const auto OpSizeBits = OpSize * 8; - - const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; - const auto ElementSize = Op->Header.ElementSize; - const auto Index = Op->Index; - - if (Is256Bit) { - const auto ByteIndex = Index * ElementSize; - const auto IsUpperVectorZero = ByteIndex >= OpSize; - const auto SanitizedByteIndex = IsUpperVectorZero ? ByteIndex - OpSize - : ByteIndex; - - const auto Vectors = IsUpperVectorZero - ? - std::array{ - *GetSrc(Data->SSAData, Op->VectorLower), - InterpVector256{}, - } - : - std::array{ - *GetSrc(Data->SSAData, Op->VectorUpper), - *GetSrc(Data->SSAData, Op->VectorLower), - }; - - const auto* VectorsPtr = reinterpret_cast(Vectors.data()); - const auto* SrcPtr = VectorsPtr + SanitizedByteIndex; - const auto CopyAmount = std::max(0, int(sizeof(Vectors) - SanitizedByteIndex)); - - memcpy(GDP, SrcPtr, CopyAmount); - } else { - uint64_t Offset = Index * ElementSize * 8; - - const auto Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->VectorLower); - const auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->VectorUpper); - - __uint128_t Dst{}; - if (Offset >= OpSizeBits) { - Offset -= OpSizeBits; - Dst = Src1 >> Offset; - } else { - Dst = (Src1 << (OpSizeBits - Offset)) | (Src2 >> Offset); - } - - memcpy(GDP, &Dst, OpSize); - } -} - -DEF_OP(VUShrI) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - const uint8_t BitShift = Op->BitShift; - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [BitShift](auto a) { - return BitShift >= (sizeof(a) * 8) ? 0 : a >> BitShift; - }; - - switch (ElementSize) { - DO_VECTOR_1SRC_OP(1, uint8_t, Func) - DO_VECTOR_1SRC_OP(2, uint16_t, Func) - DO_VECTOR_1SRC_OP(4, uint32_t, Func) - DO_VECTOR_1SRC_OP(8, uint64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSShrI) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - const uint8_t BitShift = Op->BitShift; - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [BitShift](auto a) { - return BitShift >= (sizeof(a) * 8) ? (a >> (sizeof(a) * 8 - 1)) : a >> BitShift; - }; - - switch (ElementSize) { - DO_VECTOR_1SRC_OP(1, int8_t, Func) - DO_VECTOR_1SRC_OP(2, int16_t, Func) - DO_VECTOR_1SRC_OP(4, int32_t, Func) - DO_VECTOR_1SRC_OP(8, int64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VShlI) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - const uint8_t BitShift = Op->BitShift; - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [BitShift](auto a) { - return BitShift >= (sizeof(a) * 8) ? 0 : (a << BitShift); - }; - - switch (ElementSize) { - DO_VECTOR_1SRC_OP(1, uint8_t, Func) - DO_VECTOR_1SRC_OP(2, uint16_t, Func) - DO_VECTOR_1SRC_OP(4, uint32_t, Func) - DO_VECTOR_1SRC_OP(8, uint64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUShrNI) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - const uint8_t BitShift = Op->BitShift; - TempVectorDataArray Tmp{}; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / (ElementSize << 1); - const auto Func = [BitShift](auto a, auto min, auto max) { - return BitShift >= (sizeof(a) * 8) ? 0 : a >> BitShift; - }; - - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP(1, uint8_t, uint16_t, Func, 0, 0) - DO_VECTOR_1SRC_2TYPE_OP(2, uint16_t, uint32_t, Func, 0, 0) - DO_VECTOR_1SRC_2TYPE_OP(4, uint32_t, uint64_t, Func, 0, 0) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUShrNI2) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->VectorLower); - void *Src2 = GetSrc(Data->SSAData, Op->VectorUpper); - const uint8_t BitShift = Op->BitShift; - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / (ElementSize << 1); - const auto Func = [BitShift](auto a, auto min, auto max) { - return BitShift >= (sizeof(a) * 8) ? 0 : a >> BitShift; - }; - - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP_TOP(1, uint8_t, uint16_t, Func, 0, 0) - DO_VECTOR_1SRC_2TYPE_OP_TOP(2, uint16_t, uint32_t, Func, 0, 0) - DO_VECTOR_1SRC_2TYPE_OP_TOP(4, uint32_t, uint64_t, Func, 0, 0) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSXTL) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp{}; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto min, auto max) { return a; }; - - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP(2, int16_t, int8_t, Func, 0, 0) - DO_VECTOR_1SRC_2TYPE_OP(4, int32_t, int16_t, Func, 0, 0) - DO_VECTOR_1SRC_2TYPE_OP(8, int64_t, int32_t, Func, 0, 0) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSXTL2) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto min, auto max) { return a; }; - - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP_TOP_SRC(2, int16_t, int8_t, Func, 0, 0) - DO_VECTOR_1SRC_2TYPE_OP_TOP_SRC(4, int32_t, int16_t, Func, 0, 0) - DO_VECTOR_1SRC_2TYPE_OP_TOP_SRC(8, int64_t, int32_t, Func, 0, 0) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUXTL) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp{}; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto min, auto max) { return a; }; - - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP(2, uint16_t, uint8_t, Func, 0, 0) - DO_VECTOR_1SRC_2TYPE_OP(4, uint32_t, uint16_t, Func, 0, 0) - DO_VECTOR_1SRC_2TYPE_OP(8, uint64_t, uint32_t, Func, 0, 0) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUXTL2) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - - TempVectorDataArray Tmp{}; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto min, auto max) { return a; }; - - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP_TOP_SRC(2, uint16_t, uint8_t, Func, 0, 0) - DO_VECTOR_1SRC_2TYPE_OP_TOP_SRC(4, uint32_t, uint16_t, Func, 0, 0) - DO_VECTOR_1SRC_2TYPE_OP_TOP_SRC(8, uint64_t, uint32_t, Func, 0, 0) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSQXTN) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp{}; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / (ElementSize << 1); - const auto Func = [](auto a, auto min, auto max) { - return std::max(std::min(a, (decltype(a))max), (decltype(a))min); - }; - - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP(1, int8_t, int16_t, Func, std::numeric_limits::min(), std::numeric_limits::max()) - DO_VECTOR_1SRC_2TYPE_OP(2, int16_t, int32_t, Func, std::numeric_limits::min(), std::numeric_limits::max()) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSQXTN2) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->VectorLower); - void *Src2 = GetSrc(Data->SSAData, Op->VectorUpper); - TempVectorDataArray Tmp{}; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / (ElementSize << 1); - const auto Func = [](auto a, auto min, auto max) { - return std::max(std::min(a, (decltype(a))max), (decltype(a))min); - }; - - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP_TOP(1, int8_t, int16_t, Func, std::numeric_limits::min(), std::numeric_limits::max()) - DO_VECTOR_1SRC_2TYPE_OP_TOP(2, int16_t, int32_t, Func, std::numeric_limits::min(), std::numeric_limits::max()) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSQXTNPair) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->VectorLower); - auto Src = Src1; - void *Src2 = GetSrc(Data->SSAData, Op->VectorUpper); - TempVectorDataArray Tmp{}; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / (ElementSize << 1); - const auto Func = [](auto a, auto min, auto max) { - return std::max(std::min(a, (decltype(a))max), (decltype(a))min); - }; - - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP(1, int8_t, int16_t, Func, std::numeric_limits::min(), std::numeric_limits::max()) - DO_VECTOR_1SRC_2TYPE_OP(2, int16_t, int32_t, Func, std::numeric_limits::min(), std::numeric_limits::max()) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - - Src = Src2; - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP_TOP_DST(1, int8_t, int16_t, Func, std::numeric_limits::min(), std::numeric_limits::max()) - DO_VECTOR_1SRC_2TYPE_OP_TOP_DST(2, int16_t, int32_t, Func, std::numeric_limits::min(), std::numeric_limits::max()) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSQXTUN) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - TempVectorDataArray Tmp{}; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / (ElementSize << 1); - const auto Func = [](auto a, auto min, auto max) { - return std::max(std::min(a, (decltype(a))max), (decltype(a))min); - }; - - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP(1, uint8_t, int16_t, Func, 0, (1 << 8) - 1) - DO_VECTOR_1SRC_2TYPE_OP(2, uint16_t, int32_t, Func, 0, (1 << 16) - 1) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSQXTUN2) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->VectorLower); - void *Src2 = GetSrc(Data->SSAData, Op->VectorUpper); - TempVectorDataArray Tmp{}; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / (ElementSize << 1); - const auto Func = [](auto a, auto min, auto max) { - return std::max(std::min(a, (decltype(a))max), (decltype(a))min); - }; - - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP_TOP(1, uint8_t, int16_t, Func, 0, (1 << 8) - 1) - DO_VECTOR_1SRC_2TYPE_OP_TOP(2, uint16_t, int32_t, Func, 0, (1 << 16) - 1) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSQXTUNPair) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->VectorLower); - auto Src = Src1; - void *Src2 = GetSrc(Data->SSAData, Op->VectorUpper); - TempVectorDataArray Tmp{}; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / (ElementSize << 1); - const auto Func = [](auto a, auto min, auto max) { - return std::max(std::min(a, (decltype(a))max), (decltype(a))min); - }; - - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP(1, uint8_t, int16_t, Func, 0, (1 << 8) - 1) - DO_VECTOR_1SRC_2TYPE_OP(2, uint16_t, int32_t, Func, 0, (1 << 16) - 1) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - - Src = Src2; - switch (ElementSize) { - DO_VECTOR_1SRC_2TYPE_OP_TOP_DST(1, uint8_t, int16_t, Func, 0, (1 << 8) - 1) - DO_VECTOR_1SRC_2TYPE_OP_TOP_DST(2, uint16_t, int32_t, Func, 0, (1 << 16) - 1) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUMul) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto b) { return a * b; }; - - switch (ElementSize) { - DO_VECTOR_OP(1, uint8_t, Func) - DO_VECTOR_OP(2, uint16_t, Func) - DO_VECTOR_OP(4, uint32_t, Func) - DO_VECTOR_OP(8, uint64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUMull) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto b) { return a * b; }; - - switch (ElementSize) { - DO_VECTOR_2SRC_2TYPE_OP(2, uint16_t, uint8_t, Func) - DO_VECTOR_2SRC_2TYPE_OP(4, uint32_t, uint16_t, Func) - DO_VECTOR_2SRC_2TYPE_OP(8, uint64_t, uint32_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSMul) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto b) { return a * b; }; - - switch (ElementSize) { - DO_VECTOR_OP(1, int8_t, Func) - DO_VECTOR_OP(2, int16_t, Func) - DO_VECTOR_OP(4, int32_t, Func) - DO_VECTOR_OP(8, int64_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSMull) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto b) { return a * b; }; - - switch (ElementSize) { - DO_VECTOR_2SRC_2TYPE_OP(2, int16_t, int8_t, Func) - DO_VECTOR_2SRC_2TYPE_OP(4, int32_t, int16_t, Func) - DO_VECTOR_2SRC_2TYPE_OP(8, int64_t, int32_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUMull2) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto b) { return a * b; }; - - switch (ElementSize) { - DO_VECTOR_2SRC_2TYPE_OP_TOP_SRC(2, uint16_t, uint8_t, Func) - DO_VECTOR_2SRC_2TYPE_OP_TOP_SRC(4, uint32_t, uint16_t, Func) - DO_VECTOR_2SRC_2TYPE_OP_TOP_SRC(8, uint64_t, uint32_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSMull2) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto b) { return a * b; }; - - switch (ElementSize) { - DO_VECTOR_2SRC_2TYPE_OP_TOP_SRC(2, int16_t, int8_t, Func) - DO_VECTOR_2SRC_2TYPE_OP_TOP_SRC(4, int32_t, int16_t, Func) - DO_VECTOR_2SRC_2TYPE_OP_TOP_SRC(8, int64_t, int32_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), Op->Header.Size); -} - -DEF_OP(VUMulH) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto b) { return (a * b) >> (sizeof(a) * 8 / 2); }; - - switch (ElementSize) { - DO_VECTOR_OP_WIDE(1, uint8_t, uint16_t, Func) - DO_VECTOR_OP_WIDE(2, uint16_t, uint32_t, Func) - DO_VECTOR_OP_WIDE(4, uint32_t, uint64_t, Func) - DO_VECTOR_OP_WIDE(8, uint64_t, __uint128_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VSMulH) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - const auto Func = [](auto a, auto b) { return (a * b) >> (sizeof(a) * 8 / 2); }; - - switch (ElementSize) { - DO_VECTOR_OP_WIDE(1, int8_t, int16_t, Func) - DO_VECTOR_OP_WIDE(2, int16_t, int32_t, Func) - DO_VECTOR_OP_WIDE(4, int32_t, int64_t, Func) - DO_VECTOR_OP_WIDE(8, int64_t, __int128_t, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUABDL) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func8 = [](auto a, auto b) { return std::abs((int16_t)a - (int16_t)b); }; - const auto Func16 = [](auto a, auto b) { return std::abs((int32_t)a - (int32_t)b); }; - const auto Func32 = [](auto a, auto b) { return std::abs((int64_t)a - (int64_t)b); }; - - switch (ElementSize) { - DO_VECTOR_2SRC_2TYPE_OP(2, uint16_t, uint8_t, Func8) - DO_VECTOR_2SRC_2TYPE_OP(4, uint32_t, uint16_t, Func16) - DO_VECTOR_2SRC_2TYPE_OP(8, uint64_t, uint32_t, Func32) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VUABDL2) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src1 = GetSrc(Data->SSAData, Op->Vector1); - void *Src2 = GetSrc(Data->SSAData, Op->Vector2); - - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func8 = [](auto a, auto b) { return std::abs((int16_t)a - (int16_t)b); }; - const auto Func16 = [](auto a, auto b) { return std::abs((int32_t)a - (int32_t)b); }; - const auto Func32 = [](auto a, auto b) { return std::abs((int64_t)a - (int64_t)b); }; - - switch (ElementSize) { - DO_VECTOR_2SRC_2TYPE_OP_TOP_SRC(2, uint16_t, uint8_t, Func8) - DO_VECTOR_2SRC_2TYPE_OP_TOP_SRC(4, uint32_t, uint16_t, Func16) - DO_VECTOR_2SRC_2TYPE_OP_TOP_SRC(8, uint64_t, uint32_t, Func32) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VTBL1) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const auto *Src1 = GetSrc(Data->SSAData, Op->VectorTable); - const auto *Src2 = GetSrc(Data->SSAData, Op->VectorIndices); - - TempVectorDataArray Tmp; - - for (size_t i = 0; i < OpSize; ++i) { - const uint8_t Index = Src2[i]; - Tmp[i] = Index >= OpSize ? 0 : Src1[Index]; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VTBL2) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const auto *VectorTable1 = GetSrc(Data->SSAData, Op->VectorTable1); - const auto *VectorTable2 = GetSrc(Data->SSAData, Op->VectorTable2); - const auto *VectorIndices = GetSrc(Data->SSAData, Op->VectorIndices); - - TempVectorDataArray Tmp; - - for (size_t i = 0; i < OpSize; ++i) { - const uint8_t Index = VectorIndices[i]; - if (Index >= (OpSize * 2)) { - Tmp[i] = 0; - } - else if (Index >= OpSize) { - Tmp[i] = VectorTable2[Index - OpSize]; - } - else { - Tmp[i] = VectorTable1[Index]; - } - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VRev32) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - - TempVectorDataArray Tmp{}; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / 8; - - // The element working size is always 32-bit - // The defined element size in the op is the operating size of the element swapping - const auto Func8 = [](auto a) { return BSwap32(a); }; - const auto Func16 = [](auto a) { - return (a >> 16) | (a << 16); - }; - - switch (ElementSize) { - DO_VECTOR_1SRC_OP(1, uint32_t, Func8) - DO_VECTOR_1SRC_OP(2, uint32_t, Func16) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VRev64) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - void *Src = GetSrc(Data->SSAData, Op->Vector); - - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / 8; - - // The element working size is always 64-bit - // The defined element size in the op is the operating size of the element swapping - const auto Func8 = [](auto a) { return BSwap64(a); }; - const auto Func16 = [](auto a) { - return (a >> 48) | // Element[3] -> Element[0] - ((a >> 16) & 0xFFFF'0000U) | // Element[2] -> Element[1] - ((a << 16) & 0xFFFF'0000'0000ULL) | // Element[1] -> Element[2] - (a << 48); // Element[0] -> Element[3] - }; - const auto Func32 = [](auto a) { - return (a >> 32) | (a << 32); - }; - - switch (ElementSize) { - DO_VECTOR_1SRC_OP(1, uint64_t, Func8) - DO_VECTOR_1SRC_OP(2, uint64_t, Func16) - DO_VECTOR_1SRC_OP(4, uint64_t, Func32) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -DEF_OP(VPCMPESTRX) { - const auto Op = IROp->C(); - const auto Control = Op->Control; - - const auto RAX = *GetSrc(Data->SSAData, Op->RAX); - const auto RDX = *GetSrc(Data->SSAData, Op->RDX); - const auto LHS = *GetSrc<__uint128_t*>(Data->SSAData, Op->LHS); - const auto RHS = *GetSrc<__uint128_t*>(Data->SSAData, Op->RHS); - - const auto Result = OpHandlers::handle(RAX, RDX, LHS, RHS, Control); - - memset(GDP, 0, sizeof(uint64_t)); - memcpy(GDP, &Result, sizeof(Result)); -} - -DEF_OP(VPCMPISTRX) { - const auto Op = IROp->C(); - - const auto LHS = *GetSrc<__uint128_t*>(Data->SSAData, Op->LHS); - const auto RHS = *GetSrc<__uint128_t*>(Data->SSAData, Op->RHS); - const auto Control = Op->Control; - - const auto Result = OpHandlers::handle(LHS, RHS, Control); - - memset(GDP, 0, sizeof(uint64_t)); - memcpy(GDP, &Result, sizeof(Result)); -} - -DEF_OP(VFCADD) { - const auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - - const auto *Src1 = GetSrc(Data->SSAData, Op->Vector1); - const auto *Src2 = GetSrc(Data->SSAData, Op->Vector2); - const auto Rotate = Op->Rotate; - LOGMAN_THROW_A_FMT(Rotate == 90 || Rotate == 270, "Invalid rotate!"); - - TempVectorDataArray Tmp; - - const uint8_t ElementSize = Op->Header.ElementSize; - const uint8_t Elements = OpSize / ElementSize; - - const auto Func = [Rotate](auto dst, auto src1, auto src2) { - auto Element1 = src2[1]; - auto Element3 = src2[0]; - if (Rotate == 90) { - Element1 = -Element1; - } - else { - Element3 = -Element3; - } - dst[0] = src1[0] + Element1; - dst[1] = src1[1] + Element3; - }; - - switch (ElementSize) { - //DO_VECTOR_FCADD_PAIR_OP(2, float16_t, Func) - DO_VECTOR_FCADD_PAIR_OP(4, float, Func) - DO_VECTOR_FCADD_PAIR_OP(8, double, Func) - default: - LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); - break; - } - memcpy(GDP, Tmp.data(), OpSize); -} - -#undef DEF_OP - -} // namespace FEXCore::CPU diff --git a/FEXCore/include/FEXCore/Config/Config.h b/FEXCore/include/FEXCore/Config/Config.h index 7524f672c..d9d4e69fb 100644 --- a/FEXCore/include/FEXCore/Config/Config.h +++ b/FEXCore/include/FEXCore/Config/Config.h @@ -19,15 +19,13 @@ namespace FEXCore::Config { namespace Handler { static inline std::optional CoreHandler(std::string_view Value) { - if (Value == "irint") + if (Value == "irjit") return "0"; - else if (Value == "irjit") - return "1"; #ifdef _M_X86_64 else if (Value == "host") - return "2"; + return "1"; #endif - return "1"; + return "0"; } static inline std::optional SMCCheckHandler(std::string_view Value) { @@ -61,7 +59,6 @@ namespace Handler { #include enum ConfigCore { - CONFIG_INTERPRETER, CONFIG_IRJIT, CONFIG_CUSTOM, }; diff --git a/Source/Tools/FEXConfig/Main.cpp b/Source/Tools/FEXConfig/Main.cpp index 7b35c174f..edc6da210 100644 --- a/Source/Tools/FEXConfig/Main.cpp +++ b/Source/Tools/FEXConfig/Main.cpp @@ -246,21 +246,6 @@ namespace { if (ImGui::BeginTabItem("CPU")) { std::optional Value{}; -#ifdef INTERPRETER_ENABLED - ImGui::Text("Core:"); - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_CORE); - - ImGui::SameLine(); - if (ImGui::RadioButton("Int", Value.has_value() && **Value == "0")) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_CORE, "0"); - ConfigChanged = true; - } - ImGui::SameLine(); - if (ImGui::RadioButton("JIT", Value.has_value() && **Value == "1")) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_CORE, "1"); - ConfigChanged = true; - } -#endif Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_MAXINST); if (Value.has_value() && !(*Value)->empty()) { strncpy(BlockSize, &(*Value)->at(0), 32); diff --git a/Source/Tools/FEXLoader/LinuxSyscalls/SignalDelegator.cpp b/Source/Tools/FEXLoader/LinuxSyscalls/SignalDelegator.cpp index 271a96156..be7c7a179 100644 --- a/Source/Tools/FEXLoader/LinuxSyscalls/SignalDelegator.cpp +++ b/Source/Tools/FEXLoader/LinuxSyscalls/SignalDelegator.cpp @@ -1735,26 +1735,7 @@ namespace FEX::HLE { return Result.first; }; - const auto SigbusHandlerInterpreter = [](FEXCore::Core::InternalThreadState *Thread, int Signal, void *_info, void *ucontext) -> bool { - const auto PC = ArchHelpers::Context::GetPc(ucontext); - siginfo_t* info = reinterpret_cast(_info); - - if (info->si_code != BUS_ADRALN) { - // This only handles alignment problems - return false; - } - - const auto Result = FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(true, PC, ArchHelpers::Context::GetArmGPRs(ucontext)); - ArchHelpers::Context::SetPc(ucontext, PC + Result.second); - return Result.first; - }; - - if (Core == FEXCore::Config::CONFIG_INTERPRETER) { - RegisterHostSignalHandler(SIGBUS, SigbusHandlerInterpreter, true); - } - else { - RegisterHostSignalHandler(SIGBUS, SigbusHandler, true); - } + RegisterHostSignalHandler(SIGBUS, SigbusHandler, true); #endif // Register pause signal handler. RegisterHostSignalHandler(SignalDelegator::SIGNAL_FOR_PAUSE, PauseHandler, true); diff --git a/unittests/32Bit_ASM/CMakeLists.txt b/unittests/32Bit_ASM/CMakeLists.txt index 1569d78a7..0e2d53460 100644 --- a/unittests/32Bit_ASM/CMakeLists.txt +++ b/unittests/32Bit_ASM/CMakeLists.txt @@ -61,14 +61,6 @@ foreach(ASM_SRC ${ASM_SOURCES}) ) endif() - if (ENABLE_INTERPRETER) - list(APPEND TEST_ARGS - "--no-silent -g -c irint -n 1 --no-multiblock" "int_1" "int" - "--no-silent -g -c irint -n 500 --no-multiblock" "int_500" "int" - "--no-silent -g -c irint -n 500 --multiblock" "int_500_m" "int" - ) - endif() - if (NOT MINGW_BUILD) set (LAUNCH_PROGRAM "${CMAKE_BINARY_DIR}/Bin/TestHarnessRunner") else() diff --git a/unittests/ASM/CMakeLists.txt b/unittests/ASM/CMakeLists.txt index 2b7402450..2be2dfc64 100644 --- a/unittests/ASM/CMakeLists.txt +++ b/unittests/ASM/CMakeLists.txt @@ -54,13 +54,6 @@ foreach(ASM_SRC ${ASM_SOURCES}) "--no-silent -g -c irjit -n 500 --no-multiblock" "jit_500" "jit" "--no-silent -g -c irjit -n 500 --multiblock" "jit_500_m" "jit" ) - if (ENABLE_INTERPRETER) - list(APPEND TEST_ARGS - "--no-silent -g -c irint -n 1 --no-multiblock" "int_1" "int" - "--no-silent -g -c irint -n 500 --no-multiblock" "int_500" "int" - "--no-silent -g -c irint -n 500 --multiblock" "int_500_m" "int" - ) - endif() if (ENABLE_VIXL_SIMULATOR) set(CPU_CLASS Simulator) diff --git a/unittests/IR/CMakeLists.txt b/unittests/IR/CMakeLists.txt index 1e8ebd9e0..d36af7010 100644 --- a/unittests/IR/CMakeLists.txt +++ b/unittests/IR/CMakeLists.txt @@ -25,12 +25,6 @@ foreach(IR_SRC ${IR_SOURCES}) "--no-silent -c irjit -n 500" "ir_jit" "jit" ) - if (ENABLE_INTERPRETER) - list(APPEND TEST_ARGS - "--no-silent -c irint -n 500" "ir_int" "int" - ) - endif() - set (RUNNER_DISABLED "${CMAKE_SOURCE_DIR}/unittests/ASM/Disabled_Tests") if (DEFINED ENV{runner_label}) set (RUNNER_DISABLED "${CMAKE_SOURCE_DIR}/unittests/ASM/Disabled_Tests_$ENV{runner_label}") diff --git a/unittests/POSIX/CMakeLists.txt b/unittests/POSIX/CMakeLists.txt index 916870ca1..60d6fc265 100644 --- a/unittests/POSIX/CMakeLists.txt +++ b/unittests/POSIX/CMakeLists.txt @@ -9,20 +9,6 @@ foreach(POSIX_TEST ${POSIX_TESTS}) list(GET TEST_NAME_LIST 1 TEST_NAME) string(REPLACE "/" "-" TEST_NAME ${TEST_NAME}) - if (ENABLE_INTERPRETER) - add_test(NAME "${TEST_NAME}.int.posix" - COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py" - "${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures" - "${CMAKE_CURRENT_SOURCE_DIR}/Expected_Output" - "${CMAKE_CURRENT_SOURCE_DIR}/Disabled_Tests" - "${CMAKE_CURRENT_SOURCE_DIR}/Flake_Tests" - "${TEST_NAME}" - "guest" - "${CMAKE_BINARY_DIR}/Bin/FEXLoader" - "-o" "stderr" "--no-silent" "-c" "irint" "-n" "500" "--" - "${POSIX_TEST}") - endif() - add_test(NAME "${TEST_NAME}.jit.posix" COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py" "${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures" diff --git a/unittests/gcc-target-tests-32/CMakeLists.txt b/unittests/gcc-target-tests-32/CMakeLists.txt index 226dddd59..edbb41216 100644 --- a/unittests/gcc-target-tests-32/CMakeLists.txt +++ b/unittests/gcc-target-tests-32/CMakeLists.txt @@ -9,8 +9,6 @@ foreach(TEST ${TESTS}) list(GET TEST_NAME_LIST 1 TEST_NAME) string(REPLACE "/" "-" TEST_NAME ${TEST_NAME}) - # Interpreter is too slow to run these tests, only generate for jit - add_test(NAME "${TEST_NAME}.jit.gcc-target-32" COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py" "${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures" diff --git a/unittests/gcc-target-tests-64/CMakeLists.txt b/unittests/gcc-target-tests-64/CMakeLists.txt index 3231cbb70..dd90acb6c 100644 --- a/unittests/gcc-target-tests-64/CMakeLists.txt +++ b/unittests/gcc-target-tests-64/CMakeLists.txt @@ -9,8 +9,6 @@ foreach(TEST ${TESTS}) list(GET TEST_NAME_LIST 1 TEST_NAME) string(REPLACE "/" "-" TEST_NAME ${TEST_NAME}) - # Interpreter is too slow to run these tests, only generate for jit - add_test(NAME "${TEST_NAME}.jit.gcc-target-64" COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py" "${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures" diff --git a/unittests/gvisor-tests/CMakeLists.txt b/unittests/gvisor-tests/CMakeLists.txt index 305e53a20..989297206 100644 --- a/unittests/gvisor-tests/CMakeLists.txt +++ b/unittests/gvisor-tests/CMakeLists.txt @@ -9,8 +9,6 @@ foreach(TEST ${TESTS}) list(GET TEST_NAME_LIST 1 TEST_NAME) string(REPLACE "/" "-" TEST_NAME ${TEST_NAME}) - # Interpreter is too slow to run these tests, only generate for jit - add_test(NAME "${TEST_NAME}.jit.gvisor" COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py" "${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures"