mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-02-24 08:42:31 +00:00
Merge pull request #3128 from alyssarosenzweig/rm/interp
FEXCore: Gut interpreter
This commit is contained in:
commit
7d99eb05c6
2
.github/workflows/ccpp.yml
vendored
2
.github/workflows/ccpp.yml
vendored
@ -65,7 +65,7 @@ jobs:
|
||||
# Note the current convention is to use the -S and -B options here to specify source
|
||||
# and build directories, but this is only available with CMake 3.13 and higher.
|
||||
# The CMake binaries on the Github Actions machines are (as of this writing) 3.12
|
||||
run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DENABLE_INTERPRETER=True -DBUILD_FEX_LINUX_TESTS=True -DBUILD_THUNKS=True -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install
|
||||
run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DBUILD_FEX_LINUX_TESTS=True -DBUILD_THUNKS=True -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install
|
||||
|
||||
- name: Build
|
||||
working-directory: ${{runner.workspace}}/build
|
||||
|
2
.github/workflows/glibc_fault.yml
vendored
2
.github/workflows/glibc_fault.yml
vendored
@ -73,7 +73,7 @@ jobs:
|
||||
# Note the current convention is to use the -S and -B options here to specify source
|
||||
# and build directories, but this is only available with CMake 3.13 and higher.
|
||||
# The CMake binaries on the Github Actions machines are (as of this writing) 3.12
|
||||
run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DENABLE_INTERPRETER=True -DBUILD_FEX_LINUX_TESTS=True -DENABLE_GLIBC_ALLOCATOR_HOOK_FAULT=True -DENABLE_JEMALLOC_GLIBC_ALLOC=False -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install
|
||||
run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DBUILD_FEX_LINUX_TESTS=True -DENABLE_GLIBC_ALLOCATOR_HOOK_FAULT=True -DENABLE_JEMALLOC_GLIBC_ALLOC=False -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install
|
||||
|
||||
- name: Build
|
||||
working-directory: ${{runner.workspace}}/build
|
||||
|
2
.github/workflows/mingw_build.yml
vendored
2
.github/workflows/mingw_build.yml
vendored
@ -74,7 +74,7 @@ jobs:
|
||||
# Note the current convention is to use the -S and -B options here to specify source
|
||||
# and build directories, but this is only available with CMake 3.13 and higher.
|
||||
# The CMake binaries on the Github Actions machines are (as of this writing) 3.12
|
||||
run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_TOOLCHAIN_FILE=$GITHUB_WORKSPACE/toolchain_mingw.cmake -DMINGW_TRIPLE=$MINGW_TRIPLE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DENABLE_INTERPRETER=False -DBUILD_TESTS=False -DENABLE_JEMALLOC=False -DENABLE_JEMALLOC_GLIBC_ALLOC=False -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install
|
||||
run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_TOOLCHAIN_FILE=$GITHUB_WORKSPACE/toolchain_mingw.cmake -DMINGW_TRIPLE=$MINGW_TRIPLE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DBUILD_TESTS=False -DENABLE_JEMALLOC=False -DENABLE_JEMALLOC_GLIBC_ALLOC=False -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install
|
||||
|
||||
- name: Build
|
||||
working-directory: ${{runner.workspace}}/build
|
||||
|
@ -25,7 +25,6 @@ option(ENABLE_JEMALLOC_GLIBC_ALLOC "Enables jemalloc glibc allocator" TRUE)
|
||||
option(ENABLE_OFFLINE_TELEMETRY "Enables FEX offline telemetry" TRUE)
|
||||
option(ENABLE_COMPILE_TIME_TRACE "Enables time trace compile option" FALSE)
|
||||
option(ENABLE_LIBCXX "Enables LLVM libc++" FALSE)
|
||||
option(ENABLE_INTERPRETER "Enables FEX's Interpreter" FALSE)
|
||||
option(ENABLE_CCACHE "Enables ccache for compile caching" TRUE)
|
||||
option(ENABLE_TERMUX_BUILD "Forces building for Termux on a non-Termux build machine" FALSE)
|
||||
option(ENABLE_VIXL_SIMULATOR "Forces the FEX JIT to use the VIXL simulator" FALSE)
|
||||
@ -97,11 +96,6 @@ if (ENABLE_GDB_SYMBOLS)
|
||||
endif()
|
||||
|
||||
|
||||
if (ENABLE_INTERPRETER)
|
||||
message(STATUS "Interpreter enabled")
|
||||
add_definitions(-DINTERPRETER_ENABLED=1)
|
||||
endif()
|
||||
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/Bin)
|
||||
|
@ -159,23 +159,6 @@ if (ENABLE_GLIBC_ALLOCATOR_HOOK_FAULT)
|
||||
Utils/AllocatorOverride.cpp)
|
||||
endif()
|
||||
|
||||
if (ENABLE_INTERPRETER)
|
||||
list(APPEND SRCS
|
||||
Interface/Core/Interpreter/InterpreterCore.cpp
|
||||
Interface/Core/Interpreter/InterpreterOps.cpp
|
||||
Interface/Core/Interpreter/ALUOps.cpp
|
||||
Interface/Core/Interpreter/AtomicOps.cpp
|
||||
Interface/Core/Interpreter/BranchOps.cpp
|
||||
Interface/Core/Interpreter/ConversionOps.cpp
|
||||
Interface/Core/Interpreter/EncryptionOps.cpp
|
||||
Interface/Core/Interpreter/F80Ops.cpp
|
||||
Interface/Core/Interpreter/FlagOps.cpp
|
||||
Interface/Core/Interpreter/MemoryOps.cpp
|
||||
Interface/Core/Interpreter/MiscOps.cpp
|
||||
Interface/Core/Interpreter/MoveOps.cpp
|
||||
Interface/Core/Interpreter/VectorOps.cpp)
|
||||
endif()
|
||||
|
||||
set(DEFINES -DTHREAD_LOCAL=_Thread_local)
|
||||
|
||||
if (_M_X86_64)
|
||||
|
@ -339,11 +339,7 @@ namespace DefaultValues {
|
||||
#else
|
||||
constexpr uint32_t MaxCoreNumber = 1;
|
||||
#endif
|
||||
#ifdef INTERPRETER_ENABLED
|
||||
constexpr uint32_t MinCoreNumber = 0;
|
||||
#else
|
||||
constexpr uint32_t MinCoreNumber = 1;
|
||||
#endif
|
||||
if (Core > MaxCoreNumber || Core < MinCoreNumber) {
|
||||
// Sanitize the core option by setting the core to the JIT if invalid
|
||||
FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_CORE, fextl::fmt::format("{}", static_cast<uint32_t>(FEXCore::Config::CONFIG_IRJIT)));
|
||||
@ -353,11 +349,6 @@ namespace DefaultValues {
|
||||
if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_CACHEOBJECTCODECOMPILATION)) {
|
||||
FEX_CONFIG_OPT(CacheObjectCodeCompilation, CACHEOBJECTCODECOMPILATION);
|
||||
FEX_CONFIG_OPT(Core, CORE);
|
||||
|
||||
if (CacheObjectCodeCompilation() && Core() == FEXCore::Config::CONFIG_INTERPRETER) {
|
||||
// If running the interpreter then disable cache code compilation
|
||||
FEXCore::Config::Erase(FEXCore::Config::CONFIG_CACHEOBJECTCODECOMPILATION);
|
||||
}
|
||||
}
|
||||
|
||||
fextl::string ContainerPrefix { FindContainerPrefix() };
|
||||
|
@ -6,12 +6,12 @@
|
||||
"Default": "FEXCore::Config::ConfigCore::CONFIG_IRJIT",
|
||||
"TextDefault": "irjit",
|
||||
"ShortArg": "c",
|
||||
"Choices": [ "irint", "irjit", "host" ],
|
||||
"Choices": [ "irjit", "host" ],
|
||||
"ArgumentHandler": "CoreHandler",
|
||||
"Desc": [
|
||||
"Which CPU core to use",
|
||||
"host only exists on x86_64",
|
||||
"[irint, irjit, host]"
|
||||
"[irjit, host]"
|
||||
]
|
||||
},
|
||||
"Multiblock": {
|
||||
|
@ -308,11 +308,6 @@ namespace FEXCore::Context {
|
||||
FEXCore::Core::InternalThreadState* ContextImpl::InitCore(uint64_t InitialRIP, uint64_t StackPointer) {
|
||||
// Initialize the CPU core signal handlers & DispatcherConfig
|
||||
switch (Config.Core) {
|
||||
#ifdef INTERPRETER_ENABLED
|
||||
case FEXCore::Config::CONFIG_INTERPRETER:
|
||||
BackendFeatures = FEXCore::CPU::GetInterpreterBackendFeatures();
|
||||
break;
|
||||
#endif
|
||||
case FEXCore::Config::CONFIG_IRJIT:
|
||||
#if (_M_X86_64 && JIT_X86_64)
|
||||
BackendFeatures = FEXCore::CPU::GetX86JITBackendFeatures();
|
||||
@ -671,11 +666,6 @@ namespace FEXCore::Context {
|
||||
|
||||
// Create CPU backend
|
||||
switch (Config.Core) {
|
||||
#ifdef INTERPRETER_ENABLED
|
||||
case FEXCore::Config::CONFIG_INTERPRETER:
|
||||
Thread->CPUBackend = FEXCore::CPU::CreateInterpreterCore(this, Thread);
|
||||
break;
|
||||
#endif
|
||||
case FEXCore::Config::CONFIG_IRJIT:
|
||||
Thread->PassManager->InsertRegisterAllocationPass(DoSRA, HostFeatures.SupportsAVX);
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,792 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
$info$
|
||||
tags: backend|interpreter
|
||||
$end_info$
|
||||
*/
|
||||
|
||||
#include "Interface/Core/Interpreter/InterpreterClass.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterOps.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterDefines.h"
|
||||
|
||||
#include <FEXCore/Utils/BitUtils.h>
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace FEXCore::CPU {
|
||||
|
||||
#ifdef _M_X86_64
|
||||
uint8_t AtomicFetchNeg(uint8_t *Addr) {
|
||||
using Type = uint8_t;
|
||||
std::atomic<Type> *MemData = reinterpret_cast<std::atomic<Type>*>(Addr);
|
||||
Type Expected = MemData->load();
|
||||
Type Desired = -Expected;
|
||||
do {
|
||||
Desired = -Expected;
|
||||
} while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst));
|
||||
|
||||
return Expected;
|
||||
}
|
||||
|
||||
uint16_t AtomicFetchNeg(uint16_t *Addr) {
|
||||
using Type = uint16_t;
|
||||
std::atomic<Type> *MemData = reinterpret_cast<std::atomic<Type>*>(Addr);
|
||||
Type Expected = MemData->load();
|
||||
Type Desired = -Expected;
|
||||
do {
|
||||
Desired = -Expected;
|
||||
} while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst));
|
||||
|
||||
return Expected;
|
||||
}
|
||||
|
||||
uint32_t AtomicFetchNeg(uint32_t *Addr) {
|
||||
using Type = uint32_t;
|
||||
std::atomic<Type> *MemData = reinterpret_cast<std::atomic<Type>*>(Addr);
|
||||
Type Expected = MemData->load();
|
||||
Type Desired = -Expected;
|
||||
do {
|
||||
Desired = -Expected;
|
||||
} while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst));
|
||||
|
||||
return Expected;
|
||||
}
|
||||
|
||||
uint64_t AtomicFetchNeg(uint64_t *Addr) {
|
||||
using Type = uint64_t;
|
||||
std::atomic<Type> *MemData = reinterpret_cast<std::atomic<Type>*>(Addr);
|
||||
Type Expected = MemData->load();
|
||||
Type Desired = -Expected;
|
||||
do {
|
||||
Desired = -Expected;
|
||||
} while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst));
|
||||
|
||||
return Expected;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T AtomicCompareAndSwap(T expected, T desired, T *addr)
|
||||
{
|
||||
std::atomic<T> *MemData = reinterpret_cast<std::atomic<T>*>(addr);
|
||||
|
||||
T Src1 = expected;
|
||||
T Src2 = desired;
|
||||
|
||||
T Expected = Src1;
|
||||
bool Result = MemData->compare_exchange_strong(Expected, Src2);
|
||||
|
||||
return Result ? Src1 : Expected;
|
||||
}
|
||||
|
||||
template uint8_t AtomicCompareAndSwap<uint8_t>(uint8_t expected, uint8_t desired, uint8_t *addr);
|
||||
template uint16_t AtomicCompareAndSwap<uint16_t>(uint16_t expected, uint16_t desired, uint16_t *addr);
|
||||
template uint32_t AtomicCompareAndSwap<uint32_t>(uint32_t expected, uint32_t desired, uint32_t *addr);
|
||||
template uint64_t AtomicCompareAndSwap<uint64_t>(uint64_t expected, uint64_t desired, uint64_t *addr);
|
||||
|
||||
#else
|
||||
// Needs to match what the AArch64 JIT and unaligned signal handler expects
|
||||
uint8_t AtomicFetchNeg(uint8_t *Addr) {
|
||||
using Type = uint8_t;
|
||||
Type Result{};
|
||||
Type Tmp{};
|
||||
Type TmpStatus{};
|
||||
|
||||
__asm__ volatile(
|
||||
R"(
|
||||
1:
|
||||
ldaxrb %w[Result], [%[Memory]];
|
||||
neg %w[Tmp], %w[Result];
|
||||
stlxrb %w[TmpStatus], %w[Tmp], [%[Memory]];
|
||||
cbnz %w[TmpStatus], 1b;
|
||||
)"
|
||||
: [Result] "=r" (Result)
|
||||
, [Tmp] "=r" (Tmp)
|
||||
, [TmpStatus] "=r" (TmpStatus)
|
||||
, [Memory] "+r" (Addr)
|
||||
:: "memory"
|
||||
);
|
||||
return Result;
|
||||
}
|
||||
|
||||
uint16_t AtomicFetchNeg(uint16_t *Addr) {
|
||||
using Type = uint16_t;
|
||||
Type Result{};
|
||||
Type Tmp{};
|
||||
Type TmpStatus{};
|
||||
|
||||
__asm__ volatile(
|
||||
R"(
|
||||
1:
|
||||
ldaxrh %w[Result], [%[Memory]];
|
||||
neg %w[Tmp], %w[Result];
|
||||
stlxrh %w[TmpStatus], %w[Tmp], [%[Memory]];
|
||||
cbnz %w[TmpStatus], 1b;
|
||||
)"
|
||||
: [Result] "=r" (Result)
|
||||
, [Tmp] "=r" (Tmp)
|
||||
, [TmpStatus] "=r" (TmpStatus)
|
||||
, [Memory] "+r" (Addr)
|
||||
:: "memory"
|
||||
);
|
||||
return Result;
|
||||
}
|
||||
|
||||
uint32_t AtomicFetchNeg(uint32_t *Addr) {
|
||||
using Type = uint32_t;
|
||||
Type Result{};
|
||||
Type Tmp{};
|
||||
Type TmpStatus{};
|
||||
|
||||
__asm__ volatile(
|
||||
R"(
|
||||
1:
|
||||
ldaxr %w[Result], [%[Memory]];
|
||||
neg %w[Tmp], %w[Result];
|
||||
stlxr %w[TmpStatus], %w[Tmp], [%[Memory]];
|
||||
cbnz %w[TmpStatus], 1b;
|
||||
)"
|
||||
: [Result] "=r" (Result)
|
||||
, [Tmp] "=r" (Tmp)
|
||||
, [TmpStatus] "=r" (TmpStatus)
|
||||
, [Memory] "+r" (Addr)
|
||||
:: "memory"
|
||||
);
|
||||
return Result;
|
||||
}
|
||||
|
||||
uint64_t AtomicFetchNeg(uint64_t *Addr) {
|
||||
using Type = uint64_t;
|
||||
Type Result{};
|
||||
Type Tmp{};
|
||||
Type TmpStatus{};
|
||||
|
||||
__asm__ volatile(
|
||||
R"(
|
||||
1:
|
||||
ldaxr %[Result], [%[Memory]];
|
||||
neg %[Tmp], %[Result];
|
||||
stlxr %w[TmpStatus], %[Tmp], [%[Memory]];
|
||||
cbnz %w[TmpStatus], 1b;
|
||||
)"
|
||||
: [Result] "=r" (Result)
|
||||
, [Tmp] "=r" (Tmp)
|
||||
, [TmpStatus] "=r" (TmpStatus)
|
||||
, [Memory] "+r" (Addr)
|
||||
:: "memory"
|
||||
);
|
||||
return Result;
|
||||
}
|
||||
|
||||
template<>
|
||||
uint8_t AtomicCompareAndSwap(uint8_t expected, uint8_t desired, uint8_t *addr) {
|
||||
using Type = uint8_t;
|
||||
//force Result to r9 (scratch register) or clang spills to stack
|
||||
register Type Result asm("r9"){};
|
||||
Type Tmp{};
|
||||
Type Tmp2{};
|
||||
__asm__ volatile(
|
||||
R"(
|
||||
1:
|
||||
ldaxrb %w[Tmp], [%[Memory]];
|
||||
cmp %w[Tmp], %w[Expected], uxtb;
|
||||
b.ne 2f;
|
||||
stlxrb %w[Tmp2], %w[Desired], [%[Memory]];
|
||||
cbnz %w[Tmp2], 1b;
|
||||
mov %w[Result], %w[Expected];
|
||||
b 3f;
|
||||
2:
|
||||
mov %w[Result], %w[Tmp];
|
||||
clrex;
|
||||
3:
|
||||
)"
|
||||
: [Tmp] "=r" (Tmp)
|
||||
, [Tmp2] "=r" (Tmp2)
|
||||
, [Desired] "+r" (desired)
|
||||
, [Expected] "+r" (expected)
|
||||
, [Result] "=r" (Result)
|
||||
, [Memory] "+r" (addr)
|
||||
:: "memory"
|
||||
);
|
||||
return Result;
|
||||
}
|
||||
|
||||
template<>
|
||||
uint16_t AtomicCompareAndSwap(uint16_t expected, uint16_t desired, uint16_t *addr) {
|
||||
using Type = uint16_t;
|
||||
//force Result to r9 (scratch register) or clang spills to stack
|
||||
register Type Result asm("r9"){};
|
||||
Type Tmp{};
|
||||
Type Tmp2{};
|
||||
__asm__ volatile(
|
||||
R"(
|
||||
1:
|
||||
ldaxrh %w[Tmp], [%[Memory]];
|
||||
cmp %w[Tmp], %w[Expected], uxth;
|
||||
b.ne 2f;
|
||||
stlxrh %w[Tmp2], %w[Desired], [%[Memory]];
|
||||
cbnz %w[Tmp2], 1b;
|
||||
mov %w[Result], %w[Expected];
|
||||
b 3f;
|
||||
2:
|
||||
mov %w[Result], %w[Tmp];
|
||||
clrex;
|
||||
3:
|
||||
)"
|
||||
: [Tmp] "=r" (Tmp)
|
||||
, [Tmp2] "=r" (Tmp2)
|
||||
, [Desired] "+r" (desired)
|
||||
, [Expected] "+r" (expected)
|
||||
, [Result] "=r" (Result)
|
||||
, [Memory] "+r" (addr)
|
||||
:: "memory"
|
||||
);
|
||||
return Result;
|
||||
}
|
||||
|
||||
template<>
|
||||
uint32_t AtomicCompareAndSwap(uint32_t expected, uint32_t desired, uint32_t *addr) {
|
||||
using Type = uint32_t;
|
||||
//force Result to r9 (scratch register) or clang spills to stack
|
||||
register Type Result asm("r9"){};
|
||||
Type Tmp{};
|
||||
Type Tmp2{};
|
||||
__asm__ volatile(
|
||||
R"(
|
||||
1:
|
||||
ldaxr %w[Tmp], [%[Memory]];
|
||||
cmp %w[Tmp], %w[Expected];
|
||||
b.ne 2f;
|
||||
stlxr %w[Tmp2], %w[Desired], [%[Memory]];
|
||||
cbnz %w[Tmp2], 1b;
|
||||
mov %w[Result], %w[Expected];
|
||||
b 3f;
|
||||
2:
|
||||
mov %w[Result], %w[Tmp];
|
||||
clrex;
|
||||
3:
|
||||
)"
|
||||
: [Tmp] "=r" (Tmp)
|
||||
, [Tmp2] "=r" (Tmp2)
|
||||
, [Desired] "+r" (desired)
|
||||
, [Expected] "+r" (expected)
|
||||
, [Result] "=r" (Result)
|
||||
, [Memory] "+r" (addr)
|
||||
:: "memory"
|
||||
);
|
||||
return Result;
|
||||
}
|
||||
|
||||
template<>
|
||||
uint64_t AtomicCompareAndSwap(uint64_t expected, uint64_t desired, uint64_t *addr) {
|
||||
using Type = uint64_t;
|
||||
//force Result to r9 (scratch register) or clang spills to stack
|
||||
register Type Result asm("r9"){};
|
||||
Type Tmp{};
|
||||
Type Tmp2{};
|
||||
__asm__ volatile(
|
||||
R"(
|
||||
1:
|
||||
ldaxr %[Tmp], [%[Memory]];
|
||||
cmp %[Tmp], %[Expected];
|
||||
b.ne 2f;
|
||||
stlxr %w[Tmp2], %[Desired], [%[Memory]];
|
||||
cbnz %w[Tmp2], 1b;
|
||||
mov %[Result], %[Expected];
|
||||
b 3f;
|
||||
2:
|
||||
mov %[Result], %[Tmp];
|
||||
clrex;
|
||||
3:
|
||||
)"
|
||||
: [Tmp] "=r" (Tmp)
|
||||
, [Tmp2] "=r" (Tmp2)
|
||||
, [Desired] "+r" (desired)
|
||||
, [Expected] "+r" (expected)
|
||||
, [Result] "=r" (Result)
|
||||
, [Memory] "+r" (addr)
|
||||
:: "memory"
|
||||
);
|
||||
return Result;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
|
||||
DEF_OP(CASPair) {
|
||||
auto Op = IROp->C<IR::IROp_CASPair>();
|
||||
|
||||
// Size is the size of each pair element
|
||||
switch (IROp->ElementSize) {
|
||||
case 4: {
|
||||
GD = AtomicCompareAndSwap(
|
||||
*GetSrc<uint64_t*>(Data->SSAData, Op->Expected),
|
||||
*GetSrc<uint64_t*>(Data->SSAData, Op->Desired),
|
||||
*GetSrc<uint64_t**>(Data->SSAData, Op->Addr)
|
||||
);
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
std::atomic<__uint128_t> *MemData = *GetSrc<std::atomic<__uint128_t> **>(Data->SSAData, Op->Addr);
|
||||
|
||||
__uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Expected);
|
||||
__uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Desired);
|
||||
|
||||
__uint128_t Expected = Src1;
|
||||
bool Result = MemData->compare_exchange_strong(Expected, Src2);
|
||||
memcpy(GDP, Result ? &Src1 : &Expected, 16);
|
||||
break;
|
||||
}
|
||||
default: LOGMAN_MSG_A_FMT("Unknown CAS size: {}", IROp->ElementSize); break;
|
||||
}
|
||||
}
|
||||
|
||||
DEF_OP(CAS) {
|
||||
auto Op = IROp->C<IR::IROp_CAS>();
|
||||
uint8_t OpSize = IROp->Size;
|
||||
|
||||
switch (OpSize) {
|
||||
case 1: {
|
||||
GD = AtomicCompareAndSwap(
|
||||
*GetSrc<uint8_t*>(Data->SSAData, Op->Expected),
|
||||
*GetSrc<uint8_t*>(Data->SSAData, Op->Desired),
|
||||
*GetSrc<uint8_t**>(Data->SSAData, Op->Addr)
|
||||
);
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
GD = AtomicCompareAndSwap(
|
||||
*GetSrc<uint16_t*>(Data->SSAData, Op->Expected),
|
||||
*GetSrc<uint16_t*>(Data->SSAData, Op->Desired),
|
||||
*GetSrc<uint16_t**>(Data->SSAData, Op->Addr)
|
||||
);
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
GD = AtomicCompareAndSwap(
|
||||
*GetSrc<uint32_t*>(Data->SSAData, Op->Expected),
|
||||
*GetSrc<uint32_t*>(Data->SSAData, Op->Desired),
|
||||
*GetSrc<uint32_t**>(Data->SSAData, Op->Addr)
|
||||
);
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
GD = AtomicCompareAndSwap(
|
||||
*GetSrc<uint64_t*>(Data->SSAData, Op->Expected),
|
||||
*GetSrc<uint64_t*>(Data->SSAData, Op->Desired),
|
||||
*GetSrc<uint64_t**>(Data->SSAData, Op->Addr)
|
||||
);
|
||||
break;
|
||||
}
|
||||
default: LOGMAN_MSG_A_FMT("Unknown CAS size: {}", OpSize); break;
|
||||
}
|
||||
}
|
||||
|
||||
DEF_OP(AtomicAdd) {
|
||||
auto Op = IROp->C<IR::IROp_AtomicAdd>();
|
||||
switch (IROp->Size) {
|
||||
case 1: {
|
||||
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
|
||||
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
|
||||
*MemData += Src;
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
|
||||
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
|
||||
*MemData += Src;
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
|
||||
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
|
||||
*MemData += Src;
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
|
||||
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
|
||||
*MemData += Src;
|
||||
break;
|
||||
}
|
||||
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
|
||||
}
|
||||
}
|
||||
|
||||
DEF_OP(AtomicSub) {
|
||||
auto Op = IROp->C<IR::IROp_AtomicSub>();
|
||||
switch (IROp->Size) {
|
||||
case 1: {
|
||||
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
|
||||
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
|
||||
*MemData -= Src;
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
|
||||
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
|
||||
*MemData -= Src;
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
|
||||
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
|
||||
*MemData -= Src;
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
|
||||
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
|
||||
*MemData -= Src;
|
||||
break;
|
||||
}
|
||||
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
|
||||
}
|
||||
}
|
||||
|
||||
DEF_OP(AtomicAnd) {
|
||||
auto Op = IROp->C<IR::IROp_AtomicAnd>();
|
||||
switch (IROp->Size) {
|
||||
case 1: {
|
||||
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
|
||||
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
|
||||
*MemData &= Src;
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
|
||||
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
|
||||
*MemData &= Src;
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
|
||||
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
|
||||
*MemData &= Src;
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
|
||||
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
|
||||
*MemData &= Src;
|
||||
break;
|
||||
}
|
||||
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
|
||||
}
|
||||
}
|
||||
|
||||
DEF_OP(AtomicOr) {
|
||||
auto Op = IROp->C<IR::IROp_AtomicOr>();
|
||||
switch (IROp->Size) {
|
||||
case 1: {
|
||||
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
|
||||
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
|
||||
*MemData |= Src;
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
|
||||
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
|
||||
*MemData |= Src;
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
|
||||
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
|
||||
*MemData |= Src;
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
|
||||
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
|
||||
*MemData |= Src;
|
||||
break;
|
||||
}
|
||||
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
|
||||
}
|
||||
}
|
||||
|
||||
DEF_OP(AtomicXor) {
|
||||
auto Op = IROp->C<IR::IROp_AtomicXor>();
|
||||
switch (IROp->Size) {
|
||||
case 1: {
|
||||
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
|
||||
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
|
||||
*MemData ^= Src;
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
|
||||
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
|
||||
*MemData ^= Src;
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
|
||||
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
|
||||
*MemData ^= Src;
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
|
||||
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
|
||||
*MemData ^= Src;
|
||||
break;
|
||||
}
|
||||
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
|
||||
}
|
||||
}
|
||||
|
||||
DEF_OP(AtomicSwap) {
|
||||
auto Op = IROp->C<IR::IROp_AtomicSwap>();
|
||||
switch (IROp->Size) {
|
||||
case 1: {
|
||||
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
|
||||
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
|
||||
uint8_t Previous = MemData->exchange(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
|
||||
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
|
||||
uint16_t Previous = MemData->exchange(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
|
||||
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
|
||||
uint32_t Previous = MemData->exchange(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
|
||||
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
|
||||
uint64_t Previous = MemData->exchange(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
|
||||
}
|
||||
}
|
||||
|
||||
DEF_OP(AtomicFetchAdd) {
|
||||
auto Op = IROp->C<IR::IROp_AtomicFetchAdd>();
|
||||
switch (IROp->Size) {
|
||||
case 1: {
|
||||
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
|
||||
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
|
||||
uint8_t Previous = MemData->fetch_add(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
|
||||
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
|
||||
uint16_t Previous = MemData->fetch_add(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
|
||||
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
|
||||
uint32_t Previous = MemData->fetch_add(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
|
||||
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
|
||||
uint64_t Previous = MemData->fetch_add(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
|
||||
}
|
||||
}
|
||||
|
||||
DEF_OP(AtomicFetchSub) {
|
||||
auto Op = IROp->C<IR::IROp_AtomicFetchSub>();
|
||||
switch (IROp->Size) {
|
||||
case 1: {
|
||||
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
|
||||
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
|
||||
uint8_t Previous = MemData->fetch_sub(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
|
||||
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
|
||||
uint16_t Previous = MemData->fetch_sub(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
|
||||
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
|
||||
uint32_t Previous = MemData->fetch_sub(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
|
||||
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
|
||||
uint64_t Previous = MemData->fetch_sub(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
|
||||
}
|
||||
}
|
||||
|
||||
DEF_OP(AtomicFetchAnd) {
|
||||
auto Op = IROp->C<IR::IROp_AtomicFetchAnd>();
|
||||
switch (IROp->Size) {
|
||||
case 1: {
|
||||
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
|
||||
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
|
||||
uint8_t Previous = MemData->fetch_and(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
|
||||
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
|
||||
uint16_t Previous = MemData->fetch_and(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
|
||||
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
|
||||
uint32_t Previous = MemData->fetch_and(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
|
||||
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
|
||||
uint64_t Previous = MemData->fetch_and(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
|
||||
}
|
||||
}
|
||||
|
||||
DEF_OP(AtomicFetchOr) {
|
||||
auto Op = IROp->C<IR::IROp_AtomicFetchOr>();
|
||||
switch (IROp->Size) {
|
||||
case 1: {
|
||||
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
|
||||
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
|
||||
uint8_t Previous = MemData->fetch_or(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
|
||||
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
|
||||
uint16_t Previous = MemData->fetch_or(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
|
||||
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
|
||||
uint32_t Previous = MemData->fetch_or(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
|
||||
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
|
||||
uint64_t Previous = MemData->fetch_or(Src);
|
||||
GD = Previous;
|
||||
break;
|
||||
}
|
||||
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
|
||||
}
|
||||
}
|
||||
|
||||
DEF_OP(AtomicFetchXor) {
  auto Op = IROp->C<IR::IROp_AtomicFetchXor>();
  // Atomically XOR the source value into memory; the destination receives the
  // pre-operation memory contents.
  switch (IROp->Size) {
    case 1: {
      auto *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
      GD = MemData->fetch_xor(*GetSrc<uint8_t*>(Data->SSAData, Op->Value));
      break;
    }
    case 2: {
      auto *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
      GD = MemData->fetch_xor(*GetSrc<uint16_t*>(Data->SSAData, Op->Value));
      break;
    }
    case 4: {
      auto *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
      GD = MemData->fetch_xor(*GetSrc<uint32_t*>(Data->SSAData, Op->Value));
      break;
    }
    case 8: {
      auto *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
      GD = MemData->fetch_xor(*GetSrc<uint64_t*>(Data->SSAData, Op->Value));
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
  }
}
|
||||
|
||||
DEF_OP(AtomicFetchNeg) {
  auto Op = IROp->C<IR::IROp_AtomicFetchNeg>();
  // Atomically negate the value in memory via the AtomicFetchNeg helper,
  // returning the pre-operation contents in the destination.
  switch (IROp->Size) {
    case 1:
      GD = AtomicFetchNeg(*GetSrc<uint8_t**>(Data->SSAData, Op->Addr));
      break;
    case 2:
      GD = AtomicFetchNeg(*GetSrc<uint16_t**>(Data->SSAData, Op->Addr));
      break;
    case 4:
      GD = AtomicFetchNeg(*GetSrc<uint32_t**>(Data->SSAData, Op->Addr));
      break;
    case 8:
      GD = AtomicFetchNeg(*GetSrc<uint64_t**>(Data->SSAData, Op->Addr));
      break;
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
  }
}
|
||||
DEF_OP(TelemetrySetValue) {
#ifndef FEX_DISABLE_TELEMETRY
  auto Op = IROp->C<IR::IROp_TelemetrySetValue>();
  const uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);

  // Telemetry values behave as sticky booleans: OR in a 1 whenever the
  // source is non-zero, never clearing previously set bits.
  auto TelemetryPtr = reinterpret_cast<std::atomic<uint64_t>*>(Data->State->CurrentFrame->Pointers.Common.TelemetryValueAddresses[Op->TelemetryValueIndex]);
  *TelemetryPtr |= (Src != 0) ? uint64_t{1} : uint64_t{0};
#endif
}
|
||||
|
||||
#undef DEF_OP
|
||||
|
||||
} // namespace FEXCore::CPU
|
@ -1,158 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
$info$
|
||||
tags: backend|interpreter
|
||||
$end_info$
|
||||
*/
|
||||
|
||||
#include "Interface/Context/Context.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterClass.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterOps.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterDefines.h"
|
||||
#include "Interface/HLE/Thunks/Thunks.h"
|
||||
|
||||
#include <FEXCore/Utils/BitUtils.h>
|
||||
#include <FEXCore/HLE/SyscallHandler.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <unistd.h>
|
||||
|
||||
namespace FEXCore::CPU {
|
||||
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
|
||||
|
||||
DEF_OP(CallbackReturn) {
  // Hand control back to the dispatcher's callback-return trampoline for this
  // thread, passing the saved stack entry captured when the callback began.
  Data->State->CurrentFrame->Pointers.Interpreter.CallbackReturn(Data->State, Data->StackEntry);
}
|
||||
|
||||
DEF_OP(ExitFunction) {
  auto Op = IROp->C<IR::IROp_ExitFunction>();
  const uint8_t OpSize = IROp->Size;

  // Copy the new RIP over the start of the thread's frame (the frame begins
  // with the guest RIP slot — TODO(review): confirm layout against CpuStateFrame)
  // and tell the interpreter loop to stop executing this block.
  void *ContextData = reinterpret_cast<void*>(Data->State->CurrentFrame);
  void *NewRIP = GetSrc<void*>(Data->SSAData, Op->NewRIP);
  memcpy(ContextData, NewRIP, OpSize);

  Data->BlockResults.Quit = true;
}
|
||||
|
||||
DEF_OP(Jump) {
  auto Op = IROp->C<IR::IROp_Jump>();
  // Unconditional branch: point the interpreter's block iterator at the
  // target block and flag the dispatch loop to re-enter it.
  Data->BlockIterator = IR::NodeIterator(Data->CurrentIR->GetListData(), Data->CurrentIR->GetData(), Op->TargetBlock);
  Data->BlockResults.Redo = true;
}
|
||||
|
||||
DEF_OP(CondJump) {
  auto Op = IROp->C<IR::IROp_CondJump>();
  const uintptr_t ListBegin = Data->CurrentIR->GetListData();
  const uintptr_t DataBegin = Data->CurrentIR->GetData();

  const uint64_t Src1 = *GetSrc<uint64_t*>(Data->SSAData, Op->Cmp1);
  const uint64_t Src2 = *GetSrc<uint64_t*>(Data->SSAData, Op->Cmp2);

  // 32-bit compares evaluate with 32-bit integer / float semantics;
  // everything else uses 64-bit integer / double semantics.
  const bool CompResult = (Op->CompareSize == 4)
    ? IsConditionTrue<uint32_t, int32_t, float>(Op->Cond.Val, Src1, Src2)
    : IsConditionTrue<uint64_t, int64_t, double>(Op->Cond.Val, Src1, Src2);

  // Select the successor block and re-enter the dispatch loop.
  Data->BlockIterator = IR::NodeIterator(ListBegin, DataBegin, CompResult ? Op->TrueBlock : Op->FalseBlock);
  Data->BlockResults.Redo = true;
}
|
||||
|
||||
DEF_OP(Syscall) {
  auto Op = IROp->C<IR::IROp_Syscall>();

  // Gather up to MAX_ARGS syscall arguments; the first invalid SSA argument
  // terminates the list.
  FEXCore::HLE::SyscallArguments Args;
  for (size_t Idx = 0; Idx < FEXCore::HLE::SyscallArguments::MAX_ARGS; ++Idx) {
    if (Op->Header.Args[Idx].IsInvalid()) {
      break;
    }
    Args.Argument[Idx] = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[Idx]);
  }

  // Route through the emulated syscall handler and store the result.
  GD = FEXCore::Context::HandleSyscall(static_cast<Context::ContextImpl*>(Data->State->CTX)->SyscallHandler, Data->State->CurrentFrame, &Args);
}
|
||||
|
||||
DEF_OP(InlineSyscall) {
  auto Op = IROp->C<IR::IROp_InlineSyscall>();

  // Gather up to MAX_ARGS syscall arguments; the first invalid SSA argument
  // terminates the list.
  FEXCore::HLE::SyscallArguments Args;
  for (size_t Idx = 0; Idx < FEXCore::HLE::SyscallArguments::MAX_ARGS; ++Idx) {
    if (Op->Header.Args[Idx].IsInvalid()) {
      break;
    }
    Args.Argument[Idx] = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[Idx]);
  }

  // Issue the raw host syscall through libc. We don't want the errno handling
  // but avoid inline ASM here, so convert the libc -1/errno convention back
  // into the kernel's negative-errno return convention.
  uint64_t Res = syscall(
    Op->HostSyscallNumber,
    Args.Argument[0],
    Args.Argument[1],
    Args.Argument[2],
    Args.Argument[3],
    Args.Argument[4],
    Args.Argument[5],
    Args.Argument[6]);

  if (Res == static_cast<uint64_t>(-1)) {
    Res = -errno;
  }

  GD = Res;
}
|
||||
|
||||
DEF_OP(Thunk) {
  auto Op = IROp->C<IR::IROp_Thunk>();

  // Look up the host-native thunk implementation by its name hash and invoke
  // it with the guest-provided argument block pointer.
  auto thunkFn = static_cast<Context::ContextImpl*>(Data->State->CTX)->ThunkHandler->LookupThunk(Op->ThunkNameHash);
  thunkFn(*GetSrc<void**>(Data->SSAData, Op->ArgPtr));
}
|
||||
|
||||
DEF_OP(ValidateCode) {
  auto Op = IROp->C<IR::IROp_ValidateCode>();

  // Compare the guest code bytes at the entry point against the snapshot
  // captured at compile time: 1 when the code has been modified, 0 otherwise.
  const auto CodePtr = Data->CurrentEntry + Op->Offset;
  const bool Modified = memcmp((void*)CodePtr, &Op->CodeOriginalLow, Op->CodeLength) != 0;
  GD = Modified ? 1 : 0;
}
|
||||
|
||||
DEF_OP(ThreadRemoveCodeEntry) {
  // Invalidate the JIT code entry for the guest block currently being
  // executed, forcing a recompile on the next visit.
  static_cast<Context::ContextImpl*>(Data->State->CTX)->ThreadRemoveCodeEntryFromJit(Data->State->CurrentFrame, Data->CurrentEntry);
}
|
||||
|
||||
DEF_OP(CPUID) {
  auto Op = IROp->C<IR::IROp_CPUID>();
  const uint64_t Function = *GetSrc<uint64_t*>(Data->SSAData, Op->Function);
  const uint64_t Leaf = *GetSrc<uint64_t*>(Data->SSAData, Op->Leaf);

  // Run the emulated CPUID function and copy the four 32-bit result values
  // into the destination.
  auto Results = Data->State->CTX->RunCPUIDFunction(Function, Leaf);
  memcpy(GetDest<uint64_t*>(Data->SSAData, Node), &Results, sizeof(uint32_t) * 4);
}
|
||||
|
||||
DEF_OP(XGETBV) {
  auto Op = IROp->C<IR::IROp_XGetBV>();
  const uint32_t Function = *GetSrc<uint32_t*>(Data->SSAData, Op->Function);

  // Run the emulated XCR query and copy the two 32-bit result values into
  // the destination.
  auto Results = Data->State->CTX->RunXCRFunction(Function);
  memcpy(GetDest<uint32_t*>(Data->SSAData, Node), &Results, sizeof(uint32_t) * 2);
}
|
||||
|
||||
#undef DEF_OP
|
||||
|
||||
} // namespace FEXCore::CPU
|
@ -1,279 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
$info$
|
||||
tags: backend|interpreter
|
||||
$end_info$
|
||||
*/
|
||||
|
||||
#include "Interface/Core/Interpreter/InterpreterClass.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterOps.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterDefines.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace FEXCore::CPU {
|
||||
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
|
||||
DEF_OP(VInsGPR) {
  const auto Op = IROp->C<IR::IROp_VInsGPR>();
  const auto OpSize = IROp->Size;

  // Insert a GPR-sourced scalar into element DestIdx of a 256-bit vector,
  // leaving every other element untouched.
  const auto ElementSize = Op->Header.ElementSize;
  const auto ElementSizeBits = ElementSize * 8;
  constexpr auto SSEBitSize = Core::CPUState::XMM_SSE_REG_SIZE * 8;

  // Bit offset of the destination element within the 256-bit vector; offsets
  // at or past 128 bits land in the upper lane.
  const uint64_t Offset = Op->DestIdx * ElementSizeBits;
  const auto InUpperLane = Offset >= SSEBitSize;

  // Single-element mask. (1 << 64) would be an out-of-range shift, so the
  // 8-byte element case is special-cased to an all-ones 64-bit mask.
  __uint128_t Mask = (1ULL << ElementSizeBits) - 1;
  if (ElementSize == 8) {
    Mask = ~0ULL;
  }

  const auto Src1 = *GetSrc<InterpVector256*>(Data->SSAData, Op->DestVector);
  const auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Src);

  const auto Scalar = Src2 & Mask;
  // Rebase upper-lane offsets so the shift applies within that 128-bit lane.
  const auto ScaledOffset = InUpperLane ? Offset - SSEBitSize
                                        : Offset;

  // Now shift into place and set all bits but
  // the ones where we're going to insert our value.
  Mask <<= ScaledOffset;
  Mask = ~Mask;

  // Merge the scalar into the affected lane only; the other lane passes
  // through unchanged.
  const auto Dst = [&] {
    if (InUpperLane) {
      return InterpVector256{
        .Lower = Src1.Lower,
        .Upper = (Src1.Upper & Mask) | (Scalar << ScaledOffset),
      };
    } else {
      return InterpVector256{
        .Lower = (Src1.Lower & Mask) | (Scalar << ScaledOffset),
        .Upper = Src1.Upper,
      };
    }
  }();

  memcpy(GDP, &Dst, OpSize);
}
|
||||
|
||||
DEF_OP(VCastFromGPR) {
  auto Op = IROp->C<IR::IROp_VCastFromGPR>();
  // Bit-copy one element's worth of data from a GPR source into the vector
  // destination; no value conversion is performed.
  memcpy(GDP, GetSrc<void*>(Data->SSAData, Op->Src), Op->Header.ElementSize);
}
|
||||
|
||||
DEF_OP(VDupFromGPR) {
  const auto Op = IROp->C<IR::IROp_VDupFromGPR>();
  const auto OpSize = IROp->Size;

  // Broadcast the scalar source into every element of the destination vector.
  const auto ElementSize = IROp->ElementSize;
  const auto NumElements = OpSize / IROp->ElementSize;

  TempVectorDataArray Tmp{};
  const auto *Src = GetSrc<void*>(Data->SSAData, Op->Src);

  for (size_t Element = 0; Element < NumElements; Element++) {
    memcpy(&Tmp[Element * ElementSize], Src, ElementSize);
  }

  memcpy(GDP, Tmp.data(), sizeof(Tmp));
}
|
||||
|
||||
DEF_OP(Float_FromGPR_S) {
  auto Op = IROp->C<IR::IROp_Float_FromGPR_S>();

  // Signed integer -> scalar float conversion, selected by the packed pair
  // (destination element size << 8 | source element size).
  const uint16_t Conv = (Op->Header.ElementSize << 8) | Op->SrcElementSize;
  switch (Conv) {
    case 0x0404: { // float <- int32_t
      const float Result = static_cast<float>(*GetSrc<int32_t*>(Data->SSAData, Op->Src));
      memcpy(GDP, &Result, Op->Header.ElementSize);
      break;
    }
    case 0x0408: { // float <- int64_t
      const float Result = static_cast<float>(*GetSrc<int64_t*>(Data->SSAData, Op->Src));
      memcpy(GDP, &Result, Op->Header.ElementSize);
      break;
    }
    case 0x0804: { // double <- int32_t
      const double Result = static_cast<double>(*GetSrc<int32_t*>(Data->SSAData, Op->Src));
      memcpy(GDP, &Result, Op->Header.ElementSize);
      break;
    }
    case 0x0808: { // double <- int64_t
      const double Result = static_cast<double>(*GetSrc<int64_t*>(Data->SSAData, Op->Src));
      memcpy(GDP, &Result, Op->Header.ElementSize);
      break;
    }
  }
}
|
||||
|
||||
DEF_OP(Float_FToF) {
  auto Op = IROp->C<IR::IROp_Float_FToF>();

  // Scalar float<->double conversion, selected by the packed pair
  // (destination element size << 8 | source element size).
  const uint16_t Conv = (Op->Header.ElementSize << 8) | Op->SrcElementSize;
  switch (Conv) {
    case 0x0804: { // double <- float
      const double Result = static_cast<double>(*GetSrc<float*>(Data->SSAData, Op->Scalar));
      memcpy(GDP, &Result, 8);
      break;
    }
    case 0x0408: { // float <- double
      const float Result = static_cast<float>(*GetSrc<double*>(Data->SSAData, Op->Scalar));
      memcpy(GDP, &Result, 4);
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unknown FCVT sizes: 0x{:x}", Conv);
  }
}
|
||||
|
||||
DEF_OP(Vector_SToF) {
  auto Op = IROp->C<IR::IROp_Vector_SToF>();
  const uint8_t OpSize = IROp->Size;

  // Convert signed integer elements to floating-point values of the same width.
  void *Src = GetSrc<void*>(Data->SSAData, Op->Vector);
  TempVectorDataArray Tmp{};

  const uint8_t ElementSize = Op->Header.ElementSize;
  // Scalar form when the element covers the whole op, else a full vector.
  const uint8_t Elements = OpSize == ElementSize ? 1 : OpSize / ElementSize;

  // Plain conversion; the lambda ignores its min/max arguments.
  const auto Func = [](auto a, auto min, auto max) { return a; };
  // The macros expand per-element loops using the local names Src, Tmp,
  // and Elements declared above.
  switch (ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(4, float, int32_t, Func, 0, 0)
    DO_VECTOR_1SRC_2TYPE_OP(8, double, int64_t, Func, 0, 0)
    default:
      LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
      break;
  }
  memcpy(GDP, Tmp.data(), OpSize);
}
|
||||
|
||||
DEF_OP(Vector_FToZS) {
  const auto Op = IROp->C<IR::IROp_Vector_FToZS>();
  const uint8_t OpSize = IROp->Size;

  // Convert floating-point elements to signed integers, truncating toward zero.
  void *Src = GetSrc<void*>(Data->SSAData, Op->Vector);
  TempVectorDataArray Tmp{};

  const uint8_t ElementSize = Op->Header.ElementSize;
  // Scalar form when the element covers the whole op, else a full vector.
  const uint8_t Elements = OpSize == ElementSize ? 1 : OpSize / ElementSize;

  // std::trunc rounds toward zero; the lambda ignores its min/max arguments.
  const auto Func = [](auto a, auto min, auto max) { return std::trunc(a); };
  switch (ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(4, int32_t, float, Func, 0, 0)
    DO_VECTOR_1SRC_2TYPE_OP(8, int64_t, double, Func, 0, 0)
    default:
      LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
      break;
  }
  memcpy(GDP, Tmp.data(), OpSize);
}
|
||||
|
||||
DEF_OP(Vector_FToS) {
  const auto Op = IROp->C<IR::IROp_Vector_FToS>();
  const uint8_t OpSize = IROp->Size;

  // Convert floating-point elements to signed integers using the host's
  // current rounding mode (std::nearbyint).
  void *Src = GetSrc<void*>(Data->SSAData, Op->Vector);
  TempVectorDataArray Tmp{};

  const uint8_t ElementSize = Op->Header.ElementSize;
  // Scalar form when the element covers the whole op, else a full vector.
  const uint8_t Elements = OpSize == ElementSize ? 1 : OpSize / ElementSize;

  // The lambda ignores its min/max arguments.
  const auto Func = [](auto a, auto min, auto max) { return std::nearbyint(a); };
  switch (ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(4, int32_t, float, Func, 0, 0)
    DO_VECTOR_1SRC_2TYPE_OP(8, int64_t, double, Func, 0, 0)
    default:
      LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
      break;
  }
  memcpy(GDP, Tmp.data(), OpSize);
}
|
||||
|
||||
DEF_OP(Vector_FToF) {
  const auto Op = IROp->C<IR::IROp_Vector_FToF>();
  const uint8_t OpSize = IROp->Size;

  // Per-element float<->double vector conversion, selected by the packed pair
  // (destination element size << 8 | source element size).
  void *Src = GetSrc<void*>(Data->SSAData, Op->Vector);
  TempVectorDataArray Tmp{};

  const uint16_t ElementSize = Op->Header.ElementSize;
  const uint16_t Conv = (ElementSize << 8) | Op->SrcElementSize;

  // Plain conversion; the lambda ignores its min/max arguments.
  const auto Func = [](auto a, auto min, auto max) { return a; };
  switch (Conv) {
    case 0x0804: { // Double <- float
      // Only the lower elements from the source
      // This uses half the source elements
      uint8_t Elements = OpSize / 8;
      DO_VECTOR_1SRC_2TYPE_OP_NOSIZE(double, float, Func, 0, 0)
      break;
    }
    case 0x0408: { // Float <- Double
      // Little bit tricky here
      // Sometimes is used to convert from a 128bit vector register
      // in to a 64bit vector register with different sized elements
      // eg: %5 i32v2 = Vector_FToF %4 i128, #0x8
      uint8_t Elements = OpSize == 8 ? 2 : OpSize / Op->SrcElementSize;
      DO_VECTOR_1SRC_2TYPE_OP_NOSIZE(float, double, Func, 0, 0)
      break;
    }
    default:
      LOGMAN_MSG_A_FMT("Unknown Conversion Type : 0x{:04x}", Conv);
      break;
  }
  memcpy(GDP, Tmp.data(), OpSize);
}
|
||||
|
||||
DEF_OP(Vector_FToI) {
  const auto Op = IROp->C<IR::IROp_Vector_FToI>();
  const uint8_t OpSize = IROp->Size;

  // Round floating-point elements to integral floating-point values (the
  // elements stay float/double), with the rounding direction chosen by
  // Op->Round.
  void *Src = GetSrc<void*>(Data->SSAData, Op->Vector);
  TempVectorDataArray Tmp{};

  const uint8_t ElementSize = Op->Header.ElementSize;
  const uint8_t Elements = OpSize / ElementSize;
  // std::rint uses the current rounding mode; floor/ceil/trunc are the
  // directed roundings. Round_Host also maps to std::rint here.
  const auto Func_Nearest = [](auto a) { return std::rint(a); };
  const auto Func_Neg = [](auto a) { return std::floor(a); };
  const auto Func_Pos = [](auto a) { return std::ceil(a); };
  const auto Func_Trunc = [](auto a) { return std::trunc(a); };
  const auto Func_Host = [](auto a) { return std::rint(a); };

  // The DO_VECTOR_1SRC_OP macros expand per-element loops over the local
  // names Src, Tmp, and Elements declared above.
  switch (Op->Round) {
    case FEXCore::IR::Round_Nearest.Val:
      switch (ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Nearest)
        DO_VECTOR_1SRC_OP(8, double, Func_Nearest)
      }
      break;
    case FEXCore::IR::Round_Negative_Infinity.Val:
      switch (ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Neg)
        DO_VECTOR_1SRC_OP(8, double, Func_Neg)
      }
      break;
    case FEXCore::IR::Round_Positive_Infinity.Val:
      switch (ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Pos)
        DO_VECTOR_1SRC_OP(8, double, Func_Pos)
      }
      break;
    case FEXCore::IR::Round_Towards_Zero.Val:
      switch (ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Trunc)
        DO_VECTOR_1SRC_OP(8, double, Func_Trunc)
      }
      break;
    case FEXCore::IR::Round_Host.Val:
      switch (ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Host)
        DO_VECTOR_1SRC_OP(8, double, Func_Host)
      }
      break;
  }
  memcpy(GDP, Tmp.data(), OpSize);
}
|
||||
|
||||
#undef DEF_OP
|
||||
|
||||
} // namespace FEXCore::CPU
|
@ -1,557 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
$info$
|
||||
tags: backend|interpreter
|
||||
$end_info$
|
||||
*/
|
||||
|
||||
#include "Interface/Core/Interpreter/InterpreterClass.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterOps.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterDefines.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace AES {
|
||||
// Inverse of the AES ShiftRows step: undo the row rotations of the
// column-major 4x4 byte state and return the result as a 128-bit value.
static __uint128_t InvShiftRows(uint8_t *State) {
  // Destination index i takes its byte from source index Map[i].
  static constexpr uint8_t Map[16] = {
    0, 13, 10, 7,
    4, 1, 14, 11,
    8, 5, 2, 15,
    12, 9, 6, 3,
  };

  uint8_t Out[16];
  for (size_t i = 0; i < 16; ++i) {
    Out[i] = State[Map[i]];
  }

  __uint128_t Result{};
  memcpy(&Result, Out, sizeof(Out));
  return Result;
}
|
||||
|
||||
// Inverse of the AES SubBytes step: substitute each of the 16 state bytes
// through the inverse S-box and return the result as a 128-bit value.
static __uint128_t InvSubBytes(uint8_t *State) {
  // 16x16 matrix table: the AES inverse S-box, indexed directly by byte value.
  static const uint8_t InvSubstitutionTable[256] = {
    0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
    0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
    0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
    0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
    0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
    0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
    0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
    0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
    0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
    0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
    0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
    0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
    0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
    0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
    0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
    0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
  };

  // Uses a byte substitution table with a constant set of values
  // Needs to do a table look up
  uint8_t Substituted[16];
  for (size_t i = 0; i < 16; ++i) {
    Substituted[i] = InvSubstitutionTable[State[i]];
  }

  __uint128_t Res{};
  memcpy(&Res, Substituted, 16);
  return Res;
}
|
||||
|
||||
// AES ShiftRows step: rotate the rows of the column-major 4x4 byte state and
// return the result as a 128-bit value.
static __uint128_t ShiftRows(uint8_t *State) {
  // Destination index i takes its byte from source index Map[i].
  static constexpr uint8_t Map[16] = {
    0, 5, 10, 15,
    4, 9, 14, 3,
    8, 13, 2, 7,
    12, 1, 6, 11,
  };

  uint8_t Out[16];
  for (size_t i = 0; i < 16; ++i) {
    Out[i] = State[Map[i]];
  }

  __uint128_t Result{};
  memcpy(&Result, Out, sizeof(Out));
  return Result;
}
|
||||
|
||||
// AES SubBytes step: substitute the first `Bytes` state bytes (clamped to 16)
// through the forward S-box and return them packed into a 128-bit value.
// Bytes past the clamped count are left zero in the result.
static __uint128_t SubBytes(uint8_t *State, size_t Bytes) {
  // 16x16 matrix table: the AES forward S-box, indexed directly by byte value.
  static const uint8_t SubstitutionTable[256] = {
    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
  };
  // Uses a byte substitution table with a constant set of values
  // Needs to do a table look up
  // Only the first `Bytes` entries are written and copied out below; the
  // remaining entries stay uninitialized but are never read.
  uint8_t Substituted[16];
  Bytes = std::min(Bytes, (size_t)16);
  for (size_t i = 0; i < Bytes; ++i) {
    Substituted[i] = SubstitutionTable[State[i]];
  }

  __uint128_t Res{};
  memcpy(&Res, Substituted, Bytes);
  return Res;
}
|
||||
|
||||
// GF(2^8) multiplication by 0x02 over the AES polynomial x^8+x^4+x^3+x+1
// ("xtime"): shift left, then reduce with 0x1b when the high bit was set.
// Equivalent to the original 256-entry lookup table.
static uint8_t FFMul02(uint8_t in) {
  const uint8_t Shifted = static_cast<uint8_t>(in << 1);
  return (in & 0x80) ? static_cast<uint8_t>(Shifted ^ 0x1b) : Shifted;
}
|
||||
|
||||
// GF(2^8) multiplication by 0x03 over the AES polynomial: 3·x = xtime(x) ^ x.
// Equivalent to the original 256-entry lookup table.
static uint8_t FFMul03(uint8_t in) {
  const uint8_t Doubled = static_cast<uint8_t>((in << 1) ^ ((in & 0x80) ? 0x1b : 0x00));
  return Doubled ^ in;
}
|
||||
|
||||
static __uint128_t MixColumns(uint8_t *State) {
|
||||
uint8_t In0[16] = {
|
||||
State[0], State[4], State[8], State[12],
|
||||
State[1], State[5], State[9], State[13],
|
||||
State[2], State[6], State[10], State[14],
|
||||
State[3], State[7], State[11], State[15],
|
||||
};
|
||||
|
||||
uint8_t Out0[4]{};
|
||||
uint8_t Out1[4]{};
|
||||
uint8_t Out2[4]{};
|
||||
uint8_t Out3[4]{};
|
||||
|
||||
for (size_t i = 0; i < 4; ++i) {
|
||||
Out0[i] = FFMul02(In0[0 + i]) ^ FFMul03(In0[4 + i]) ^ In0[8 + i] ^ In0[12 + i];
|
||||
Out1[i] = In0[0 + i] ^ FFMul02(In0[4 + i]) ^ FFMul03(In0[8 + i]) ^ In0[12 + i];
|
||||
Out2[i] = In0[0 + i] ^ In0[4 + i] ^ FFMul02(In0[8 + i]) ^ FFMul03(In0[12 + i]);
|
||||
Out3[i] = FFMul03(In0[0 + i]) ^ In0[4 + i] ^ In0[8 + i] ^ FFMul02(In0[12 + i]);
|
||||
}
|
||||
|
||||
uint8_t OutArray[16] = {
|
||||
Out0[0], Out1[0], Out2[0], Out3[0],
|
||||
Out0[1], Out1[1], Out2[1], Out3[1],
|
||||
Out0[2], Out1[2], Out2[2], Out3[2],
|
||||
Out0[3], Out1[3], Out2[3], Out3[3],
|
||||
};
|
||||
__uint128_t Res{};
|
||||
memcpy(&Res, OutArray, 16);
|
||||
return Res;
|
||||
}
|
||||
|
||||
// GF(2^8) multiplication by 0x09 over the AES polynomial:
// 9·x = (8 + 1)·x = xtime(xtime(xtime(x))) ^ x.
// Equivalent to the original 256-entry lookup table.
static uint8_t FFMul09(uint8_t in) {
  const auto XTime = [](uint8_t v) -> uint8_t {
    return static_cast<uint8_t>((v << 1) ^ ((v & 0x80) ? 0x1b : 0x00));
  };
  return XTime(XTime(XTime(in))) ^ in;
}
|
||||
|
||||
// GF(2^8) multiplication by 0x0b over the AES polynomial:
// 11·x = (8 + 2 + 1)·x = xtime^3(x) ^ xtime(x) ^ x.
// Equivalent to the original 256-entry lookup table.
static uint8_t FFMul0B(uint8_t in) {
  const auto XTime = [](uint8_t v) -> uint8_t {
    return static_cast<uint8_t>((v << 1) ^ ((v & 0x80) ? 0x1b : 0x00));
  };
  const uint8_t Mul2 = XTime(in);
  const uint8_t Mul8 = XTime(XTime(Mul2));
  return Mul8 ^ Mul2 ^ in;
}
|
||||
|
||||
// GF(2^8) multiplication by 0x0d over the AES polynomial:
// 13·x = (8 + 4 + 1)·x = xtime^3(x) ^ xtime^2(x) ^ x.
// Equivalent to the original 256-entry lookup table.
static uint8_t FFMul0D(uint8_t in) {
  const auto XTime = [](uint8_t v) -> uint8_t {
    return static_cast<uint8_t>((v << 1) ^ ((v & 0x80) ? 0x1b : 0x00));
  };
  const uint8_t Mul4 = XTime(XTime(in));
  const uint8_t Mul8 = XTime(Mul4);
  return Mul8 ^ Mul4 ^ in;
}
|
||||
|
||||
// Multiplies `in` by 0x0E in the AES field GF(2^8) (reduction polynomial
// 0x11B). Shift-and-add formulation equivalent to the classic 256-entry
// lookup table used by AES InvMixColumns.
static uint8_t FFMul0E(uint8_t in) {
  uint8_t Product = 0;
  uint8_t Term = in;

  for (uint8_t Bits = 0x0E; Bits != 0; Bits >>= 1) {
    if (Bits & 1) {
      Product ^= Term;
    }
    // xtime: multiply Term by x with modular reduction on overflow.
    const bool Carry = (Term & 0x80) != 0;
    Term <<= 1;
    if (Carry) {
      Term ^= 0x1B;
    }
  }

  return Product;
}
|
||||
|
||||
static __uint128_t InvMixColumns(uint8_t *State) {
|
||||
uint8_t In0[16] = {
|
||||
State[0], State[4], State[8], State[12],
|
||||
State[1], State[5], State[9], State[13],
|
||||
State[2], State[6], State[10], State[14],
|
||||
State[3], State[7], State[11], State[15],
|
||||
};
|
||||
|
||||
uint8_t Out0[4]{};
|
||||
uint8_t Out1[4]{};
|
||||
uint8_t Out2[4]{};
|
||||
uint8_t Out3[4]{};
|
||||
|
||||
for (size_t i = 0; i < 4; ++i) {
|
||||
Out0[i] = FFMul0E(In0[0 + i]) ^ FFMul0B(In0[4 + i]) ^ FFMul0D(In0[8 + i]) ^ FFMul09(In0[12 + i]);
|
||||
Out1[i] = FFMul09(In0[0 + i]) ^ FFMul0E(In0[4 + i]) ^ FFMul0B(In0[8 + i]) ^ FFMul0D(In0[12 + i]);
|
||||
Out2[i] = FFMul0D(In0[0 + i]) ^ FFMul09(In0[4 + i]) ^ FFMul0E(In0[8 + i]) ^ FFMul0B(In0[12 + i]);
|
||||
Out3[i] = FFMul0B(In0[0 + i]) ^ FFMul0D(In0[4 + i]) ^ FFMul09(In0[8 + i]) ^ FFMul0E(In0[12 + i]);
|
||||
}
|
||||
|
||||
uint8_t OutArray[16] = {
|
||||
Out0[0], Out1[0], Out2[0], Out3[0],
|
||||
Out0[1], Out1[1], Out2[1], Out3[1],
|
||||
Out0[2], Out1[2], Out2[2], Out3[2],
|
||||
Out0[3], Out1[3], Out2[3], Out3[3],
|
||||
};
|
||||
__uint128_t Res{};
|
||||
memcpy(&Res, OutArray, 16);
|
||||
return Res;
|
||||
}
|
||||
}
|
||||
|
||||
namespace CRC32 {
|
||||
// CRC32 per byte lookup table.
|
||||
constexpr std::array<uint32_t, 256> CRC32CTable = []() consteval {
|
||||
std::array<uint32_t, 256> Table{};
|
||||
|
||||
// Clang 11.x doesn't support bitreverse as a consteval
|
||||
// constexpr uint32_t Polynomial = 0x1EDC6F41;
|
||||
constexpr uint32_t PolynomialRev = 0x82F63B78; //__builtin_bitreverse32(Polynomial);
|
||||
|
||||
for (size_t Char = 0; Char < std::size(Table); ++Char) {
|
||||
uint32_t CurrentChar = Char;
|
||||
for (size_t i = 0; i < 8; ++i) {
|
||||
if (CurrentChar & 1) {
|
||||
CurrentChar = (CurrentChar >> 1) ^ PolynomialRev;
|
||||
}
|
||||
else {
|
||||
CurrentChar >>= 1;
|
||||
}
|
||||
}
|
||||
Table[Char] = CurrentChar;
|
||||
}
|
||||
|
||||
return Table;
|
||||
}();
|
||||
|
||||
uint32_t crc32cb(uint32_t Accumulator, uint8_t data) {
|
||||
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ data] ^ Accumulator >> 8;
|
||||
return Accumulator;
|
||||
}
|
||||
|
||||
uint32_t crc32ch(uint32_t Accumulator, uint16_t data) {
|
||||
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 0) & 0xFF)] ^ Accumulator >> 8;
|
||||
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 8) & 0xFF)] ^ Accumulator >> 8;
|
||||
return Accumulator;
|
||||
}
|
||||
|
||||
uint32_t crc32cw(uint32_t Accumulator, uint32_t data) {
|
||||
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 0) & 0xFF)] ^ Accumulator >> 8;
|
||||
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 8) & 0xFF)] ^ Accumulator >> 8;
|
||||
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 16) & 0xFF)] ^ Accumulator >> 8;
|
||||
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 24) & 0xFF)] ^ Accumulator >> 8;
|
||||
return Accumulator;
|
||||
}
|
||||
|
||||
uint32_t crc32cx(uint32_t Accumulator, uint64_t data) {
|
||||
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 0) & 0xFF)] ^ Accumulator >> 8;
|
||||
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 8) & 0xFF)] ^ Accumulator >> 8;
|
||||
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 16) & 0xFF)] ^ Accumulator >> 8;
|
||||
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 24) & 0xFF)] ^ Accumulator >> 8;
|
||||
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 32) & 0xFF)] ^ Accumulator >> 8;
|
||||
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 40) & 0xFF)] ^ Accumulator >> 8;
|
||||
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 48) & 0xFF)] ^ Accumulator >> 8;
|
||||
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 56) & 0xFF)] ^ Accumulator >> 8;
|
||||
return Accumulator;
|
||||
}
|
||||
}
|
||||
|
||||
namespace FEXCore::CPU {
|
||||
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
|
||||
|
||||
// IR op: AESIMC — applies the inverse AES MixColumns transform to a 128-bit
// vector and writes the result to the destination.
DEF_OP(AESImc) {
  auto Op = IROp->C<IR::IROp_VAESImc>();
  auto Vector = *GetSrc<__uint128_t*>(Data->SSAData, Op->Vector);

  // Dst = InvMixColumns(STATE)
  const __uint128_t Result = AES::InvMixColumns(reinterpret_cast<uint8_t*>(&Vector));
  memcpy(GDP, &Result, sizeof(Result));
}
|
||||
|
||||
// IR op: AESENC — one full AES encryption round.
// ShiftRows -> SubBytes -> MixColumns on the state, then XOR with the
// round key.
DEF_OP(AESEnc) {
  auto Op = IROp->C<IR::IROp_VAESEnc>();
  auto State = *GetSrc<__uint128_t*>(Data->SSAData, Op->State);
  const auto RoundKey = *GetSrc<__uint128_t*>(Data->SSAData, Op->Key);

  __uint128_t Result = AES::ShiftRows(reinterpret_cast<uint8_t*>(&State));
  Result = AES::SubBytes(reinterpret_cast<uint8_t*>(&Result), 16);
  Result = AES::MixColumns(reinterpret_cast<uint8_t*>(&Result));
  Result ^= RoundKey;
  memcpy(GDP, &Result, sizeof(Result));
}
|
||||
|
||||
// IR op: AESENCLAST — final AES encryption round.
// Identical to AESEnc except the MixColumns step is omitted.
DEF_OP(AESEncLast) {
  auto Op = IROp->C<IR::IROp_VAESEncLast>();
  auto State = *GetSrc<__uint128_t*>(Data->SSAData, Op->State);
  const auto RoundKey = *GetSrc<__uint128_t*>(Data->SSAData, Op->Key);

  __uint128_t Result = AES::ShiftRows(reinterpret_cast<uint8_t*>(&State));
  Result = AES::SubBytes(reinterpret_cast<uint8_t*>(&Result), 16);
  Result ^= RoundKey;
  memcpy(GDP, &Result, sizeof(Result));
}
|
||||
|
||||
// IR op: AESDEC — one full AES decryption round.
// InvShiftRows -> InvSubBytes -> InvMixColumns on the state, then XOR with
// the round key.
DEF_OP(AESDec) {
  auto Op = IROp->C<IR::IROp_VAESDec>();
  auto State = *GetSrc<__uint128_t*>(Data->SSAData, Op->State);
  const auto RoundKey = *GetSrc<__uint128_t*>(Data->SSAData, Op->Key);

  __uint128_t Result = AES::InvShiftRows(reinterpret_cast<uint8_t*>(&State));
  Result = AES::InvSubBytes(reinterpret_cast<uint8_t*>(&Result));
  Result = AES::InvMixColumns(reinterpret_cast<uint8_t*>(&Result));
  Result ^= RoundKey;
  memcpy(GDP, &Result, sizeof(Result));
}
|
||||
|
||||
// IR op: AESDECLAST — final AES decryption round.
// Identical to AESDec except the InvMixColumns step is omitted.
DEF_OP(AESDecLast) {
  auto Op = IROp->C<IR::IROp_VAESDecLast>();
  auto State = *GetSrc<__uint128_t*>(Data->SSAData, Op->State);
  const auto RoundKey = *GetSrc<__uint128_t*>(Data->SSAData, Op->Key);

  __uint128_t Result = AES::InvShiftRows(reinterpret_cast<uint8_t*>(&State));
  Result = AES::InvSubBytes(reinterpret_cast<uint8_t*>(&Result));
  Result ^= RoundKey;
  memcpy(GDP, &Result, sizeof(Result));
}
|
||||
|
||||
// IR op: AESKEYGENASSIST — AES key-expansion helper. Applies SubWord (and
// RotWord XOR RCON) to dwords 1 and 3 of the 128-bit source and assembles
// the four resulting dwords into the destination.
DEF_OP(AESKeyGenAssist) {
  auto Op = IROp->C<IR::IROp_VAESKeyGenAssist>();
  const uint8_t *Src1 = GetSrc<uint8_t*>(Data->SSAData, Op->Src);

  // Pseudo-code
  // X3 = Src1[127:96]
  // X2 = Src1[95:64]
  // X1 = Src1[63:32]
  // X0 = Src1[31:0]
  // RCON = (Zext)rcon
  // Dest[31:0] = SubWord(X1)
  // Dest[63:32] = RotWord(SubWord(X1)) XOR RCON
  // Dest[95:64] = SubWord(X3)
  // Dest[127:96] = RotWord(SubWord(X3)) XOR RCON
  __uint128_t Tmp{};
  uint32_t X1{};
  uint32_t X3{};
  // Extract dwords 1 and 3 of the 128-bit source.
  memcpy(&X1, &Src1[4], 4);
  memcpy(&X3, &Src1[12], 4);
  // SubWord: run each byte of the dword through the AES S-box.
  uint32_t SubWord_X1 = AES::SubBytes(reinterpret_cast<uint8_t*>(&X1), 4);
  uint32_t SubWord_X3 = AES::SubBytes(reinterpret_cast<uint8_t*>(&X3), 4);

  // Rotate-right helper. NOTE(review): the `In << (width - R)` term would be
  // a shift by the full width when R == 0 after masking; only ever invoked
  // with R == 8 below, so that case never occurs here.
  auto Ror = [] (auto In, auto R) {
    auto RotateMask = sizeof(In) * 8 - 1;
    R &= RotateMask;
    return (In >> R) | (In << (sizeof(In) * 8 - R));
  };

  // RotWord is a one-byte rotate of the 32-bit word.
  uint32_t Rot_X1 = Ror(SubWord_X1, 8);
  uint32_t Rot_X3 = Ror(SubWord_X3, 8);

  // Assemble the 128-bit result from the high dword downwards.
  Tmp = Rot_X3 ^ Op->RCON;
  Tmp <<= 32;
  Tmp |= SubWord_X3;
  Tmp <<= 32;
  Tmp |= Rot_X1 ^ Op->RCON;
  Tmp <<= 32;
  Tmp |= SubWord_X1;
  memcpy(GDP, &Tmp, sizeof(Tmp));
}
|
||||
|
||||
// IR op: CRC32-C accumulation. Folds a 1/2/4/8-byte source operand into the
// 32-bit accumulator using the Castagnoli polynomial helpers above.
DEF_OP(CRC32) {
  auto Op = IROp->C<IR::IROp_CRC32>();
  const uint32_t Accumulator = *GetSrc<uint32_t*>(Data->SSAData, Op->Src1);
  uint8_t *SrcData = GetSrc<uint8_t*>(Data->SSAData, Op->Src2);
  uint32_t Result{};

  switch (Op->SrcSize) {
    case 1: Result = CRC32::crc32cb(Accumulator, *(uint8_t*)SrcData); break;
    case 2: Result = CRC32::crc32ch(Accumulator, *(uint16_t*)SrcData); break;
    case 4: Result = CRC32::crc32cw(Accumulator, *(uint32_t*)SrcData); break;
    case 8: Result = CRC32::crc32cx(Accumulator, *(uint64_t*)SrcData); break;
    default:
      LOGMAN_MSG_A_FMT("Unknown CRC32C size: {}", Op->SrcSize);
      break;
  }

  memcpy(GDP, &Result, sizeof(Result));
}
|
||||
|
||||
// IR op: PCLMUL — 64x64 -> 128-bit carry-less (GF(2)) multiply.
// Selector bit 0 picks the low/high 64-bit lane of Src1; selector bit 4
// picks the lane of Src2 (matching the x86 PCLMULQDQ immediate encoding).
DEF_OP(PCLMUL) {
  auto Op = IROp->C<IR::IROp_PCLMUL>();

  const auto Selector = Op->Selector;
  auto* Dst = GetDest<uint64_t*>(Data->SSAData, Node);
  auto* Src1 = GetSrc<uint64_t*>(Data->SSAData, Op->Src1);
  auto* Src2 = GetSrc<uint64_t*>(Data->SSAData, Op->Src2);

  const uint64_t TMP1 = (Selector & 0x01) == 0 ? Src1[0] : Src1[1];
  const uint64_t TMP2 = (Selector & 0x10) == 0 ? Src2[0] : Src2[1];

  // Low 64 bits of the product: XOR-accumulate rhs shifted left by each set
  // bit position of lhs (bits shifted past bit 63 are dropped here).
  const auto make_lo = [](uint64_t lhs, uint64_t rhs) {
    uint64_t result = 0;

    for (size_t i = 0; i < 64; i++) {
      if ((lhs & (1ULL << i)) != 0) {
        result ^= rhs << i;
      }
    }

    return result;
  };
  // High 64 bits of the product: the portion of each partial product that
  // overflowed past bit 63. Bit 0 of lhs contributes nothing to the high
  // half, so the loop starts at 1.
  const auto make_hi = [](uint64_t lhs, uint64_t rhs) {
    uint64_t result = 0;

    for (size_t i = 1; i < 64; i++) {
      if ((lhs & (1ULL << i)) != 0) {
        result ^= rhs >> (64 - i);
      }
    }

    return result;
  };

  Dst[0] = make_lo(TMP1, TMP2);
  Dst[1] = make_hi(TMP1, TMP2);
}
|
||||
|
||||
#undef DEF_OP
|
||||
|
||||
} // namespace FEXCore::CPU
|
@ -1,349 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
$info$
|
||||
tags: backend|interpreter
|
||||
$end_info$
|
||||
*/
|
||||
|
||||
#include "Interface/Core/Interpreter/InterpreterClass.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterOps.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterDefines.h"
|
||||
|
||||
#include "Interface/Core/Interpreter/Fallbacks/F80Fallbacks.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
// Interpreter implementations of the x87 80-bit (F80*) and 64-bit (F64*)
// soft-float IR ops. The F80 ops forward to the shared OpHandlers fallback
// implementations, passing the guest FPU control word (FCW) so rounding and
// precision control are honored; the F64 ops use host libm directly.
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
// 80-bit add: Dst = Src1 + Src2 under the current FCW.
DEF_OP(F80ADD) {
  auto Op = IROp->C<IR::IROp_F80Add>();
  const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
  const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80ADD>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

// 80-bit subtract: Dst = Src1 - Src2 under the current FCW.
DEF_OP(F80SUB) {
  auto Op = IROp->C<IR::IROp_F80Sub>();
  const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
  const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80SUB>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

// 80-bit multiply: Dst = Src1 * Src2 under the current FCW.
DEF_OP(F80MUL) {
  auto Op = IROp->C<IR::IROp_F80Mul>();
  const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
  const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80MUL>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

// 80-bit divide: Dst = Src1 / Src2 under the current FCW.
DEF_OP(F80DIV) {
  auto Op = IROp->C<IR::IROp_F80Div>();
  const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
  const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80DIV>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

// FYL2X-style op; exact operand convention is defined by the OpHandlers
// fallback — this wrapper only marshals the two 80-bit sources.
DEF_OP(F80FYL2X) {
  auto Op = IROp->C<IR::IROp_F80FYL2X>();
  const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
  const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80FYL2X>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

// FPATAN-style two-operand arctangent via the fallback handler.
DEF_OP(F80ATAN) {
  auto Op = IROp->C<IR::IROp_F80ATAN>();
  const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
  const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80ATAN>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

// FPREM1-style partial remainder (IEEE) via the fallback handler.
DEF_OP(F80FPREM1) {
  auto Op = IROp->C<IR::IROp_F80FPREM1>();
  const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
  const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80FPREM1>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

// FPREM-style partial remainder via the fallback handler.
DEF_OP(F80FPREM) {
  auto Op = IROp->C<IR::IROp_F80FPREM>();
  const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
  const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80FPREM>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

// FSCALE-style scale-by-power-of-two via the fallback handler.
DEF_OP(F80SCALE) {
  auto Op = IROp->C<IR::IROp_F80SCALE>();
  const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
  const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80SCALE>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

// Converts an 80-bit value to a 4- or 8-byte float, sized by the IR op's
// result size. Unknown sizes are logged and produce no result.
DEF_OP(F80CVT) {
  auto Op = IROp->C<IR::IROp_F80CVT>();
  const uint8_t OpSize = IROp->Size;

  const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);

  switch (OpSize) {
    case 4: {
      const auto Tmp = CPU::OpHandlers<IR::OP_F80CVT>::handle4(Data->State->CurrentFrame->State.FCW, Src);
      memcpy(GDP, &Tmp, OpSize);
      break;
    }
    case 8: {
      const auto Tmp = CPU::OpHandlers<IR::OP_F80CVT>::handle8(Data->State->CurrentFrame->State.FCW, Src);
      memcpy(GDP, &Tmp, OpSize);
      break;
    }
    default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize);
  }
}

// Converts an 80-bit value to a 2/4/8-byte signed integer. Op->Truncate
// selects the truncating (round-toward-zero) handler variant.
DEF_OP(F80CVTINT) {
  auto Op = IROp->C<IR::IROp_F80CVTInt>();
  const uint8_t OpSize = IROp->Size;

  const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);

  switch (OpSize) {
    case 2: {
      int16_t Tmp = (Op->Truncate? FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle2t : FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle2)(Data->State->CurrentFrame->State.FCW, Src);
      memcpy(GDP, &Tmp, sizeof(Tmp));
      break;
    }
    case 4: {
      int32_t Tmp = (Op->Truncate? FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle4t : FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle4)(Data->State->CurrentFrame->State.FCW, Src);
      memcpy(GDP, &Tmp, sizeof(Tmp));
      break;
    }
    case 8: {
      int64_t Tmp = (Op->Truncate? FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle8t : FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle8)(Data->State->CurrentFrame->State.FCW, Src);
      memcpy(GDP, &Tmp, sizeof(Tmp));
      break;
    }
    default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize);
  }
}

// Converts a 4-byte float or 8-byte double source to an 80-bit value.
DEF_OP(F80CVTTO) {
  auto Op = IROp->C<IR::IROp_F80CVTTo>();

  switch (Op->SrcSize) {
    case 4: {
      float Src = *GetSrc<float *>(Data->SSAData, Op->X80Src);
      const auto Tmp = CPU::OpHandlers<IR::OP_F80CVTTO>::handle4(Data->State->CurrentFrame->State.FCW, Src);
      memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
      break;
    }
    case 8: {
      double Src = *GetSrc<double *>(Data->SSAData, Op->X80Src);
      const auto Tmp = CPU::OpHandlers<IR::OP_F80CVTTO>::handle8(Data->State->CurrentFrame->State.FCW, Src);
      memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
      break;
    }
    default: LogMan::Msg::DFmt("Unhandled size: {}", Op->SrcSize);
  }
}

// Converts a 2- or 4-byte signed integer source to an 80-bit value.
DEF_OP(F80CVTTOINT) {
  auto Op = IROp->C<IR::IROp_F80CVTToInt>();

  switch (Op->SrcSize) {
    case 2: {
      int16_t Src = *GetSrc<int16_t*>(Data->SSAData, Op->Src);
      const auto Tmp = CPU::OpHandlers<IR::OP_F80CVTTOINT>::handle2(Data->State->CurrentFrame->State.FCW, Src);
      memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
      break;
    }
    case 4: {
      int32_t Src = *GetSrc<int32_t*>(Data->SSAData, Op->Src);
      const auto Tmp = CPU::OpHandlers<IR::OP_F80CVTTOINT>::handle4(Data->State->CurrentFrame->State.FCW, Src);
      memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
      break;
    }
    default: LogMan::Msg::DFmt("Unhandled size: {}", Op->SrcSize);
  }
}

// Unary 80-bit ops: each loads one X80SoftFloat source, dispatches to the
// matching fallback handler with the current FCW, and stores the result.
DEF_OP(F80ROUND) {
  auto Op = IROp->C<IR::IROp_F80Round>();
  const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80ROUND>::handle(Data->State->CurrentFrame->State.FCW, Src);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80F2XM1) {
  auto Op = IROp->C<IR::IROp_F80F2XM1>();
  const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80F2XM1>::handle(Data->State->CurrentFrame->State.FCW, Src);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80TAN) {
  auto Op = IROp->C<IR::IROp_F80TAN>();
  const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80TAN>::handle(Data->State->CurrentFrame->State.FCW, Src);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80SQRT) {
  auto Op = IROp->C<IR::IROp_F80SQRT>();
  const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80SQRT>::handle(Data->State->CurrentFrame->State.FCW, Src);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80SIN) {
  auto Op = IROp->C<IR::IROp_F80SIN>();
  const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80SIN>::handle(Data->State->CurrentFrame->State.FCW, Src);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80COS) {
  auto Op = IROp->C<IR::IROp_F80COS>();
  const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80COS>::handle(Data->State->CurrentFrame->State.FCW, Src);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

// FXTRACT exponent/significand halves via the fallback handlers.
DEF_OP(F80XTRACT_EXP) {
  auto Op = IROp->C<IR::IROp_F80XTRACT_EXP>();
  const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80XTRACT_EXP>::handle(Data->State->CurrentFrame->State.FCW, Src);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80XTRACT_SIG) {
  auto Op = IROp->C<IR::IROp_F80XTRACT_SIG>();
  const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80XTRACT_SIG>::handle(Data->State->CurrentFrame->State.FCW, Src);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

// 80-bit compare: the handler reports LT/UNORDERED/EQ flags packed into an
// integer, stored directly as the GPR result (not an X80 value).
DEF_OP(F80CMP) {
  auto Op = IROp->C<IR::IROp_F80Cmp>();
  const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
  const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
  const auto ResultFlags = CPU::OpHandlers<IR::OP_F80CMP>::handle<IR::FCMP_FLAG_LT | IR::FCMP_FLAG_UNORDERED | IR::FCMP_FLAG_EQ>(Data->State->CurrentFrame->State.FCW, Src1, Src2);

  GD = ResultFlags;
}

// Packed-BCD (FBLD/FBSTP) conversion via the fallback handlers.
DEF_OP(F80BCDLOAD) {
  auto Op = IROp->C<IR::IROp_F80BCDLoad>();
  const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80BCDLOAD>::handle(Data->State->CurrentFrame->State.FCW, Src);
  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80BCDSTORE) {
  auto Op = IROp->C<IR::IROp_F80BCDStore>();
  const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
  const auto Tmp = CPU::OpHandlers<IR::OP_F80BCDSTORE>::handle(Data->State->CurrentFrame->State.FCW, Src);
  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

// Reduced-precision (64-bit) variants implemented directly with host libm.
DEF_OP(F64SIN) {
  auto Op = IROp->C<IR::IROp_F64SIN>();
  const double Src = *GetSrc<double*>(Data->SSAData, Op->Src);
  const double Tmp = sin(Src);
  memcpy(GDP, &Tmp, sizeof(double));
}

DEF_OP(F64COS) {
  auto Op = IROp->C<IR::IROp_F64COS>();
  const double Src = *GetSrc<double*>(Data->SSAData, Op->Src);
  const double Tmp = cos(Src);
  memcpy(GDP, &Tmp, sizeof(double));
}

DEF_OP(F64TAN) {
  auto Op = IROp->C<IR::IROp_F64TAN>();
  const double Src = *GetSrc<double*>(Data->SSAData, Op->Src);
  const double Tmp = tan(Src);
  memcpy(GDP, &Tmp, sizeof(double));
}

// 2^x - 1.
DEF_OP(F64F2XM1) {
  auto Op = IROp->C<IR::IROp_F64F2XM1>();
  const double Src = *GetSrc<double*>(Data->SSAData, Op->Src);
  const double Tmp = exp2(Src) - 1.0;
  memcpy(GDP, &Tmp, sizeof(double));
}

DEF_OP(F64ATAN) {
  auto Op = IROp->C<IR::IROp_F64ATAN>();
  const double Src1 = *GetSrc<double*>(Data->SSAData, Op->Src1);
  const double Src2 = *GetSrc<double*>(Data->SSAData, Op->Src2);
  const double Tmp = atan2(Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(double));
}

DEF_OP(F64FPREM) {
  auto Op = IROp->C<IR::IROp_F64FPREM>();
  const double Src1 = *GetSrc<double*>(Data->SSAData, Op->Src1);
  const double Src2 = *GetSrc<double*>(Data->SSAData, Op->Src2);
  const double Tmp = fmod(Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(double));
}

DEF_OP(F64FPREM1) {
  auto Op = IROp->C<IR::IROp_F64FPREM1>();
  const double Src1 = *GetSrc<double*>(Data->SSAData, Op->Src1);
  const double Src2 = *GetSrc<double*>(Data->SSAData, Op->Src2);
  const double Tmp = remainder(Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(double));
}

// Src2 * log2(Src1). NOTE(review): the first operand is read from Op->Src
// while siblings use Op->Src1 — presumably the IR op's field is simply
// named Src here; confirm against the IR definition.
DEF_OP(F64FYL2X) {
  auto Op = IROp->C<IR::IROp_F64FYL2X>();
  const double Src1 = *GetSrc<double*>(Data->SSAData, Op->Src);
  const double Src2 = *GetSrc<double*>(Data->SSAData, Op->Src2);
  const double Tmp = Src2 * log2(Src1);

  memcpy(GDP, &Tmp, sizeof(double));
}

// Src1 * 2^trunc(Src2).
DEF_OP(F64SCALE) {
  auto Op = IROp->C<IR::IROp_F64SCALE>();
  const double Src1 = *GetSrc<double*>(Data->SSAData, Op->Src1);
  const double Src2 = *GetSrc<double*>(Data->SSAData, Op->Src2);
  const double trunc = (double)(int64_t)(Src2); //truncate
  const double Tmp = Src1 * exp2(trunc);

  memcpy(GDP, &Tmp, sizeof(double));
}

#undef DEF_OP

} // namespace FEXCore::CPU
|
@ -1,22 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
$info$
|
||||
tags: backend|interpreter
|
||||
$end_info$
|
||||
*/
|
||||
|
||||
#include "Interface/Core/Interpreter/InterpreterClass.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterOps.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterDefines.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
// Interpreter implementation of the flag-extraction IR op.
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
// IR op: extracts a single flag bit (bit index Op->Flag) from the 64-bit
// source value and stores it as a 0/1 GPR result.
DEF_OP(GetHostFlag) {
  auto Op = IROp->C<IR::IROp_GetHostFlag>();
  GD = (*GetSrc<uint64_t*>(Data->SSAData, Op->Value) >> Op->Flag) & 1;
}
#undef DEF_OP

} // namespace FEXCore::CPU
|
@ -1,424 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
#include "Interface/Context/Context.h"
|
||||
#include "Interface/Core/CPUID.h"
|
||||
#include "InterpreterDefines.h"
|
||||
#include "InterpreterOps.h"
|
||||
|
||||
#include <FEXCore/Core/CPUBackend.h>
|
||||
#include <FEXCore/Core/CoreState.h>
|
||||
#include <FEXCore/Debug/InternalThreadState.h>
|
||||
#include <FEXCore/HLE/SyscallHandler.h>
|
||||
#include <FEXCore/IR/IR.h>
|
||||
#include <FEXCore/IR/IntrusiveIRList.h>
|
||||
#include <FEXCore/Utils/BitUtils.h>
|
||||
#include <FEXCore/Utils/CompilerDefs.h>
|
||||
#include <FEXCore/Utils/LogManager.h>
|
||||
|
||||
#include "Interface/HLE/Thunks/Thunks.h"
|
||||
|
||||
#include <alloca.h>
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <bit>
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
|
||||
namespace FEXCore::CPU {
|
||||
|
||||
using OpHandler = void (*)(IR::IROp_Header *IROp, InterpreterOps::IROpData *Data, IR::NodeID Node);
|
||||
using OpHandlerArray = std::array<OpHandler, IR::IROps::OP_LAST + 1>;
|
||||
|
||||
constexpr OpHandlerArray InterpreterOpHandlers = [] {
|
||||
OpHandlerArray Handlers{};
|
||||
for (auto& Entry : Handlers) {
|
||||
Entry = &InterpreterOps::Op_Unhandled;
|
||||
}
|
||||
|
||||
#define REGISTER_OP(op, x) Handlers[IR::IROps::OP_##op] = &InterpreterOps::Op_##x
|
||||
|
||||
// ALU ops
|
||||
REGISTER_OP(TRUNCELEMENTPAIR, TruncElementPair);
|
||||
REGISTER_OP(CONSTANT, Constant);
|
||||
REGISTER_OP(ENTRYPOINTOFFSET, EntrypointOffset);
|
||||
REGISTER_OP(INLINECONSTANT, InlineConstant);
|
||||
REGISTER_OP(INLINEENTRYPOINTOFFSET, InlineEntrypointOffset);
|
||||
REGISTER_OP(CYCLECOUNTER, CycleCounter);
|
||||
REGISTER_OP(ADD, Add);
|
||||
REGISTER_OP(ADDNZCV, AddNZCV);
|
||||
REGISTER_OP(TESTNZ, TestNZ);
|
||||
REGISTER_OP(SUB, Sub);
|
||||
REGISTER_OP(SUBNZCV, SubNZCV);
|
||||
REGISTER_OP(NEG, Neg);
|
||||
REGISTER_OP(ABS, Abs);
|
||||
REGISTER_OP(MUL, Mul);
|
||||
REGISTER_OP(UMUL, UMul);
|
||||
REGISTER_OP(DIV, Div);
|
||||
REGISTER_OP(UDIV, UDiv);
|
||||
REGISTER_OP(REM, Rem);
|
||||
REGISTER_OP(UREM, URem);
|
||||
REGISTER_OP(MULH, MulH);
|
||||
REGISTER_OP(UMULH, UMulH);
|
||||
REGISTER_OP(OR, Or);
|
||||
REGISTER_OP(ORLSHL, Orlshl);
|
||||
REGISTER_OP(ORLSHR, Orlshr);
|
||||
REGISTER_OP(AND, And);
|
||||
REGISTER_OP(ANDN, Andn);
|
||||
REGISTER_OP(XOR, Xor);
|
||||
REGISTER_OP(LSHL, Lshl);
|
||||
REGISTER_OP(LSHR, Lshr);
|
||||
REGISTER_OP(ASHR, Ashr);
|
||||
REGISTER_OP(ROR, Ror);
|
||||
REGISTER_OP(EXTR, Extr);
|
||||
REGISTER_OP(PDEP, PDep);
|
||||
REGISTER_OP(PEXT, PExt);
|
||||
REGISTER_OP(LDIV, LDiv);
|
||||
REGISTER_OP(LUDIV, LUDiv);
|
||||
REGISTER_OP(LREM, LRem);
|
||||
REGISTER_OP(LUREM, LURem);
|
||||
REGISTER_OP(NOT, Not);
|
||||
REGISTER_OP(POPCOUNT, Popcount);
|
||||
REGISTER_OP(FINDLSB, FindLSB);
|
||||
REGISTER_OP(FINDMSB, FindMSB);
|
||||
REGISTER_OP(FINDTRAILINGZEROES, FindTrailingZeroes);
|
||||
REGISTER_OP(COUNTLEADINGZEROES, CountLeadingZeroes);
|
||||
REGISTER_OP(REV, Rev);
|
||||
REGISTER_OP(BFI, Bfi);
|
||||
REGISTER_OP(BFXIL, Bfxil);
|
||||
REGISTER_OP(BFE, Bfe);
|
||||
REGISTER_OP(SBFE, Sbfe);
|
||||
REGISTER_OP(SELECT, Select);
|
||||
REGISTER_OP(VEXTRACTTOGPR, VExtractToGPR);
|
||||
REGISTER_OP(FLOAT_TOGPR_ZS, Float_ToGPR_ZS);
|
||||
REGISTER_OP(FLOAT_TOGPR_S, Float_ToGPR_S);
|
||||
REGISTER_OP(FCMP, FCmp);
|
||||
|
||||
// Atomic ops
|
||||
REGISTER_OP(CASPAIR, CASPair);
|
||||
REGISTER_OP(CAS, CAS);
|
||||
REGISTER_OP(ATOMICADD, AtomicAdd);
|
||||
REGISTER_OP(ATOMICSUB, AtomicSub);
|
||||
REGISTER_OP(ATOMICAND, AtomicAnd);
|
||||
REGISTER_OP(ATOMICOR, AtomicOr);
|
||||
REGISTER_OP(ATOMICXOR, AtomicXor);
|
||||
REGISTER_OP(ATOMICSWAP, AtomicSwap);
|
||||
REGISTER_OP(ATOMICFETCHADD, AtomicFetchAdd);
|
||||
REGISTER_OP(ATOMICFETCHSUB, AtomicFetchSub);
|
||||
REGISTER_OP(ATOMICFETCHAND, AtomicFetchAnd);
|
||||
REGISTER_OP(ATOMICFETCHOR, AtomicFetchOr);
|
||||
REGISTER_OP(ATOMICFETCHXOR, AtomicFetchXor);
|
||||
REGISTER_OP(ATOMICFETCHNEG, AtomicFetchNeg);
|
||||
REGISTER_OP(TELEMETRYSETVALUE, TelemetrySetValue);
|
||||
|
||||
// Branch ops
|
||||
REGISTER_OP(CALLBACKRETURN, CallbackReturn);
|
||||
REGISTER_OP(EXITFUNCTION, ExitFunction);
|
||||
REGISTER_OP(JUMP, Jump);
|
||||
REGISTER_OP(CONDJUMP, CondJump);
|
||||
REGISTER_OP(SYSCALL, Syscall);
|
||||
REGISTER_OP(INLINESYSCALL, InlineSyscall);
|
||||
REGISTER_OP(THUNK, Thunk);
|
||||
REGISTER_OP(VALIDATECODE, ValidateCode);
|
||||
REGISTER_OP(THREADREMOVECODEENTRY, ThreadRemoveCodeEntry);
|
||||
REGISTER_OP(CPUID, CPUID);
|
||||
REGISTER_OP(XGETBV, XGETBV);
|
||||
|
||||
// Conversion ops
|
||||
REGISTER_OP(VINSGPR, VInsGPR);
|
||||
REGISTER_OP(VCASTFROMGPR, VCastFromGPR);
|
||||
REGISTER_OP(VDUPFROMGPR, VDupFromGPR);
|
||||
REGISTER_OP(FLOAT_FROMGPR_S, Float_FromGPR_S);
|
||||
REGISTER_OP(FLOAT_FTOF, Float_FToF);
|
||||
REGISTER_OP(VECTOR_STOF, Vector_SToF);
|
||||
REGISTER_OP(VECTOR_FTOZS, Vector_FToZS);
|
||||
REGISTER_OP(VECTOR_FTOS, Vector_FToS);
|
||||
REGISTER_OP(VECTOR_FTOF, Vector_FToF);
|
||||
REGISTER_OP(VECTOR_FTOI, Vector_FToI);
|
||||
|
||||
// Flag ops
|
||||
REGISTER_OP(GETHOSTFLAG, GetHostFlag);
|
||||
|
||||
// Memory ops
|
||||
REGISTER_OP(LOADCONTEXT, LoadContext);
|
||||
REGISTER_OP(STORECONTEXT, StoreContext);
|
||||
REGISTER_OP(LOADREGISTER, LoadRegister);
|
||||
REGISTER_OP(STOREREGISTER, StoreRegister);
|
||||
REGISTER_OP(LOADCONTEXTINDEXED, LoadContextIndexed);
|
||||
REGISTER_OP(STORECONTEXTINDEXED, StoreContextIndexed);
|
||||
REGISTER_OP(SPILLREGISTER, SpillRegister);
|
||||
REGISTER_OP(FILLREGISTER, FillRegister);
|
||||
REGISTER_OP(LOADFLAG, LoadFlag);
|
||||
REGISTER_OP(STOREFLAG, StoreFlag);
|
||||
REGISTER_OP(LOADMEM, LoadMem);
|
||||
REGISTER_OP(STOREMEM, StoreMem);
|
||||
REGISTER_OP(LOADMEMTSO, LoadMem);
|
||||
REGISTER_OP(STOREMEMTSO, StoreMem);
|
||||
REGISTER_OP(VLOADVECTORMASKED, VLoadVectorMasked);
|
||||
REGISTER_OP(VSTOREVECTORMASKED, VStoreVectorMasked);
|
||||
REGISTER_OP(VLOADVECTORELEMENT, VLoadVectorElement);
|
||||
REGISTER_OP(VSTOREVECTORELEMENT, VStoreVectorElement);
|
||||
REGISTER_OP(VBROADCASTFROMMEM, VBroadcastFromMem);
|
||||
REGISTER_OP(PUSH, Push);
|
||||
REGISTER_OP(MEMSET, MemSet);
|
||||
REGISTER_OP(MEMCPY, MemCpy);
|
||||
REGISTER_OP(CACHELINECLEAR, CacheLineClear);
|
||||
REGISTER_OP(CACHELINECLEAN, CacheLineClean);
|
||||
REGISTER_OP(CACHELINEZERO, CacheLineZero);
|
||||
|
||||
// Misc ops
|
||||
REGISTER_OP(DUMMY, NoOp);
|
||||
REGISTER_OP(IRHEADER, NoOp);
|
||||
REGISTER_OP(CODEBLOCK, NoOp);
|
||||
REGISTER_OP(BEGINBLOCK, NoOp);
|
||||
REGISTER_OP(ENDBLOCK, NoOp);
|
||||
REGISTER_OP(GUESTOPCODE, NoOp);
|
||||
REGISTER_OP(FENCE, Fence);
|
||||
REGISTER_OP(BREAK, Break);
|
||||
REGISTER_OP(PRINT, Print);
|
||||
REGISTER_OP(GETROUNDINGMODE, GetRoundingMode);
|
||||
REGISTER_OP(SETROUNDINGMODE, SetRoundingMode);
|
||||
REGISTER_OP(INVALIDATEFLAGS, NoOp);
|
||||
REGISTER_OP(PROCESSORID, ProcessorID);
|
||||
REGISTER_OP(RDRAND, RDRAND);
|
||||
REGISTER_OP(YIELD, Yield);
|
||||
|
||||
// Move ops
|
||||
REGISTER_OP(EXTRACTELEMENTPAIR, ExtractElementPair);
|
||||
REGISTER_OP(CREATEELEMENTPAIR, CreateElementPair);
|
||||
|
||||
// Vector ops
|
||||
REGISTER_OP(VECTORZERO, VectorZero);
|
||||
REGISTER_OP(VECTORIMM, VectorImm);
|
||||
REGISTER_OP(LOADNAMEDVECTORCONSTANT, LoadNamedVectorConstant);
|
||||
REGISTER_OP(LOADNAMEDVECTORINDEXEDCONSTANT, LoadNamedVectorIndexedConstant);
|
||||
REGISTER_OP(VMOV, VMov);
|
||||
REGISTER_OP(VAND, VAnd);
|
||||
REGISTER_OP(VBIC, VBic);
|
||||
REGISTER_OP(VOR, VOr);
|
||||
REGISTER_OP(VXOR, VXor);
|
||||
REGISTER_OP(VADD, VAdd);
|
||||
REGISTER_OP(VSUB, VSub);
|
||||
REGISTER_OP(VUQADD, VUQAdd);
|
||||
REGISTER_OP(VUQSUB, VUQSub);
|
||||
REGISTER_OP(VSQADD, VSQAdd);
|
||||
REGISTER_OP(VSQSUB, VSQSub);
|
||||
REGISTER_OP(VADDP, VAddP);
|
||||
REGISTER_OP(VADDV, VAddV);
|
||||
REGISTER_OP(VUMINV, VUMinV);
|
||||
REGISTER_OP(VURAVG, VURAvg);
|
||||
REGISTER_OP(VABS, VAbs);
|
||||
REGISTER_OP(VPOPCOUNT, VPopcount);
|
||||
REGISTER_OP(VFADD, VFAdd);
|
||||
REGISTER_OP(VFADDP, VFAddP);
|
||||
REGISTER_OP(VFSUB, VFSub);
|
||||
REGISTER_OP(VFMUL, VFMul);
|
||||
REGISTER_OP(VFDIV, VFDiv);
|
||||
REGISTER_OP(VFMIN, VFMin);
|
||||
REGISTER_OP(VFMAX, VFMax);
|
||||
REGISTER_OP(VFRECP, VFRecp);
|
||||
REGISTER_OP(VFSQRT, VFSqrt);
|
||||
REGISTER_OP(VFRSQRT, VFRSqrt);
|
||||
REGISTER_OP(VNEG, VNeg);
|
||||
REGISTER_OP(VFNEG, VFNeg);
|
||||
REGISTER_OP(VNOT, VNot);
|
||||
REGISTER_OP(VUMIN, VUMin);
|
||||
REGISTER_OP(VSMIN, VSMin);
|
||||
REGISTER_OP(VUMAX, VUMax);
|
||||
REGISTER_OP(VSMAX, VSMax);
|
||||
REGISTER_OP(VZIP, VZip);
|
||||
REGISTER_OP(VZIP2, VZip);
|
||||
REGISTER_OP(VUNZIP, VUnZip);
|
||||
REGISTER_OP(VUNZIP2, VUnZip);
|
||||
REGISTER_OP(VTRN, VTrn);
|
||||
REGISTER_OP(VTRN2, VTrn);
|
||||
REGISTER_OP(VBSL, VBSL);
|
||||
REGISTER_OP(VCMPEQ, VCMPEQ);
|
||||
REGISTER_OP(VCMPEQZ, VCMPEQZ);
|
||||
REGISTER_OP(VCMPGT, VCMPGT);
|
||||
REGISTER_OP(VCMPGTZ, VCMPGTZ);
|
||||
REGISTER_OP(VCMPLTZ, VCMPLTZ);
|
||||
REGISTER_OP(VFCMPEQ, VFCMPEQ);
|
||||
REGISTER_OP(VFCMPNEQ, VFCMPNEQ);
|
||||
REGISTER_OP(VFCMPLT, VFCMPLT);
|
||||
REGISTER_OP(VFCMPGT, VFCMPGT);
|
||||
REGISTER_OP(VFCMPLE, VFCMPLE);
|
||||
REGISTER_OP(VFCMPORD, VFCMPORD);
|
||||
REGISTER_OP(VFCMPUNO, VFCMPUNO);
|
||||
REGISTER_OP(VUSHL, VUShl);
|
||||
REGISTER_OP(VUSHR, VUShr);
|
||||
REGISTER_OP(VSSHR, VSShr);
|
||||
REGISTER_OP(VUSHLS, VUShlS);
|
||||
REGISTER_OP(VUSHRS, VUShrS);
|
||||
REGISTER_OP(VSSHRS, VSShrS);
|
||||
REGISTER_OP(VUSHLSWIDE, VUShlSWide);
|
||||
REGISTER_OP(VUSHRSWIDE, VUShrSWide);
|
||||
REGISTER_OP(VSSHRSWIDE, VSShrSWide);
|
||||
REGISTER_OP(VINSELEMENT, VInsElement);
|
||||
REGISTER_OP(VDUPELEMENT, VDupElement);
|
||||
REGISTER_OP(VEXTR, VExtr);
|
||||
REGISTER_OP(VUSHRI, VUShrI);
|
||||
REGISTER_OP(VSSHRI, VSShrI);
|
||||
REGISTER_OP(VSHLI, VShlI);
|
||||
REGISTER_OP(VUSHRNI, VUShrNI);
|
||||
REGISTER_OP(VUSHRNI2, VUShrNI2);
|
||||
REGISTER_OP(VSXTL, VSXTL);
|
||||
REGISTER_OP(VSXTL2, VSXTL2);
|
||||
REGISTER_OP(VUXTL, VUXTL);
|
||||
REGISTER_OP(VUXTL2, VUXTL2);
|
||||
REGISTER_OP(VSQXTN, VSQXTN);
|
||||
REGISTER_OP(VSQXTN2, VSQXTN2);
|
||||
REGISTER_OP(VSQXTNPAIR, VSQXTNPair);
|
||||
REGISTER_OP(VSQXTUN, VSQXTUN);
|
||||
REGISTER_OP(VSQXTUN2, VSQXTUN2);
|
||||
REGISTER_OP(VSQXTUNPAIR, VSQXTUNPair);
|
||||
REGISTER_OP(VUMUL, VUMul);
|
||||
REGISTER_OP(VSMUL, VSMul);
|
||||
REGISTER_OP(VUMULL, VUMull);
|
||||
REGISTER_OP(VSMULL, VSMull);
|
||||
REGISTER_OP(VUMULL2, VUMull2);
|
||||
REGISTER_OP(VSMULL2, VSMull2);
|
||||
REGISTER_OP(VUMULH, VUMulH);
|
||||
REGISTER_OP(VSMULH, VSMulH);
|
||||
REGISTER_OP(VUABDL, VUABDL);
|
||||
REGISTER_OP(VUABDL2, VUABDL2);
|
||||
REGISTER_OP(VTBL1, VTBL1);
|
||||
REGISTER_OP(VTBL2, VTBL2);
|
||||
REGISTER_OP(VREV32, VRev32);
|
||||
REGISTER_OP(VREV64, VRev64);
|
||||
REGISTER_OP(VPCMPESTRX, VPCMPESTRX);
|
||||
REGISTER_OP(VPCMPISTRX, VPCMPISTRX);
|
||||
REGISTER_OP(VFCADD, VFCADD);
|
||||
|
||||
// Encryption ops
|
||||
REGISTER_OP(VAESIMC, AESImc);
|
||||
REGISTER_OP(VAESENC, AESEnc);
|
||||
REGISTER_OP(VAESENCLAST, AESEncLast);
|
||||
REGISTER_OP(VAESDEC, AESDec);
|
||||
REGISTER_OP(VAESDECLAST, AESDecLast);
|
||||
REGISTER_OP(VAESKEYGENASSIST, AESKeyGenAssist);
|
||||
REGISTER_OP(CRC32, CRC32);
|
||||
REGISTER_OP(PCLMUL, PCLMUL);
|
||||
|
||||
// F80 ops
|
||||
REGISTER_OP(F80ADD, F80ADD);
|
||||
REGISTER_OP(F80SUB, F80SUB);
|
||||
REGISTER_OP(F80MUL, F80MUL);
|
||||
REGISTER_OP(F80DIV, F80DIV);
|
||||
REGISTER_OP(F80FYL2X, F80FYL2X);
|
||||
REGISTER_OP(F80ATAN, F80ATAN);
|
||||
REGISTER_OP(F80FPREM1, F80FPREM1);
|
||||
REGISTER_OP(F80FPREM, F80FPREM);
|
||||
REGISTER_OP(F80SCALE, F80SCALE);
|
||||
REGISTER_OP(F80CVT, F80CVT);
|
||||
REGISTER_OP(F80CVTINT, F80CVTINT);
|
||||
REGISTER_OP(F80CVTTO, F80CVTTO);
|
||||
REGISTER_OP(F80CVTTOINT, F80CVTTOINT);
|
||||
REGISTER_OP(F80ROUND, F80ROUND);
|
||||
REGISTER_OP(F80F2XM1, F80F2XM1);
|
||||
REGISTER_OP(F80TAN, F80TAN);
|
||||
REGISTER_OP(F80SQRT, F80SQRT);
|
||||
REGISTER_OP(F80SIN, F80SIN);
|
||||
REGISTER_OP(F80COS, F80COS);
|
||||
REGISTER_OP(F80XTRACT_EXP, F80XTRACT_EXP);
|
||||
REGISTER_OP(F80XTRACT_SIG, F80XTRACT_SIG);
|
||||
REGISTER_OP(F80CMP, F80CMP);
|
||||
REGISTER_OP(F80BCDLOAD, F80BCDLOAD);
|
||||
REGISTER_OP(F80BCDSTORE, F80BCDSTORE);
|
||||
|
||||
// F64 ops
|
||||
REGISTER_OP(F64SIN, F64SIN);
|
||||
REGISTER_OP(F64COS, F64COS);
|
||||
REGISTER_OP(F64TAN, F64TAN);
|
||||
REGISTER_OP(F64F2XM1, F64F2XM1);
|
||||
REGISTER_OP(F64ATAN, F64ATAN);
|
||||
REGISTER_OP(F64FPREM, F64FPREM);
|
||||
REGISTER_OP(F64FPREM1, F64FPREM1);
|
||||
REGISTER_OP(F64FYL2X, F64FYL2X);
|
||||
REGISTER_OP(F64SCALE, F64SCALE);
|
||||
|
||||
return Handlers;
|
||||
}();
|
||||
|
||||
// Fallback handler for any IR op without an interpreter implementation.
// Aborts via logging assert, printing the name of the offending op.
void InterpreterOps::Op_Unhandled(FEXCore::IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) {
  LOGMAN_MSG_A_FMT("Unhandled IR Op: {}", FEXCore::IR::GetName(IROp->Op));
}
|
||||
|
||||
// Intentionally empty handler for structural/marker IR ops (DUMMY, IRHEADER,
// CODEBLOCK, BEGINBLOCK, ENDBLOCK, etc.) that require no runtime work.
void InterpreterOps::Op_NoOp(FEXCore::IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) {
}
|
||||
|
||||
// Interprets an entire IR listing against the given CPU state frame.
// Walks the IR block-by-block, dispatching each op through the
// InterpreterOpHandlers table until a handler requests exit (Quit),
// a branch handler retargets BlockIterator (Redo), or the last block ends.
void InterpreterOps::InterpretIR(FEXCore::Core::CpuStateFrame *Frame, FEXCore::IR::IRListView const *CurrentIR) {
  // Record the current native stack position; exposed to handlers through OpData.
  volatile void *StackEntry = alloca(0);

  const uintptr_t ListSize = CurrentIR->GetSSACount();

  static_assert(sizeof(FEXCore::IR::OrderedNode) == 16);

  auto BlockEnd = CurrentIR->GetBlocks().end();

  // SSA data elements must be able to accommodate data that would
  // fit inside the largest vector size (otherwise vector operations
  // go kaboom, and we don't want that).
  const size_t SSADataSize = ListSize * MaxInterpeterVectorSize;

  InterpreterOps::IROpData OpData{
    .State = Frame->Thread,
    .CurrentEntry = Frame->State.rip,
    .CurrentIR = CurrentIR,
    .StackEntry = StackEntry,
    .SSAData = alloca(SSADataSize),       // stack-allocated SSA value storage, one max-vector slot per SSA node
    .BlockResults = {},
    .BlockIterator = CurrentIR->GetBlocks().begin(),
  };

  // Clear all SSAData entries to zero. Required for Zero-extend semantics
  memset(OpData.SSAData, 0, SSADataSize);

  while (1) {
    using namespace FEXCore::IR;
    auto [BlockNode, BlockHeader] = OpData.BlockIterator();
    auto BlockIROp = BlockHeader->CW<IROp_CodeBlock>();
    LOGMAN_THROW_AA_FMT(BlockIROp->Header.Op == IR::OP_CODEBLOCK, "IR type failed to be a code block");

    // Reset the block results per block
    memset(&OpData.BlockResults, 0, sizeof(OpData.BlockResults));

    auto CodeBegin = CurrentIR->at(BlockIROp->Begin);
    auto CodeLast = CurrentIR->at(BlockIROp->Last);

    for (auto [CodeNode, IROp] : CurrentIR->GetCode(BlockNode)) {
      const auto ID = CurrentIR->GetID(CodeNode);
      const uint32_t Op = IROp->Op;

      // Execute handler
      OpHandler Handler = InterpreterOpHandlers[Op];

      Handler(IROp, &OpData, ID);

      // Stop walking this block on early exit, branch retarget, or end of block.
      if (OpData.BlockResults.Quit ||
          OpData.BlockResults.Redo ||
          CodeBegin == CodeLast) {
        break;
      }

      ++CodeBegin;
    }

    // Iterator will have been set, go again
    if (OpData.BlockResults.Redo) {
      continue;
    }

    // If we have set to early exit or at the end block then leave
    if (OpData.BlockResults.Quit || ++OpData.BlockIterator == BlockEnd) {
      break;
    }
  }
}
|
||||
|
||||
}
|
@ -1,859 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
$info$
|
||||
tags: backend|interpreter
|
||||
$end_info$
|
||||
*/
|
||||
|
||||
#include "Interface/Core/CPUID.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterClass.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterOps.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterDefines.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace FEXCore::CPU {
|
||||
// Flushes (invalidates + writes back) the cache line containing Addr.
// x86-64 uses clflush directly; other targets use the compiler's
// clear-cache builtin over a 64-byte span.
static inline void CacheLineFlush(char *Addr) {
#ifdef _M_X86_64
  __asm volatile (
    "clflush (%[Addr]);"
    :: [Addr] "r" (Addr)
    : "memory");
#else
  __builtin___clear_cache(Addr, Addr+64);
#endif
}
|
||||
|
||||
// Cleans (writes back without invalidating) the cache line containing Addr.
// x86-64: clwb; AArch64: dc cvac; other architectures hard-fail.
static inline void CacheLineClean(char *Addr) {
#ifdef _M_X86_64
  __asm volatile (
    "clwb (%[Addr]);"
    :: [Addr] "r" (Addr)
    : "memory");
#elif _M_ARM_64
  __asm volatile (
    "dc cvac, %[Addr]"
    :: [Addr] "r" (Addr)
    : "memory");
#else
  LOGMAN_THROW_A_FMT("Unsupported architecture with cacheline clean");
#endif
}
|
||||
|
||||
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
|
||||
// Loads OpSize bytes from the guest context (CpuStateFrame + Op->Offset)
// into this op's SSA destination slot. Scalar sizes go through GD
// (zero-extending assignment); 16/32-byte vector sizes memcpy into GDP.
DEF_OP(LoadContext) {
  const auto Op = IROp->C<IR::IROp_LoadContext>();
  const auto OpSize = IROp->Size;

  const auto ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
  const auto Src = ContextPtr + Op->Offset;

#define LOAD_CTX(x, y) \
  case x: { \
    y const *MemData = reinterpret_cast<y const*>(Src); \
    GD = *MemData; \
    break; \
  }

  switch (OpSize) {
    LOAD_CTX(1, uint8_t)
    LOAD_CTX(2, uint16_t)
    LOAD_CTX(4, uint32_t)
    LOAD_CTX(8, uint64_t)
    case 16:
    case 32: {
      void const *MemData = reinterpret_cast<void const*>(Src);
      memcpy(GDP, MemData, OpSize);
      break;
    }
    default:
      LOGMAN_MSG_A_FMT("Unhandled LoadContext size: {}", OpSize);
      break;
  }
#undef LOAD_CTX
}
|
||||
|
||||
// Stores OpSize bytes from the op's source SSA slot into the guest
// context at CpuStateFrame + Op->Offset.
DEF_OP(StoreContext) {
  const auto Op = IROp->C<IR::IROp_StoreContext>();
  const auto OpSize = IROp->Size;

  const auto ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
  const auto Dst = ContextPtr + Op->Offset;

  void *MemData = reinterpret_cast<void*>(Dst);
  void *Src = GetSrc<void*>(Data->SSAData, Op->Value);
  memcpy(MemData, Src, OpSize);
}
|
||||
|
||||
// Loads OpSize bytes of a guest register from the context
// (CpuStateFrame + Op->Offset) into this op's SSA destination slot.
// Scalar sizes go through GD (zero-extending assignment); 16/32-byte
// vector sizes memcpy into GDP. Mirrors Op_LoadContext; the interpreter
// does not allocate host registers, so register loads are context loads.
DEF_OP(LoadRegister) {
  const auto Op = IROp->C<IR::IROp_LoadRegister>();
  const auto OpSize = IROp->Size;

  const auto ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
  const auto Src = ContextPtr + Op->Offset;

#define LOAD_CTX(x, y) \
  case x: { \
    y const *MemData = reinterpret_cast<y const*>(Src); \
    GD = *MemData; \
    break; \
  }

  switch (OpSize) {
    LOAD_CTX(1, uint8_t)
    LOAD_CTX(2, uint16_t)
    LOAD_CTX(4, uint32_t)
    LOAD_CTX(8, uint64_t)
    case 16:
    case 32: {
      void const *MemData = reinterpret_cast<void const*>(Src);
      memcpy(GDP, MemData, OpSize);
      break;
    }
    default:
      // Fixed copy-paste: previously reported "Unhandled LoadContext size"
      // from inside LoadRegister, misdirecting debugging.
      LOGMAN_MSG_A_FMT("Unhandled LoadRegister size: {}", OpSize);
      break;
  }
#undef LOAD_CTX
}
|
||||
|
||||
// Stores OpSize bytes from the op's source SSA slot into the guest register
// area of the context at CpuStateFrame + Op->Offset. Mirrors Op_StoreContext;
// the interpreter keeps all guest registers in the context structure.
DEF_OP(StoreRegister) {
  const auto Op = IROp->C<IR::IROp_StoreRegister>();
  const auto OpSize = IROp->Size;

  const auto ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
  const auto Dst = ContextPtr + Op->Offset;

  void *MemData = reinterpret_cast<void*>(Dst);
  void *Src = GetSrc<void*>(Data->SSAData, Op->Value);
  memcpy(MemData, Src, OpSize);
}
|
||||
|
||||
// Loads OpSize bytes from the guest context at
// BaseOffset + (runtime Index * Stride) into the SSA destination slot.
// Scalar sizes assign through GD; 16/32-byte sizes memcpy into GDP.
DEF_OP(LoadContextIndexed) {
  const auto Op = IROp->C<IR::IROp_LoadContextIndexed>();
  const auto OpSize = IROp->Size;

  // Runtime element index read from SSA data.
  const auto Index = *GetSrc<uint64_t*>(Data->SSAData, Op->Index);

  const auto ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
  const auto Src = ContextPtr + Op->BaseOffset + (Index * Op->Stride);

#define LOAD_CTX(x, y) \
  case x: { \
    y const *MemData = reinterpret_cast<y const*>(Src); \
    GD = *MemData; \
    break; \
  }

  switch (OpSize) {
    LOAD_CTX(1, uint8_t)
    LOAD_CTX(2, uint16_t)
    LOAD_CTX(4, uint32_t)
    LOAD_CTX(8, uint64_t)
    case 16:
    case 32: {
      void const *MemData = reinterpret_cast<void const*>(Src);
      memcpy(GDP, MemData, OpSize);
      break;
    }
    default:
      LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed size: {}", OpSize);
      break;
  }
#undef LOAD_CTX
}
|
||||
|
||||
// Stores OpSize bytes from the source SSA slot into the guest context at
// BaseOffset + (runtime Index * Stride).
DEF_OP(StoreContextIndexed) {
  const auto Op = IROp->C<IR::IROp_StoreContextIndexed>();
  const auto OpSize = IROp->Size;

  // Runtime element index read from SSA data.
  const auto Index = *GetSrc<uint64_t*>(Data->SSAData, Op->Index);

  const auto ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
  const auto Dst = ContextPtr + Op->BaseOffset + (Index * Op->Stride);

  void *MemData = reinterpret_cast<void*>(Dst);
  void *Src = GetSrc<void*>(Data->SSAData, Op->Value);
  memcpy(MemData, Src, OpSize);
}
|
||||
|
||||
// Register spilling is a register-allocator concept; the interpreter has no
// register allocation, so reaching this op is a hard error.
DEF_OP(SpillRegister) {
  LOGMAN_MSG_A_FMT("Unimplemented");
}
|
||||
|
||||
// Register filling is a register-allocator concept; the interpreter has no
// register allocation, so reaching this op is a hard error.
DEF_OP(FillRegister) {
  LOGMAN_MSG_A_FMT("Unimplemented");
}
|
||||
|
||||
// Loads a guest flag byte from CPUState::flags[Op->Flag] into GD.
// Flag index 24 is the packed NZCV word and is read as 32 bits;
// all other flags are single bytes.
DEF_OP(LoadFlag) {
  auto Op = IROp->C<IR::IROp_LoadFlag>();

  uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
  ContextPtr += offsetof(FEXCore::Core::CPUState, flags[0]);
  ContextPtr += Op->Flag;

  if (Op->Flag == 24 /* NZCV */) {
    uint32_t const *MemData = reinterpret_cast<uint32_t const*>(ContextPtr);
    GD = *MemData;
  } else {
    uint8_t const *MemData = reinterpret_cast<uint8_t const*>(ContextPtr);
    GD = *MemData;
  }
}
|
||||
|
||||
// Stores the source SSA value into CPUState::flags[Op->Flag].
// Flag index 24 is the packed NZCV word and is written as 32 bits;
// all other flags are single bytes.
DEF_OP(StoreFlag) {
  auto Op = IROp->C<IR::IROp_StoreFlag>();
  uint32_t Arg = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);

  uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
  ContextPtr += offsetof(FEXCore::Core::CPUState, flags[0]);
  ContextPtr += Op->Flag;

  if (Op->Flag == 24 /* NZCV */) {
    uint32_t *MemData = reinterpret_cast<uint32_t*>(ContextPtr);
    *MemData = Arg;
  } else {
    uint8_t *MemData = reinterpret_cast<uint8_t*>(ContextPtr);
    *MemData = Arg;
  }
}
|
||||
|
||||
// Loads OpSize bytes from guest memory at Addr (plus optional scaled/extended
// Offset) into the SSA destination. 1/2/4/8-byte loads go through
// std::atomic loads; larger sizes fall back to memcpy. The destination slot
// is zeroed to XMM_AVX_REG_SIZE first for zero-extension semantics.
DEF_OP(LoadMem) {
  const auto Op = IROp->C<IR::IROp_LoadMem>();
  const auto OpSize = IROp->Size;

  uint8_t const *MemData = *GetSrc<uint8_t const**>(Data->SSAData, Op->Addr);

  if (!Op->Offset.IsInvalid()) {
    auto Offset = *GetSrc<uintptr_t const*>(Data->SSAData, Op->Offset) * Op->OffsetScale;

    // Apply the addressing-mode extension to the scaled offset.
    switch(Op->OffsetType.Val) {
      case IR::MEM_OFFSET_SXTX.Val: MemData += Offset; break;
      case IR::MEM_OFFSET_UXTW.Val: MemData += (uint32_t)Offset; break;
      case IR::MEM_OFFSET_SXTW.Val: MemData += (int32_t)Offset; break;
    }
  }

  // Zero the full destination register slot before the partial write.
  memset(GDP, 0, Core::CPUState::XMM_AVX_REG_SIZE);
  switch (OpSize) {
    case 1: {
      auto D = reinterpret_cast<const std::atomic<uint8_t>*>(MemData);
      GD = D->load();
      break;
    }
    case 2: {
      auto D = reinterpret_cast<const std::atomic<uint16_t>*>(MemData);
      GD = D->load();
      break;
    }
    case 4: {
      auto D = reinterpret_cast<const std::atomic<uint32_t>*>(MemData);
      GD = D->load();
      break;
    }
    case 8: {
      auto D = reinterpret_cast<const std::atomic<uint64_t>*>(MemData);
      GD = D->load();
      break;
    }
    default:
      // Vector-sized loads are not atomic.
      memcpy(GDP, MemData, OpSize);
      break;
  }
}
|
||||
|
||||
// Stores OpSize bytes from the source SSA slot to guest memory at Addr
// (plus optional scaled/extended Offset). 1/2/4/8-byte stores go through
// std::atomic stores; larger sizes fall back to memcpy.
DEF_OP(StoreMem) {
  const auto Op = IROp->C<IR::IROp_StoreMem>();
  const auto OpSize = IROp->Size;

  uint8_t *MemData = *GetSrc<uint8_t **>(Data->SSAData, Op->Addr);

  if (!Op->Offset.IsInvalid()) {
    auto Offset = *GetSrc<uintptr_t const*>(Data->SSAData, Op->Offset) * Op->OffsetScale;

    // Apply the addressing-mode extension to the scaled offset.
    switch(Op->OffsetType.Val) {
      case IR::MEM_OFFSET_SXTX.Val: MemData += Offset; break;
      case IR::MEM_OFFSET_UXTW.Val: MemData += (uint32_t)Offset; break;
      case IR::MEM_OFFSET_SXTW.Val: MemData += (int32_t)Offset; break;
    }
  }
  switch (OpSize) {
    case 1: {
      reinterpret_cast<std::atomic<uint8_t>*>(MemData)->store(*GetSrc<uint8_t*>(Data->SSAData, Op->Value));
      break;
    }
    case 2: {
      reinterpret_cast<std::atomic<uint16_t>*>(MemData)->store(*GetSrc<uint16_t*>(Data->SSAData, Op->Value));
      break;
    }
    case 4: {
      reinterpret_cast<std::atomic<uint32_t>*>(MemData)->store(*GetSrc<uint32_t*>(Data->SSAData, Op->Value));
      break;
    }
    case 8: {
      reinterpret_cast<std::atomic<uint64_t>*>(MemData)->store(*GetSrc<uint64_t*>(Data->SSAData, Op->Value));
      break;
    }

    default:
      // Vector-sized stores are not atomic.
      memcpy(MemData, GetSrc<void*>(Data->SSAData, Op->Value), OpSize);
      break;
  }
}
|
||||
|
||||
// Masked vector load: for each element whose mask lane has its sign bit set,
// copies that element from guest memory into the destination; unmasked lanes
// remain zero (the destination is cleared first).
DEF_OP(VLoadVectorMasked) {
  const auto Op = IROp->C<IR::IROp_VLoadVectorMasked>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = IROp->ElementSize;
  const auto NumElements = OpSize / ElementSize;

  const auto *MemData = *GetSrc<uint8_t const**>(Data->SSAData, Op->Addr);
  const auto *Mask = GetSrc<uint8_t const*>(Data->SSAData, Op->Mask);

  // Copies element i from memory when mask element i has its sign bit set.
  const auto SetElements = [NumElements]<typename T>(void* Dst, const T* MaskValues, const T* MemoryData) {
    const auto SignBit = 1ULL << ((sizeof(T) * 8) - 1);
    for (size_t i = 0; i < NumElements; i++) {
      if ((MaskValues[i] & SignBit) != 0) {
        std::memcpy(static_cast<uint8_t*>(Dst) + (i * sizeof(T)), MemoryData + i, sizeof(T));
      }
    }
  };

  if (!Op->Offset.IsInvalid()) {
    auto Offset = *GetSrc<uintptr_t const*>(Data->SSAData, Op->Offset) * Op->OffsetScale;

    // Apply the addressing-mode extension to the scaled offset.
    switch(Op->OffsetType.Val) {
      case IR::MEM_OFFSET_SXTX.Val: MemData += Offset; break;
      case IR::MEM_OFFSET_UXTW.Val: MemData += (uint32_t)Offset; break;
      case IR::MEM_OFFSET_SXTW.Val: MemData += (int32_t)Offset; break;
    }
  }

  // Unmasked lanes must read as zero.
  memset(GDP, 0, Core::CPUState::XMM_AVX_REG_SIZE);
  switch (ElementSize) {
    case 1: {
      SetElements(GDP, Mask, MemData);
      return;
    }
    case 2: {
      SetElements(GDP,
                  reinterpret_cast<const uint16_t*>(Mask),
                  reinterpret_cast<const uint16_t*>(MemData));
      return;
    }
    case 4: {
      SetElements(GDP,
                  reinterpret_cast<const uint32_t*>(Mask),
                  reinterpret_cast<const uint32_t*>(MemData));
      return;
    }
    case 8: {
      SetElements(GDP,
                  reinterpret_cast<const uint64_t*>(Mask),
                  reinterpret_cast<const uint64_t*>(MemData));
      return;
    }
    default:
      LOGMAN_MSG_A_FMT("Unhandled VLoadVectorMasked element size: {}", ElementSize);
      return;
  }
}
|
||||
|
||||
// Masked vector store: for each element whose mask lane has its sign bit set,
// copies that element from the register data to guest memory; unmasked lanes
// leave memory untouched.
DEF_OP(VStoreVectorMasked) {
  const auto Op = IROp->C<IR::IROp_VStoreVectorMasked>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = IROp->ElementSize;
  const auto NumElements = OpSize / ElementSize;

  auto *Dst = *GetSrc<uint8_t**>(Data->SSAData, Op->Addr);
  const auto *RegData = GetSrc<uint8_t const*>(Data->SSAData, Op->Data);
  const auto *Mask = GetSrc<uint8_t const*>(Data->SSAData, Op->Mask);

  // Copies element i to memory when mask element i has its sign bit set.
  const auto SetElements = [NumElements]<typename T>(void* Dst, const T* MaskValues, const T* DataVals) {
    const auto SignBit = 1ULL << ((sizeof(T) * 8) - 1);
    for (size_t i = 0; i < NumElements; i++) {
      if ((MaskValues[i] & SignBit) != 0) {
        std::memcpy(static_cast<uint8_t*>(Dst) + (i * sizeof(T)), DataVals + i, sizeof(T));
      }
    }
  };

  if (!Op->Offset.IsInvalid()) {
    auto Offset = *GetSrc<uintptr_t const*>(Data->SSAData, Op->Offset) * Op->OffsetScale;

    // Apply the addressing-mode extension to the scaled offset.
    switch(Op->OffsetType.Val) {
      case IR::MEM_OFFSET_SXTX.Val: Dst += Offset; break;
      case IR::MEM_OFFSET_UXTW.Val: Dst += (uint32_t)Offset; break;
      case IR::MEM_OFFSET_SXTW.Val: Dst += (int32_t)Offset; break;
    }
  }

  switch (ElementSize) {
    case 1: {
      SetElements(Dst, Mask, RegData);
      return;
    }
    case 2: {
      SetElements(Dst,
                  reinterpret_cast<const uint16_t*>(Mask),
                  reinterpret_cast<const uint16_t*>(RegData));
      return;
    }
    case 4: {
      SetElements(Dst,
                  reinterpret_cast<const uint32_t*>(Mask),
                  reinterpret_cast<const uint32_t*>(RegData));
      return;
    }
    case 8: {
      SetElements(Dst,
                  reinterpret_cast<const uint64_t*>(Mask),
                  reinterpret_cast<const uint64_t*>(RegData));
      return;
    }
    default:
      LOGMAN_MSG_A_FMT("Unhandled VStoreVectorMasked element size: {}", ElementSize);
      return;
  }
}
|
||||
|
||||
// Loads a single element from guest memory into lane Op->Index of the
// destination vector, preserving all other lanes (destination is first
// initialized from DstSrc).
DEF_OP(VLoadVectorElement) {
  const auto Op = IROp->C<IR::IROp_VLoadVectorElement>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = IROp->ElementSize;

  auto *Mem = *GetSrc<uint8_t**>(Data->SSAData, Op->Addr);
  const auto *DstSrc = GetSrc<uint8_t const*>(Data->SSAData, Op->DstSrc);

  // Writes one element from memory into lane `Index` of Dst.
  const auto SetElements = []<typename T>(void* Dst, const T* MemPtr, const auto Index) {
    std::memcpy(static_cast<uint8_t*>(Dst) + (Index * sizeof(T)), MemPtr, sizeof(T));
  };

  // Copy the source data first.
  memcpy(GDP, DstSrc, OpSize);

  switch (ElementSize) {
    case 1: {
      SetElements(GDP,
                  Mem,
                  Op->Index);
      return;
    }
    case 2: {
      SetElements(GDP,
                  reinterpret_cast<const uint16_t*>(Mem),
                  Op->Index);
      return;
    }
    case 4: {
      SetElements(GDP,
                  reinterpret_cast<const uint32_t*>(Mem),
                  Op->Index);
      return;
    }
    case 8: {
      SetElements(GDP,
                  reinterpret_cast<const uint64_t*>(Mem),
                  Op->Index);
      return;
    }
    default:
      LOGMAN_MSG_A_FMT("Unhandled {} element size: {}", __func__, ElementSize);
      return;
  }
}
|
||||
|
||||
// Stores lane Op->Index of the source vector to guest memory at Addr.
DEF_OP(VStoreVectorElement) {
  const auto Op = IROp->C<IR::IROp_VStoreVectorElement>();
  const auto ElementSize = IROp->ElementSize;

  auto *Mem = *GetSrc<uint8_t**>(Data->SSAData, Op->Addr);
  const auto *Value = GetSrc<uint8_t const*>(Data->SSAData, Op->Value);

  // Writes lane `Index` of Src to memory.
  const auto StoreElements = []<typename T>(void* MemPtr, const T* Src, const auto Index) {
    std::memcpy(MemPtr, reinterpret_cast<const uint8_t*>(Src) + (Index * sizeof(T)), sizeof(T));
  };

  switch (ElementSize) {
    case 1: {
      StoreElements(Mem,
                    Value,
                    Op->Index);
      return;
    }
    case 2: {
      StoreElements(Mem,
                    reinterpret_cast<const uint16_t*>(Value),
                    Op->Index);
      return;
    }
    case 4: {
      StoreElements(Mem,
                    reinterpret_cast<const uint32_t*>(Value),
                    Op->Index);
      return;
    }
    case 8: {
      StoreElements(Mem,
                    reinterpret_cast<const uint64_t*>(Value),
                    Op->Index);
      return;
    }
    default:
      LOGMAN_MSG_A_FMT("Unhandled {} element size: {}", __func__, ElementSize);
      return;
  }
}
|
||||
|
||||
// Broadcast load: reads one element from guest memory and replicates it
// into every lane of the destination vector.
DEF_OP(VBroadcastFromMem) {
  const auto Op = IROp->C<IR::IROp_VBroadcastFromMem>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = IROp->ElementSize;
  const auto NumElements = OpSize / ElementSize;

  const auto *MemData = *GetSrc<const uint8_t**>(Data->SSAData, Op->Address);

  // Copies the single memory element into each of the NumElements lanes.
  const auto BroadcastElement = [NumElements]<typename T>(void* Dst, const T* MemPtr) {
    auto* DstU8 = static_cast<uint8_t*>(Dst);

    for (size_t i = 0; i < NumElements; i++) {
      std::memcpy(DstU8 + (i * sizeof(T)), MemPtr, sizeof(T));
    }
  };

  switch (ElementSize) {
    case 1:
      BroadcastElement(GDP, MemData);
      break;
    case 2:
      BroadcastElement(GDP, reinterpret_cast<const uint16_t*>(MemData));
      break;
    case 4:
      BroadcastElement(GDP, reinterpret_cast<const uint32_t*>(MemData));
      break;
    case 8:
      BroadcastElement(GDP, reinterpret_cast<const uint64_t*>(MemData));
      break;
    case 16:
      BroadcastElement(GDP, reinterpret_cast<const __uint128_t*>(MemData));
      break;
    default:
      LOGMAN_MSG_A_FMT("Unhandled VBroadcastFromMem element size: {}", ElementSize);
      break;
  }
}
|
||||
|
||||
// Guest stack push: stores the source value at (Addr - ValueSize) and
// returns the decremented address in GD (the new stack pointer value).
DEF_OP(Push) {
  const auto Op = IROp->C<IR::IROp_Push>();
  const auto ValueSize = Op->ValueSize;

  // Current guest stack pointer value.
  uint64_t MemData = *GetSrc<uint64_t*>(Data->SSAData, Op->Addr);

  switch (ValueSize) {
    case 1: {
      *reinterpret_cast<uint8_t*>(MemData - ValueSize) = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
      break;
    }
    case 2: {
      *reinterpret_cast<uint16_t*>(MemData - ValueSize) = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
      break;
    }
    case 4: {
      *reinterpret_cast<uint32_t*>(MemData - ValueSize) = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
      break;
    }
    case 8: {
      *reinterpret_cast<uint64_t*>(MemData - ValueSize) = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
      break;
    }
    default:
      LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ValueSize);
      break;
  }

  // Result is the post-push (decremented) address.
  GD = MemData - ValueSize;
}
|
||||
|
||||
DEF_OP(MemSet) {
|
||||
const auto Op = IROp->C<IR::IROp_MemSet>();
|
||||
const int32_t Size = Op->Size;
|
||||
|
||||
char *MemData = *GetSrc<char **>(Data->SSAData, Op->Addr);
|
||||
uint64_t MemPrefix{};
|
||||
if (!Op->Prefix.IsInvalid()) {
|
||||
MemPrefix = *GetSrc<uint64_t*>(Data->SSAData, Op->Prefix);
|
||||
}
|
||||
|
||||
const auto Value = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
|
||||
const auto Length = *GetSrc<uint64_t*>(Data->SSAData, Op->Length);
|
||||
const auto Direction = *GetSrc<uint8_t*>(Data->SSAData, Op->Direction);
|
||||
|
||||
auto MemSetElements = [](auto* Memory, uint64_t Value, size_t Length) {
|
||||
for (size_t i = 0; i < Length; ++i) {
|
||||
Memory[i] = Value;
|
||||
}
|
||||
};
|
||||
|
||||
auto MemSetElementsInverse = [](auto* Memory, uint64_t Value, size_t Length) {
|
||||
for (size_t i = 0; i < Length; ++i) {
|
||||
Memory[-i] = Value;
|
||||
}
|
||||
};
|
||||
|
||||
if (Direction == 0) { // Forward
|
||||
if (Op->IsAtomic) {
|
||||
switch (Size) {
|
||||
case 1:
|
||||
MemSetElements(reinterpret_cast<std::atomic<uint8_t>*>(MemData + MemPrefix), Value, Length);
|
||||
break;
|
||||
case 2:
|
||||
MemSetElements(reinterpret_cast<std::atomic<uint16_t>*>(MemData + MemPrefix), Value, Length);
|
||||
break;
|
||||
case 4:
|
||||
MemSetElements(reinterpret_cast<std::atomic<uint32_t>*>(MemData + MemPrefix), Value, Length);
|
||||
break;
|
||||
case 8:
|
||||
MemSetElements(reinterpret_cast<std::atomic<uint64_t>*>(MemData + MemPrefix), Value, Length);
|
||||
break;
|
||||
default:
|
||||
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
switch (Size) {
|
||||
case 1:
|
||||
MemSetElements(reinterpret_cast<uint8_t*>(MemData + MemPrefix), Value, Length);
|
||||
break;
|
||||
case 2:
|
||||
MemSetElements(reinterpret_cast<uint16_t*>(MemData + MemPrefix), Value, Length);
|
||||
break;
|
||||
case 4:
|
||||
MemSetElements(reinterpret_cast<uint32_t*>(MemData + MemPrefix), Value, Length);
|
||||
break;
|
||||
case 8:
|
||||
MemSetElements(reinterpret_cast<uint64_t*>(MemData + MemPrefix), Value, Length);
|
||||
break;
|
||||
default:
|
||||
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
GD = reinterpret_cast<uint64_t>(MemData + (Length * Size));
|
||||
}
|
||||
else { // Backward
|
||||
if (Op->IsAtomic) {
|
||||
switch (Size) {
|
||||
case 1:
|
||||
MemSetElementsInverse(reinterpret_cast<std::atomic<uint8_t>*>(MemData + MemPrefix), Value, Length);
|
||||
break;
|
||||
case 2:
|
||||
MemSetElementsInverse(reinterpret_cast<std::atomic<uint16_t>*>(MemData + MemPrefix), Value, Length);
|
||||
break;
|
||||
case 4:
|
||||
MemSetElementsInverse(reinterpret_cast<std::atomic<uint32_t>*>(MemData + MemPrefix), Value, Length);
|
||||
break;
|
||||
case 8:
|
||||
MemSetElementsInverse(reinterpret_cast<std::atomic<uint64_t>*>(MemData + MemPrefix), Value, Length);
|
||||
break;
|
||||
default:
|
||||
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
switch (Size) {
|
||||
case 1:
|
||||
MemSetElementsInverse(reinterpret_cast<uint8_t*>(MemData + MemPrefix), Value, Length);
|
||||
break;
|
||||
case 2:
|
||||
MemSetElementsInverse(reinterpret_cast<uint16_t*>(MemData + MemPrefix), Value, Length);
|
||||
break;
|
||||
case 4:
|
||||
MemSetElementsInverse(reinterpret_cast<uint32_t*>(MemData + MemPrefix), Value, Length);
|
||||
break;
|
||||
case 8:
|
||||
MemSetElementsInverse(reinterpret_cast<uint64_t*>(MemData + MemPrefix), Value, Length);
|
||||
break;
|
||||
default:
|
||||
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
GD = reinterpret_cast<uint64_t>(MemData - (Length * Size));
|
||||
}
|
||||
}
|
||||
|
||||
DEF_OP(MemCpy) {
|
||||
const auto Op = IROp->C<IR::IROp_MemCpy>();
|
||||
const int32_t Size = Op->Size;
|
||||
|
||||
uint64_t *DstPtr = GetDest<uint64_t*>(Data->SSAData, Node);
|
||||
|
||||
char *MemDataDest = *GetSrc<char **>(Data->SSAData, Op->AddrDest);
|
||||
char *MemDataSrc = *GetSrc<char **>(Data->SSAData, Op->AddrSrc);
|
||||
|
||||
uint64_t DestPrefix{};
|
||||
uint64_t SrcPrefix{};
|
||||
if (!Op->PrefixDest.IsInvalid()) {
|
||||
DestPrefix = *GetSrc<uint64_t*>(Data->SSAData, Op->PrefixDest);
|
||||
|
||||
}
|
||||
if (!Op->PrefixSrc.IsInvalid()) {
|
||||
SrcPrefix = *GetSrc<uint64_t*>(Data->SSAData, Op->PrefixSrc);
|
||||
}
|
||||
|
||||
const auto Length = *GetSrc<uint64_t*>(Data->SSAData, Op->Length);
|
||||
const auto Direction = *GetSrc<uint8_t*>(Data->SSAData, Op->Direction);
|
||||
|
||||
auto MemSetElementsAtomic = [](auto* MemDst, auto* MemSrc, size_t Length) {
|
||||
for (size_t i = 0; i < Length; ++i) {
|
||||
MemDst[i].store(MemSrc[i].load());
|
||||
}
|
||||
};
|
||||
|
||||
auto MemSetElementsAtomicInverse = [](auto* MemDst, auto* MemSrc, size_t Length) {
|
||||
for (size_t i = 0; i < Length; ++i) {
|
||||
MemDst[-i].store(MemSrc[-i].load());
|
||||
}
|
||||
};
|
||||
|
||||
auto MemSetElements = [](auto* MemDst, auto* MemSrc, size_t Length) {
|
||||
for (size_t i = 0; i < Length; ++i) {
|
||||
MemDst[i] = MemSrc[i];
|
||||
}
|
||||
};
|
||||
|
||||
auto MemSetElementsInverse = [](auto* MemDst, auto* MemSrc, size_t Length) {
|
||||
for (size_t i = 0; i < Length; ++i) {
|
||||
MemDst[-i] = MemSrc[-i];
|
||||
}
|
||||
};
|
||||
|
||||
if (Direction == 0) { // Forward
|
||||
if (Op->IsAtomic) {
|
||||
switch (Size) {
|
||||
case 1:
|
||||
MemSetElementsAtomic(reinterpret_cast<std::atomic<uint8_t>*>(MemDataDest + DestPrefix), reinterpret_cast<std::atomic<uint8_t>*>(MemDataSrc + SrcPrefix), Length);
|
||||
break;
|
||||
case 2:
|
||||
MemSetElementsAtomic(reinterpret_cast<std::atomic<uint16_t>*>(MemDataDest + DestPrefix), reinterpret_cast<std::atomic<uint16_t>*>(MemDataSrc + SrcPrefix), Length);
|
||||
break;
|
||||
case 4:
|
||||
MemSetElementsAtomic(reinterpret_cast<std::atomic<uint32_t>*>(MemDataDest + DestPrefix), reinterpret_cast<std::atomic<uint32_t>*>(MemDataSrc + SrcPrefix), Length);
|
||||
break;
|
||||
case 8:
|
||||
MemSetElementsAtomic(reinterpret_cast<std::atomic<uint64_t>*>(MemDataDest + DestPrefix), reinterpret_cast<std::atomic<uint64_t>*>(MemDataSrc + SrcPrefix), Length);
|
||||
break;
|
||||
default:
|
||||
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
switch (Size) {
|
||||
case 1:
|
||||
MemSetElements(reinterpret_cast<uint8_t*>(MemDataDest + DestPrefix), reinterpret_cast<uint8_t*>(MemDataSrc + SrcPrefix), Length);
|
||||
break;
|
||||
case 2:
|
||||
MemSetElements(reinterpret_cast<uint16_t*>(MemDataDest + DestPrefix), reinterpret_cast<uint16_t*>(MemDataSrc + SrcPrefix), Length);
|
||||
break;
|
||||
case 4:
|
||||
MemSetElements(reinterpret_cast<uint32_t*>(MemDataDest + DestPrefix), reinterpret_cast<uint32_t*>(MemDataSrc + SrcPrefix), Length);
|
||||
break;
|
||||
case 8:
|
||||
MemSetElements(reinterpret_cast<uint64_t*>(MemDataDest + DestPrefix), reinterpret_cast<uint64_t*>(MemDataSrc + SrcPrefix), Length);
|
||||
break;
|
||||
default:
|
||||
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
DstPtr[0] = reinterpret_cast<uint64_t>(MemDataDest + (Length * Size));
|
||||
DstPtr[1] = reinterpret_cast<uint64_t>(MemDataSrc + (Length * Size));
|
||||
}
|
||||
else { // Backward
|
||||
if (Op->IsAtomic) {
|
||||
switch (Size) {
|
||||
case 1:
|
||||
MemSetElementsAtomicInverse(reinterpret_cast<std::atomic<uint8_t>*>(MemDataDest + DestPrefix), reinterpret_cast<std::atomic<uint8_t>*>(MemDataSrc + SrcPrefix), Length);
|
||||
break;
|
||||
case 2:
|
||||
MemSetElementsAtomicInverse(reinterpret_cast<std::atomic<uint16_t>*>(MemDataDest + DestPrefix), reinterpret_cast<std::atomic<uint16_t>*>(MemDataSrc + SrcPrefix), Length);
|
||||
break;
|
||||
case 4:
|
||||
MemSetElementsAtomicInverse(reinterpret_cast<std::atomic<uint32_t>*>(MemDataDest + DestPrefix), reinterpret_cast<std::atomic<uint32_t>*>(MemDataSrc + SrcPrefix), Length);
|
||||
break;
|
||||
case 8:
|
||||
MemSetElementsAtomicInverse(reinterpret_cast<std::atomic<uint64_t>*>(MemDataDest + DestPrefix), reinterpret_cast<std::atomic<uint64_t>*>(MemDataSrc + SrcPrefix), Length);
|
||||
break;
|
||||
default:
|
||||
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
switch (Size) {
|
||||
case 1:
|
||||
MemSetElementsInverse(reinterpret_cast<uint8_t*>(MemDataDest + DestPrefix), reinterpret_cast<uint8_t*>(MemDataSrc + SrcPrefix), Length);
|
||||
break;
|
||||
case 2:
|
||||
MemSetElementsInverse(reinterpret_cast<uint16_t*>(MemDataDest + DestPrefix), reinterpret_cast<uint16_t*>(MemDataSrc + SrcPrefix), Length);
|
||||
break;
|
||||
case 4:
|
||||
MemSetElementsInverse(reinterpret_cast<uint32_t*>(MemDataDest + DestPrefix), reinterpret_cast<uint32_t*>(MemDataSrc + SrcPrefix), Length);
|
||||
break;
|
||||
case 8:
|
||||
MemSetElementsInverse(reinterpret_cast<uint64_t*>(MemDataDest + DestPrefix), reinterpret_cast<uint64_t*>(MemDataSrc + SrcPrefix), Length);
|
||||
break;
|
||||
default:
|
||||
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
DstPtr[0] = reinterpret_cast<uint64_t>(MemDataDest - (Length * Size));
|
||||
DstPtr[1] = reinterpret_cast<uint64_t>(MemDataSrc - (Length * Size));
|
||||
}
|
||||
}
|
||||
|
||||
DEF_OP(CacheLineClear) {
|
||||
auto Op = IROp->C<IR::IROp_CacheLineClear>();
|
||||
|
||||
char *MemData = *GetSrc<char **>(Data->SSAData, Op->Addr);
|
||||
|
||||
// 64-byte cache line clear
|
||||
CacheLineFlush(MemData);
|
||||
}
|
||||
|
||||
DEF_OP(CacheLineClean) {
|
||||
auto Op = IROp->C<IR::IROp_CacheLineClean>();
|
||||
|
||||
char *MemData = *GetSrc<char **>(Data->SSAData, Op->Addr);
|
||||
|
||||
// 64-byte cache line clear
|
||||
CacheLineClean(MemData);
|
||||
}
|
||||
|
||||
DEF_OP(CacheLineZero) {
|
||||
auto Op = IROp->C<IR::IROp_CacheLineZero>();
|
||||
|
||||
uintptr_t MemData = *GetSrc<uintptr_t*>(Data->SSAData, Op->Addr);
|
||||
|
||||
// Force cacheline alignment
|
||||
MemData = MemData & ~(CPUIDEmu::CACHELINE_SIZE - 1);
|
||||
|
||||
using DataType = uint64_t;
|
||||
DataType *MemData64 = reinterpret_cast<DataType*>(MemData);
|
||||
|
||||
// 64-byte cache line zero
|
||||
for (size_t i = 0; i < (CPUIDEmu::CACHELINE_SIZE / sizeof(DataType)); ++i) {
|
||||
MemData64[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
#undef DEF_OP
|
||||
} // namespace FEXCore::CPU
|
@ -1,175 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
$info$
|
||||
tags: backend|interpreter
|
||||
$end_info$
|
||||
*/
|
||||
|
||||
#include "Interface/Context/Context.h"
|
||||
|
||||
#include "Interface/Core/Interpreter/InterpreterClass.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterOps.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterDefines.h"
|
||||
|
||||
#include <FEXHeaderUtils/Syscalls.h>
|
||||
|
||||
#include <cstdint>
|
||||
#ifdef _M_X86_64
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
#include <sys/random.h>
|
||||
|
||||
namespace FEXCore::CPU {
|
||||
|
||||
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
|
||||
DEF_OP(Fence) {
|
||||
auto Op = IROp->C<IR::IROp_Fence>();
|
||||
switch (Op->Fence) {
|
||||
case IR::Fence_Load.Val:
|
||||
std::atomic_thread_fence(std::memory_order_acquire);
|
||||
break;
|
||||
case IR::Fence_LoadStore.Val:
|
||||
std::atomic_thread_fence(std::memory_order_seq_cst);
|
||||
break;
|
||||
case IR::Fence_Store.Val:
|
||||
std::atomic_thread_fence(std::memory_order_release);
|
||||
break;
|
||||
default: LOGMAN_MSG_A_FMT("Unknown Fence: {}", Op->Fence); break;
|
||||
}
|
||||
}
|
||||
|
||||
DEF_OP(Break) {
|
||||
auto Op = IROp->C<IR::IROp_Break>();
|
||||
|
||||
Data->State->CurrentFrame->SynchronousFaultData.FaultToTopAndGeneratedException = 1;
|
||||
Data->State->CurrentFrame->SynchronousFaultData.Signal = Op->Reason.Signal;
|
||||
Data->State->CurrentFrame->SynchronousFaultData.TrapNo = Op->Reason.TrapNumber;
|
||||
Data->State->CurrentFrame->SynchronousFaultData.err_code = Op->Reason.ErrorRegister;
|
||||
Data->State->CurrentFrame->SynchronousFaultData.si_code = Op->Reason.si_code;
|
||||
|
||||
switch (Op->Reason.Signal) {
|
||||
case SIGILL:
|
||||
FHU::Syscalls::tgkill(Data->State->ThreadManager.PID, Data->State->ThreadManager.TID, SIGILL);
|
||||
break;
|
||||
case SIGTRAP:
|
||||
FHU::Syscalls::tgkill(Data->State->ThreadManager.PID, Data->State->ThreadManager.TID, SIGTRAP);
|
||||
break;
|
||||
case SIGSEGV:
|
||||
FHU::Syscalls::tgkill(Data->State->ThreadManager.PID, Data->State->ThreadManager.TID, SIGSEGV);
|
||||
break;
|
||||
default:
|
||||
FHU::Syscalls::tgkill(Data->State->ThreadManager.PID, Data->State->ThreadManager.TID, SIGTRAP);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
DEF_OP(GetRoundingMode) {
|
||||
uint32_t GuestRounding{};
|
||||
#ifdef _M_ARM_64
|
||||
uint64_t Tmp{};
|
||||
__asm(R"(
|
||||
mrs %[Tmp], FPCR;
|
||||
)"
|
||||
: [Tmp] "=r" (Tmp));
|
||||
// Extract the rounding
|
||||
// On ARM the ordering is different than on x86
|
||||
GuestRounding |= ((Tmp >> 24) & 1) ? IR::ROUND_MODE_FLUSH_TO_ZERO : 0;
|
||||
uint8_t RoundingMode = (Tmp >> 22) & 0b11;
|
||||
if (RoundingMode == 0)
|
||||
GuestRounding |= IR::ROUND_MODE_NEAREST;
|
||||
else if (RoundingMode == 1)
|
||||
GuestRounding |= IR::ROUND_MODE_POSITIVE_INFINITY;
|
||||
else if (RoundingMode == 2)
|
||||
GuestRounding |= IR::ROUND_MODE_NEGATIVE_INFINITY;
|
||||
else if (RoundingMode == 3)
|
||||
GuestRounding |= IR::ROUND_MODE_TOWARDS_ZERO;
|
||||
#else
|
||||
GuestRounding = _mm_getcsr();
|
||||
|
||||
// Extract the rounding
|
||||
GuestRounding = (GuestRounding >> 13) & 0b111;
|
||||
#endif
|
||||
memcpy(GDP, &GuestRounding, sizeof(GuestRounding));
|
||||
}
|
||||
|
||||
DEF_OP(SetRoundingMode) {
|
||||
auto Op = IROp->C<IR::IROp_SetRoundingMode>();
|
||||
const auto GuestRounding = *GetSrc<uint8_t*>(Data->SSAData, Op->RoundMode);
|
||||
#ifdef _M_ARM_64
|
||||
uint64_t HostRounding{};
|
||||
__asm volatile(R"(
|
||||
mrs %[Tmp], FPCR;
|
||||
)"
|
||||
: [Tmp] "=r" (HostRounding));
|
||||
// Mask out the rounding
|
||||
HostRounding &= ~(0b111 << 22);
|
||||
|
||||
HostRounding |= (GuestRounding & IR::ROUND_MODE_FLUSH_TO_ZERO) ? (1U << 24) : 0;
|
||||
|
||||
uint8_t RoundingMode = GuestRounding & 0b11;
|
||||
if (RoundingMode == IR::ROUND_MODE_NEAREST)
|
||||
HostRounding |= (0b00U << 22);
|
||||
else if (RoundingMode == IR::ROUND_MODE_POSITIVE_INFINITY)
|
||||
HostRounding |= (0b01U << 22);
|
||||
else if (RoundingMode == IR::ROUND_MODE_NEGATIVE_INFINITY)
|
||||
HostRounding |= (0b10U << 22);
|
||||
else if (RoundingMode == IR::ROUND_MODE_TOWARDS_ZERO)
|
||||
HostRounding |= (0b11U << 22);
|
||||
|
||||
__asm volatile(R"(
|
||||
msr FPCR, %[Tmp];
|
||||
)"
|
||||
:: [Tmp] "r" (HostRounding));
|
||||
#else
|
||||
uint32_t HostRounding = _mm_getcsr();
|
||||
|
||||
// Cut out the host rounding mode
|
||||
HostRounding &= ~(0b111 << 13);
|
||||
|
||||
// Insert our new rounding mode
|
||||
HostRounding |= GuestRounding << 13;
|
||||
_mm_setcsr(HostRounding);
|
||||
#endif
|
||||
}
|
||||
|
||||
DEF_OP(Print) {
|
||||
auto Op = IROp->C<IR::IROp_Print>();
|
||||
const uint8_t OpSize = IROp->Size;
|
||||
|
||||
if (OpSize <= 8) {
|
||||
const auto Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
|
||||
LogMan::Msg::IFmt(">>>> Value in Arg: 0x{:x}, {}", Src, Src);
|
||||
}
|
||||
else if (OpSize == 16) {
|
||||
const auto Src = *GetSrc<__uint128_t*>(Data->SSAData, Op->Value);
|
||||
const uint64_t Src0 = Src;
|
||||
const uint64_t Src1 = Src >> 64;
|
||||
LogMan::Msg::IFmt(">>>> Value[0] in Arg: 0x{:x}, {}", Src0, Src0);
|
||||
LogMan::Msg::IFmt(" Value[1] in Arg: 0x{:x}, {}", Src1, Src1);
|
||||
}
|
||||
else
|
||||
LOGMAN_MSG_A_FMT("Unknown value size: {}", OpSize);
|
||||
}
|
||||
|
||||
DEF_OP(ProcessorID) {
|
||||
uint32_t CPU, CPUNode;
|
||||
FHU::Syscalls::getcpu(&CPU, &CPUNode);
|
||||
GD = (CPUNode << 12) | CPU;
|
||||
}
|
||||
|
||||
DEF_OP(RDRAND) {
|
||||
// We are ignoring Op->GetReseeded in the interpreter
|
||||
uint64_t *DstPtr = GetDest<uint64_t*>(Data->SSAData, Node);
|
||||
ssize_t Result = ::getrandom(&DstPtr[0], 8, 0);
|
||||
|
||||
// Second result is if we managed to read a valid random number or not
|
||||
DstPtr[1] = Result == 8 ? 1 : 0;
|
||||
}
|
||||
|
||||
DEF_OP(Yield) {
|
||||
// Nop implementation
|
||||
}
|
||||
|
||||
#undef DEF_OP
|
||||
|
||||
} // namespace FEXCore::CPU
|
@ -1,36 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
$info$
|
||||
tags: backend|interpreter
|
||||
$end_info$
|
||||
*/
|
||||
|
||||
#include "Interface/Core/Interpreter/InterpreterClass.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterOps.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterDefines.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace FEXCore::CPU {
|
||||
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
|
||||
DEF_OP(ExtractElementPair) {
|
||||
auto Op = IROp->C<IR::IROp_ExtractElementPair>();
|
||||
const auto Src = GetSrc<uintptr_t>(Data->SSAData, Op->Pair);
|
||||
memcpy(GDP,
|
||||
reinterpret_cast<void*>(Src + Op->Header.Size * Op->Element), Op->Header.Size);
|
||||
}
|
||||
|
||||
DEF_OP(CreateElementPair) {
|
||||
auto Op = IROp->C<IR::IROp_CreateElementPair>();
|
||||
const void *Src_Lower = GetSrc<void*>(Data->SSAData, Op->Lower);
|
||||
const void *Src_Upper = GetSrc<void*>(Data->SSAData, Op->Upper);
|
||||
|
||||
uint8_t *Dst = GetDest<uint8_t*>(Data->SSAData, Node);
|
||||
|
||||
memcpy(Dst, Src_Lower, IROp->ElementSize);
|
||||
memcpy(Dst + IROp->ElementSize, Src_Upper, IROp->ElementSize);
|
||||
}
|
||||
|
||||
#undef DEF_OP
|
||||
|
||||
} // namespace FEXCore::CPU
|
File diff suppressed because it is too large
Load Diff
@ -19,15 +19,13 @@
|
||||
namespace FEXCore::Config {
|
||||
namespace Handler {
|
||||
static inline std::optional<fextl::string> CoreHandler(std::string_view Value) {
|
||||
if (Value == "irint")
|
||||
if (Value == "irjit")
|
||||
return "0";
|
||||
else if (Value == "irjit")
|
||||
return "1";
|
||||
#ifdef _M_X86_64
|
||||
else if (Value == "host")
|
||||
return "2";
|
||||
return "1";
|
||||
#endif
|
||||
return "1";
|
||||
return "0";
|
||||
}
|
||||
|
||||
static inline std::optional<fextl::string> SMCCheckHandler(std::string_view Value) {
|
||||
@ -61,7 +59,6 @@ namespace Handler {
|
||||
#include <FEXCore/Config/ConfigOptions.inl>
|
||||
|
||||
enum ConfigCore {
|
||||
CONFIG_INTERPRETER,
|
||||
CONFIG_IRJIT,
|
||||
CONFIG_CUSTOM,
|
||||
};
|
||||
|
@ -246,21 +246,6 @@ namespace {
|
||||
|
||||
if (ImGui::BeginTabItem("CPU")) {
|
||||
std::optional<fextl::string*> Value{};
|
||||
#ifdef INTERPRETER_ENABLED
|
||||
ImGui::Text("Core:");
|
||||
Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_CORE);
|
||||
|
||||
ImGui::SameLine();
|
||||
if (ImGui::RadioButton("Int", Value.has_value() && **Value == "0")) {
|
||||
LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_CORE, "0");
|
||||
ConfigChanged = true;
|
||||
}
|
||||
ImGui::SameLine();
|
||||
if (ImGui::RadioButton("JIT", Value.has_value() && **Value == "1")) {
|
||||
LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_CORE, "1");
|
||||
ConfigChanged = true;
|
||||
}
|
||||
#endif
|
||||
Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_MAXINST);
|
||||
if (Value.has_value() && !(*Value)->empty()) {
|
||||
strncpy(BlockSize, &(*Value)->at(0), 32);
|
||||
|
@ -1735,26 +1735,7 @@ namespace FEX::HLE {
|
||||
return Result.first;
|
||||
};
|
||||
|
||||
const auto SigbusHandlerInterpreter = [](FEXCore::Core::InternalThreadState *Thread, int Signal, void *_info, void *ucontext) -> bool {
|
||||
const auto PC = ArchHelpers::Context::GetPc(ucontext);
|
||||
siginfo_t* info = reinterpret_cast<siginfo_t*>(_info);
|
||||
|
||||
if (info->si_code != BUS_ADRALN) {
|
||||
// This only handles alignment problems
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto Result = FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(true, PC, ArchHelpers::Context::GetArmGPRs(ucontext));
|
||||
ArchHelpers::Context::SetPc(ucontext, PC + Result.second);
|
||||
return Result.first;
|
||||
};
|
||||
|
||||
if (Core == FEXCore::Config::CONFIG_INTERPRETER) {
|
||||
RegisterHostSignalHandler(SIGBUS, SigbusHandlerInterpreter, true);
|
||||
}
|
||||
else {
|
||||
RegisterHostSignalHandler(SIGBUS, SigbusHandler, true);
|
||||
}
|
||||
RegisterHostSignalHandler(SIGBUS, SigbusHandler, true);
|
||||
#endif
|
||||
// Register pause signal handler.
|
||||
RegisterHostSignalHandler(SignalDelegator::SIGNAL_FOR_PAUSE, PauseHandler, true);
|
||||
|
@ -61,14 +61,6 @@ foreach(ASM_SRC ${ASM_SOURCES})
|
||||
)
|
||||
endif()
|
||||
|
||||
if (ENABLE_INTERPRETER)
|
||||
list(APPEND TEST_ARGS
|
||||
"--no-silent -g -c irint -n 1 --no-multiblock" "int_1" "int"
|
||||
"--no-silent -g -c irint -n 500 --no-multiblock" "int_500" "int"
|
||||
"--no-silent -g -c irint -n 500 --multiblock" "int_500_m" "int"
|
||||
)
|
||||
endif()
|
||||
|
||||
if (NOT MINGW_BUILD)
|
||||
set (LAUNCH_PROGRAM "${CMAKE_BINARY_DIR}/Bin/TestHarnessRunner")
|
||||
else()
|
||||
|
@ -54,13 +54,6 @@ foreach(ASM_SRC ${ASM_SOURCES})
|
||||
"--no-silent -g -c irjit -n 500 --no-multiblock" "jit_500" "jit"
|
||||
"--no-silent -g -c irjit -n 500 --multiblock" "jit_500_m" "jit"
|
||||
)
|
||||
if (ENABLE_INTERPRETER)
|
||||
list(APPEND TEST_ARGS
|
||||
"--no-silent -g -c irint -n 1 --no-multiblock" "int_1" "int"
|
||||
"--no-silent -g -c irint -n 500 --no-multiblock" "int_500" "int"
|
||||
"--no-silent -g -c irint -n 500 --multiblock" "int_500_m" "int"
|
||||
)
|
||||
endif()
|
||||
|
||||
if (ENABLE_VIXL_SIMULATOR)
|
||||
set(CPU_CLASS Simulator)
|
||||
|
@ -25,12 +25,6 @@ foreach(IR_SRC ${IR_SOURCES})
|
||||
"--no-silent -c irjit -n 500" "ir_jit" "jit"
|
||||
)
|
||||
|
||||
if (ENABLE_INTERPRETER)
|
||||
list(APPEND TEST_ARGS
|
||||
"--no-silent -c irint -n 500" "ir_int" "int"
|
||||
)
|
||||
endif()
|
||||
|
||||
set (RUNNER_DISABLED "${CMAKE_SOURCE_DIR}/unittests/ASM/Disabled_Tests")
|
||||
if (DEFINED ENV{runner_label})
|
||||
set (RUNNER_DISABLED "${CMAKE_SOURCE_DIR}/unittests/ASM/Disabled_Tests_$ENV{runner_label}")
|
||||
|
@ -9,20 +9,6 @@ foreach(POSIX_TEST ${POSIX_TESTS})
|
||||
list(GET TEST_NAME_LIST 1 TEST_NAME)
|
||||
string(REPLACE "/" "-" TEST_NAME ${TEST_NAME})
|
||||
|
||||
if (ENABLE_INTERPRETER)
|
||||
add_test(NAME "${TEST_NAME}.int.posix"
|
||||
COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/Expected_Output"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/Disabled_Tests"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/Flake_Tests"
|
||||
"${TEST_NAME}"
|
||||
"guest"
|
||||
"${CMAKE_BINARY_DIR}/Bin/FEXLoader"
|
||||
"-o" "stderr" "--no-silent" "-c" "irint" "-n" "500" "--"
|
||||
"${POSIX_TEST}")
|
||||
endif()
|
||||
|
||||
add_test(NAME "${TEST_NAME}.jit.posix"
|
||||
COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures"
|
||||
|
@ -9,8 +9,6 @@ foreach(TEST ${TESTS})
|
||||
list(GET TEST_NAME_LIST 1 TEST_NAME)
|
||||
string(REPLACE "/" "-" TEST_NAME ${TEST_NAME})
|
||||
|
||||
# Interpreter is too slow to run these tests, only generate for jit
|
||||
|
||||
add_test(NAME "${TEST_NAME}.jit.gcc-target-32"
|
||||
COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures"
|
||||
|
@ -9,8 +9,6 @@ foreach(TEST ${TESTS})
|
||||
list(GET TEST_NAME_LIST 1 TEST_NAME)
|
||||
string(REPLACE "/" "-" TEST_NAME ${TEST_NAME})
|
||||
|
||||
# Interpreter is too slow to run these tests, only generate for jit
|
||||
|
||||
add_test(NAME "${TEST_NAME}.jit.gcc-target-64"
|
||||
COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures"
|
||||
|
@ -9,8 +9,6 @@ foreach(TEST ${TESTS})
|
||||
list(GET TEST_NAME_LIST 1 TEST_NAME)
|
||||
string(REPLACE "/" "-" TEST_NAME ${TEST_NAME})
|
||||
|
||||
# Interpreter is too slow to run these tests, only generate for jit
|
||||
|
||||
add_test(NAME "${TEST_NAME}.jit.gvisor"
|
||||
COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures"
|
||||
|
Loading…
x
Reference in New Issue
Block a user