Merge pull request #3128 from alyssarosenzweig/rm/interp

FEXCore: Gut interpreter
Ryan Houdek 2023-09-21 14:51:44 -07:00 committed by GitHub
commit 7d99eb05c6
30 changed files with 9 additions and 7690 deletions


@@ -65,7 +65,7 @@ jobs:
# Note the current convention is to use the -S and -B options here to specify source
# and build directories, but this is only available with CMake 3.13 and higher.
# The CMake binaries on the Github Actions machines are (as of this writing) 3.12
run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DENABLE_INTERPRETER=True -DBUILD_FEX_LINUX_TESTS=True -DBUILD_THUNKS=True -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install
run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DBUILD_FEX_LINUX_TESTS=True -DBUILD_THUNKS=True -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install
- name: Build
working-directory: ${{runner.workspace}}/build


@@ -73,7 +73,7 @@ jobs:
# Note the current convention is to use the -S and -B options here to specify source
# and build directories, but this is only available with CMake 3.13 and higher.
# The CMake binaries on the Github Actions machines are (as of this writing) 3.12
run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DENABLE_INTERPRETER=True -DBUILD_FEX_LINUX_TESTS=True -DENABLE_GLIBC_ALLOCATOR_HOOK_FAULT=True -DENABLE_JEMALLOC_GLIBC_ALLOC=False -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install
run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DBUILD_FEX_LINUX_TESTS=True -DENABLE_GLIBC_ALLOCATOR_HOOK_FAULT=True -DENABLE_JEMALLOC_GLIBC_ALLOC=False -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install
- name: Build
working-directory: ${{runner.workspace}}/build


@@ -74,7 +74,7 @@ jobs:
# Note the current convention is to use the -S and -B options here to specify source
# and build directories, but this is only available with CMake 3.13 and higher.
# The CMake binaries on the Github Actions machines are (as of this writing) 3.12
run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_TOOLCHAIN_FILE=$GITHUB_WORKSPACE/toolchain_mingw.cmake -DMINGW_TRIPLE=$MINGW_TRIPLE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DENABLE_INTERPRETER=False -DBUILD_TESTS=False -DENABLE_JEMALLOC=False -DENABLE_JEMALLOC_GLIBC_ALLOC=False -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install
run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_TOOLCHAIN_FILE=$GITHUB_WORKSPACE/toolchain_mingw.cmake -DMINGW_TRIPLE=$MINGW_TRIPLE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DBUILD_TESTS=False -DENABLE_JEMALLOC=False -DENABLE_JEMALLOC_GLIBC_ALLOC=False -DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/build/install
- name: Build
working-directory: ${{runner.workspace}}/build


@@ -25,7 +25,6 @@ option(ENABLE_JEMALLOC_GLIBC_ALLOC "Enables jemalloc glibc allocator" TRUE)
option(ENABLE_OFFLINE_TELEMETRY "Enables FEX offline telemetry" TRUE)
option(ENABLE_COMPILE_TIME_TRACE "Enables time trace compile option" FALSE)
option(ENABLE_LIBCXX "Enables LLVM libc++" FALSE)
option(ENABLE_INTERPRETER "Enables FEX's Interpreter" FALSE)
option(ENABLE_CCACHE "Enables ccache for compile caching" TRUE)
option(ENABLE_TERMUX_BUILD "Forces building for Termux on a non-Termux build machine" FALSE)
option(ENABLE_VIXL_SIMULATOR "Forces the FEX JIT to use the VIXL simulator" FALSE)
@@ -97,11 +96,6 @@ if (ENABLE_GDB_SYMBOLS)
endif()
if (ENABLE_INTERPRETER)
message(STATUS "Interpreter enabled")
add_definitions(-DINTERPRETER_ENABLED=1)
endif()
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/Bin)


@@ -159,23 +159,6 @@ if (ENABLE_GLIBC_ALLOCATOR_HOOK_FAULT)
Utils/AllocatorOverride.cpp)
endif()
if (ENABLE_INTERPRETER)
list(APPEND SRCS
Interface/Core/Interpreter/InterpreterCore.cpp
Interface/Core/Interpreter/InterpreterOps.cpp
Interface/Core/Interpreter/ALUOps.cpp
Interface/Core/Interpreter/AtomicOps.cpp
Interface/Core/Interpreter/BranchOps.cpp
Interface/Core/Interpreter/ConversionOps.cpp
Interface/Core/Interpreter/EncryptionOps.cpp
Interface/Core/Interpreter/F80Ops.cpp
Interface/Core/Interpreter/FlagOps.cpp
Interface/Core/Interpreter/MemoryOps.cpp
Interface/Core/Interpreter/MiscOps.cpp
Interface/Core/Interpreter/MoveOps.cpp
Interface/Core/Interpreter/VectorOps.cpp)
endif()
set(DEFINES -DTHREAD_LOCAL=_Thread_local)
if (_M_X86_64)


@@ -339,11 +339,7 @@ namespace DefaultValues {
#else
constexpr uint32_t MaxCoreNumber = 1;
#endif
#ifdef INTERPRETER_ENABLED
constexpr uint32_t MinCoreNumber = 0;
#else
constexpr uint32_t MinCoreNumber = 1;
#endif
if (Core > MaxCoreNumber || Core < MinCoreNumber) {
// Sanitize the core option by setting the core to the JIT if invalid
FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_CORE, fextl::fmt::format("{}", static_cast<uint32_t>(FEXCore::Config::CONFIG_IRJIT)));
@@ -353,11 +349,6 @@ namespace DefaultValues {
if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_CACHEOBJECTCODECOMPILATION)) {
FEX_CONFIG_OPT(CacheObjectCodeCompilation, CACHEOBJECTCODECOMPILATION);
FEX_CONFIG_OPT(Core, CORE);
if (CacheObjectCodeCompilation() && Core() == FEXCore::Config::CONFIG_INTERPRETER) {
// If running the interpreter then disable cache code compilation
FEXCore::Config::Erase(FEXCore::Config::CONFIG_CACHEOBJECTCODECOMPILATION);
}
}
fextl::string ContainerPrefix { FindContainerPrefix() };


@@ -6,12 +6,12 @@
"Default": "FEXCore::Config::ConfigCore::CONFIG_IRJIT",
"TextDefault": "irjit",
"ShortArg": "c",
"Choices": [ "irint", "irjit", "host" ],
"Choices": [ "irjit", "host" ],
"ArgumentHandler": "CoreHandler",
"Desc": [
"Which CPU core to use",
"host only exists on x86_64",
"[irint, irjit, host]"
"[irjit, host]"
]
},
"Multiblock": {


@@ -308,11 +308,6 @@ namespace FEXCore::Context {
FEXCore::Core::InternalThreadState* ContextImpl::InitCore(uint64_t InitialRIP, uint64_t StackPointer) {
// Initialize the CPU core signal handlers & DispatcherConfig
switch (Config.Core) {
#ifdef INTERPRETER_ENABLED
case FEXCore::Config::CONFIG_INTERPRETER:
BackendFeatures = FEXCore::CPU::GetInterpreterBackendFeatures();
break;
#endif
case FEXCore::Config::CONFIG_IRJIT:
#if (_M_X86_64 && JIT_X86_64)
BackendFeatures = FEXCore::CPU::GetX86JITBackendFeatures();
@@ -671,11 +666,6 @@ namespace FEXCore::Context {
// Create CPU backend
switch (Config.Core) {
#ifdef INTERPRETER_ENABLED
case FEXCore::Config::CONFIG_INTERPRETER:
Thread->CPUBackend = FEXCore::CPU::CreateInterpreterCore(this, Thread);
break;
#endif
case FEXCore::Config::CONFIG_IRJIT:
Thread->PassManager->InsertRegisterAllocationPass(DoSRA, HostFeatures.SupportsAVX);

File diff suppressed because it is too large.


@@ -1,792 +0,0 @@
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <FEXCore/Utils/BitUtils.h>
#include <cstdint>
namespace FEXCore::CPU {
#ifdef _M_X86_64
uint8_t AtomicFetchNeg(uint8_t *Addr) {
using Type = uint8_t;
std::atomic<Type> *MemData = reinterpret_cast<std::atomic<Type>*>(Addr);
Type Expected = MemData->load();
Type Desired = -Expected;
do {
Desired = -Expected;
} while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst));
return Expected;
}
uint16_t AtomicFetchNeg(uint16_t *Addr) {
using Type = uint16_t;
std::atomic<Type> *MemData = reinterpret_cast<std::atomic<Type>*>(Addr);
Type Expected = MemData->load();
Type Desired = -Expected;
do {
Desired = -Expected;
} while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst));
return Expected;
}
uint32_t AtomicFetchNeg(uint32_t *Addr) {
using Type = uint32_t;
std::atomic<Type> *MemData = reinterpret_cast<std::atomic<Type>*>(Addr);
Type Expected = MemData->load();
Type Desired = -Expected;
do {
Desired = -Expected;
} while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst));
return Expected;
}
uint64_t AtomicFetchNeg(uint64_t *Addr) {
using Type = uint64_t;
std::atomic<Type> *MemData = reinterpret_cast<std::atomic<Type>*>(Addr);
Type Expected = MemData->load();
Type Desired = -Expected;
do {
Desired = -Expected;
} while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst));
return Expected;
}
template<typename T>
T AtomicCompareAndSwap(T expected, T desired, T *addr)
{
std::atomic<T> *MemData = reinterpret_cast<std::atomic<T>*>(addr);
T Src1 = expected;
T Src2 = desired;
T Expected = Src1;
bool Result = MemData->compare_exchange_strong(Expected, Src2);
return Result ? Src1 : Expected;
}
template uint8_t AtomicCompareAndSwap<uint8_t>(uint8_t expected, uint8_t desired, uint8_t *addr);
template uint16_t AtomicCompareAndSwap<uint16_t>(uint16_t expected, uint16_t desired, uint16_t *addr);
template uint32_t AtomicCompareAndSwap<uint32_t>(uint32_t expected, uint32_t desired, uint32_t *addr);
template uint64_t AtomicCompareAndSwap<uint64_t>(uint64_t expected, uint64_t desired, uint64_t *addr);
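As a side note, the portable path above encodes x86 CMPXCHG's return convention: the helper hands back the expected value when the exchange succeeds and the observed memory value when it fails. A minimal standalone sketch of that contract (hypothetical driver, not part of this diff; CompareAndSwap is an illustrative rename):

#include <atomic>
#include <cassert>
#include <cstdint>

// Same contract as AtomicCompareAndSwap above: old value on success,
// observed value on failure.
template<typename T>
T CompareAndSwap(T Expected, T Desired, T *Addr) {
  auto *Mem = reinterpret_cast<std::atomic<T>*>(Addr);
  T Observed = Expected;
  return Mem->compare_exchange_strong(Observed, Desired) ? Expected : Observed;
}

int main() {
  uint32_t Mem = 42;
  assert(CompareAndSwap<uint32_t>(42, 7, &Mem) == 42 && Mem == 7); // success: returns old value
  assert(CompareAndSwap<uint32_t>(42, 9, &Mem) == 7 && Mem == 7);  // failure: returns what it saw
  return 0;
}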
#else
// Needs to match what the AArch64 JIT and unaligned signal handler expect
uint8_t AtomicFetchNeg(uint8_t *Addr) {
using Type = uint8_t;
Type Result{};
Type Tmp{};
Type TmpStatus{};
__asm__ volatile(
R"(
1:
ldaxrb %w[Result], [%[Memory]];
neg %w[Tmp], %w[Result];
stlxrb %w[TmpStatus], %w[Tmp], [%[Memory]];
cbnz %w[TmpStatus], 1b;
)"
: [Result] "=r" (Result)
, [Tmp] "=r" (Tmp)
, [TmpStatus] "=r" (TmpStatus)
, [Memory] "+r" (Addr)
:: "memory"
);
return Result;
}
uint16_t AtomicFetchNeg(uint16_t *Addr) {
using Type = uint16_t;
Type Result{};
Type Tmp{};
Type TmpStatus{};
__asm__ volatile(
R"(
1:
ldaxrh %w[Result], [%[Memory]];
neg %w[Tmp], %w[Result];
stlxrh %w[TmpStatus], %w[Tmp], [%[Memory]];
cbnz %w[TmpStatus], 1b;
)"
: [Result] "=r" (Result)
, [Tmp] "=r" (Tmp)
, [TmpStatus] "=r" (TmpStatus)
, [Memory] "+r" (Addr)
:: "memory"
);
return Result;
}
uint32_t AtomicFetchNeg(uint32_t *Addr) {
using Type = uint32_t;
Type Result{};
Type Tmp{};
Type TmpStatus{};
__asm__ volatile(
R"(
1:
ldaxr %w[Result], [%[Memory]];
neg %w[Tmp], %w[Result];
stlxr %w[TmpStatus], %w[Tmp], [%[Memory]];
cbnz %w[TmpStatus], 1b;
)"
: [Result] "=r" (Result)
, [Tmp] "=r" (Tmp)
, [TmpStatus] "=r" (TmpStatus)
, [Memory] "+r" (Addr)
:: "memory"
);
return Result;
}
uint64_t AtomicFetchNeg(uint64_t *Addr) {
using Type = uint64_t;
Type Result{};
Type Tmp{};
Type TmpStatus{};
__asm__ volatile(
R"(
1:
ldaxr %[Result], [%[Memory]];
neg %[Tmp], %[Result];
stlxr %w[TmpStatus], %[Tmp], [%[Memory]];
cbnz %w[TmpStatus], 1b;
)"
: [Result] "=r" (Result)
, [Tmp] "=r" (Tmp)
, [TmpStatus] "=r" (TmpStatus)
, [Memory] "+r" (Addr)
:: "memory"
);
return Result;
}
template<>
uint8_t AtomicCompareAndSwap(uint8_t expected, uint8_t desired, uint8_t *addr) {
using Type = uint8_t;
//force Result to r9 (scratch register) or clang spills to stack
register Type Result asm("r9"){};
Type Tmp{};
Type Tmp2{};
__asm__ volatile(
R"(
1:
ldaxrb %w[Tmp], [%[Memory]];
cmp %w[Tmp], %w[Expected], uxtb;
b.ne 2f;
stlxrb %w[Tmp2], %w[Desired], [%[Memory]];
cbnz %w[Tmp2], 1b;
mov %w[Result], %w[Expected];
b 3f;
2:
mov %w[Result], %w[Tmp];
clrex;
3:
)"
: [Tmp] "=r" (Tmp)
, [Tmp2] "=r" (Tmp2)
, [Desired] "+r" (desired)
, [Expected] "+r" (expected)
, [Result] "=r" (Result)
, [Memory] "+r" (addr)
:: "memory"
);
return Result;
}
template<>
uint16_t AtomicCompareAndSwap(uint16_t expected, uint16_t desired, uint16_t *addr) {
using Type = uint16_t;
//force Result to r9 (scratch register) or clang spills to stack
register Type Result asm("r9"){};
Type Tmp{};
Type Tmp2{};
__asm__ volatile(
R"(
1:
ldaxrh %w[Tmp], [%[Memory]];
cmp %w[Tmp], %w[Expected], uxth;
b.ne 2f;
stlxrh %w[Tmp2], %w[Desired], [%[Memory]];
cbnz %w[Tmp2], 1b;
mov %w[Result], %w[Expected];
b 3f;
2:
mov %w[Result], %w[Tmp];
clrex;
3:
)"
: [Tmp] "=r" (Tmp)
, [Tmp2] "=r" (Tmp2)
, [Desired] "+r" (desired)
, [Expected] "+r" (expected)
, [Result] "=r" (Result)
, [Memory] "+r" (addr)
:: "memory"
);
return Result;
}
template<>
uint32_t AtomicCompareAndSwap(uint32_t expected, uint32_t desired, uint32_t *addr) {
using Type = uint32_t;
//force Result to r9 (scratch register) or clang spills to stack
register Type Result asm("r9"){};
Type Tmp{};
Type Tmp2{};
__asm__ volatile(
R"(
1:
ldaxr %w[Tmp], [%[Memory]];
cmp %w[Tmp], %w[Expected];
b.ne 2f;
stlxr %w[Tmp2], %w[Desired], [%[Memory]];
cbnz %w[Tmp2], 1b;
mov %w[Result], %w[Expected];
b 3f;
2:
mov %w[Result], %w[Tmp];
clrex;
3:
)"
: [Tmp] "=r" (Tmp)
, [Tmp2] "=r" (Tmp2)
, [Desired] "+r" (desired)
, [Expected] "+r" (expected)
, [Result] "=r" (Result)
, [Memory] "+r" (addr)
:: "memory"
);
return Result;
}
template<>
uint64_t AtomicCompareAndSwap(uint64_t expected, uint64_t desired, uint64_t *addr) {
using Type = uint64_t;
//force Result to r9 (scratch register) or clang spills to stack
register Type Result asm("r9"){};
Type Tmp{};
Type Tmp2{};
__asm__ volatile(
R"(
1:
ldaxr %[Tmp], [%[Memory]];
cmp %[Tmp], %[Expected];
b.ne 2f;
stlxr %w[Tmp2], %[Desired], [%[Memory]];
cbnz %w[Tmp2], 1b;
mov %[Result], %[Expected];
b 3f;
2:
mov %[Result], %[Tmp];
clrex;
3:
)"
: [Tmp] "=r" (Tmp)
, [Tmp2] "=r" (Tmp2)
, [Desired] "+r" (desired)
, [Expected] "+r" (expected)
, [Result] "=r" (Result)
, [Memory] "+r" (addr)
:: "memory"
);
return Result;
}
#endif
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
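DEF_OP is just a signature stamp: every interpreter op handler shares one shape, so the dispatch table stays uniform. For instance, DEF_OP(CAS) expands to:

void InterpreterOps::Op_CAS(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)

GD and GDP, used throughout the handlers below, refer to the destination slot of the current Node as a value and as a pointer respectively (presumably supplied by InterpreterDefines.h, which is included above).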
DEF_OP(CASPair) {
auto Op = IROp->C<IR::IROp_CASPair>();
// Size is the size of each pair element
switch (IROp->ElementSize) {
case 4: {
GD = AtomicCompareAndSwap(
*GetSrc<uint64_t*>(Data->SSAData, Op->Expected),
*GetSrc<uint64_t*>(Data->SSAData, Op->Desired),
*GetSrc<uint64_t**>(Data->SSAData, Op->Addr)
);
break;
}
case 8: {
std::atomic<__uint128_t> *MemData = *GetSrc<std::atomic<__uint128_t> **>(Data->SSAData, Op->Addr);
__uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Expected);
__uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Desired);
__uint128_t Expected = Src1;
bool Result = MemData->compare_exchange_strong(Expected, Src2);
memcpy(GDP, Result ? &Src1 : &Expected, 16);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown CAS size: {}", IROp->ElementSize); break;
}
}
DEF_OP(CAS) {
auto Op = IROp->C<IR::IROp_CAS>();
uint8_t OpSize = IROp->Size;
switch (OpSize) {
case 1: {
GD = AtomicCompareAndSwap(
*GetSrc<uint8_t*>(Data->SSAData, Op->Expected),
*GetSrc<uint8_t*>(Data->SSAData, Op->Desired),
*GetSrc<uint8_t**>(Data->SSAData, Op->Addr)
);
break;
}
case 2: {
GD = AtomicCompareAndSwap(
*GetSrc<uint16_t*>(Data->SSAData, Op->Expected),
*GetSrc<uint16_t*>(Data->SSAData, Op->Desired),
*GetSrc<uint16_t**>(Data->SSAData, Op->Addr)
);
break;
}
case 4: {
GD = AtomicCompareAndSwap(
*GetSrc<uint32_t*>(Data->SSAData, Op->Expected),
*GetSrc<uint32_t*>(Data->SSAData, Op->Desired),
*GetSrc<uint32_t**>(Data->SSAData, Op->Addr)
);
break;
}
case 8: {
GD = AtomicCompareAndSwap(
*GetSrc<uint64_t*>(Data->SSAData, Op->Expected),
*GetSrc<uint64_t*>(Data->SSAData, Op->Desired),
*GetSrc<uint64_t**>(Data->SSAData, Op->Addr)
);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown CAS size: {}", OpSize); break;
}
}
DEF_OP(AtomicAdd) {
auto Op = IROp->C<IR::IROp_AtomicAdd>();
switch (IROp->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
*MemData += Src;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
*MemData += Src;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
*MemData += Src;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
*MemData += Src;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
}
}
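One detail worth calling out in the non-fetch variants here: compound assignment on a std::atomic is itself an atomic read-modify-write with sequentially consistent ordering, so *MemData += Src behaves like MemData->fetch_add(Src) with the old value discarded. A tiny self-contained illustration:

#include <atomic>
#include <cassert>

int main() {
  std::atomic<unsigned> V{1};
  V += 2;                       // atomic RMW (seq_cst), result discarded
  assert(V.fetch_add(3) == 3);  // fetch_add returns the prior value
  assert(V.load() == 6);
  return 0;
}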
DEF_OP(AtomicSub) {
auto Op = IROp->C<IR::IROp_AtomicSub>();
switch (IROp->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
*MemData -= Src;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
*MemData -= Src;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
*MemData -= Src;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
*MemData -= Src;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
}
}
DEF_OP(AtomicAnd) {
auto Op = IROp->C<IR::IROp_AtomicAnd>();
switch (IROp->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
*MemData &= Src;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
*MemData &= Src;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
*MemData &= Src;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
*MemData &= Src;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
}
}
DEF_OP(AtomicOr) {
auto Op = IROp->C<IR::IROp_AtomicOr>();
switch (IROp->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
*MemData |= Src;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
*MemData |= Src;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
*MemData |= Src;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
*MemData |= Src;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
}
}
DEF_OP(AtomicXor) {
auto Op = IROp->C<IR::IROp_AtomicXor>();
switch (IROp->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
*MemData ^= Src;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
*MemData ^= Src;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
*MemData ^= Src;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
*MemData ^= Src;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
}
}
DEF_OP(AtomicSwap) {
auto Op = IROp->C<IR::IROp_AtomicSwap>();
switch (IROp->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
uint8_t Previous = MemData->exchange(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
uint16_t Previous = MemData->exchange(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
uint32_t Previous = MemData->exchange(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
uint64_t Previous = MemData->exchange(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
}
}
DEF_OP(AtomicFetchAdd) {
auto Op = IROp->C<IR::IROp_AtomicFetchAdd>();
switch (IROp->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
uint8_t Previous = MemData->fetch_add(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
uint16_t Previous = MemData->fetch_add(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
uint32_t Previous = MemData->fetch_add(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
uint64_t Previous = MemData->fetch_add(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
}
}
DEF_OP(AtomicFetchSub) {
auto Op = IROp->C<IR::IROp_AtomicFetchSub>();
switch (IROp->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
uint8_t Previous = MemData->fetch_sub(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
uint16_t Previous = MemData->fetch_sub(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
uint32_t Previous = MemData->fetch_sub(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
uint64_t Previous = MemData->fetch_sub(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
}
}
DEF_OP(AtomicFetchAnd) {
auto Op = IROp->C<IR::IROp_AtomicFetchAnd>();
switch (IROp->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
uint8_t Previous = MemData->fetch_and(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
uint16_t Previous = MemData->fetch_and(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
uint32_t Previous = MemData->fetch_and(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
uint64_t Previous = MemData->fetch_and(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
}
}
DEF_OP(AtomicFetchOr) {
auto Op = IROp->C<IR::IROp_AtomicFetchOr>();
switch (IROp->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
uint8_t Previous = MemData->fetch_or(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
uint16_t Previous = MemData->fetch_or(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
uint32_t Previous = MemData->fetch_or(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
uint64_t Previous = MemData->fetch_or(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
}
}
DEF_OP(AtomicFetchXor) {
auto Op = IROp->C<IR::IROp_AtomicFetchXor>();
switch (IROp->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Addr);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
uint8_t Previous = MemData->fetch_xor(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Addr);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
uint16_t Previous = MemData->fetch_xor(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Addr);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
uint32_t Previous = MemData->fetch_xor(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Addr);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
uint64_t Previous = MemData->fetch_xor(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
}
}
DEF_OP(AtomicFetchNeg) {
auto Op = IROp->C<IR::IROp_AtomicFetchNeg>();
switch (IROp->Size) {
case 1: {
using Type = uint8_t;
GD = AtomicFetchNeg(*GetSrc<Type**>(Data->SSAData, Op->Addr));
break;
}
case 2: {
using Type = uint16_t;
GD = AtomicFetchNeg(*GetSrc<Type**>(Data->SSAData, Op->Addr));
break;
}
case 4: {
using Type = uint32_t;
GD = AtomicFetchNeg(*GetSrc<Type**>(Data->SSAData, Op->Addr));
break;
}
case 8: {
using Type = uint64_t;
GD = AtomicFetchNeg(*GetSrc<Type**>(Data->SSAData, Op->Addr));
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
}
}
DEF_OP(TelemetrySetValue) {
#ifndef FEX_DISABLE_TELEMETRY
auto Op = IROp->C<IR::IROp_TelemetrySetValue>();
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
auto TelemetryPtr = reinterpret_cast<std::atomic<uint64_t>*>(Data->State->CurrentFrame->Pointers.Common.TelemetryValueAddresses[Op->TelemetryValueIndex]);
uint64_t Set{};
if (Src != 0) {
Set = 1;
}
*TelemetryPtr |= Set;
#endif
}
#undef DEF_OP
} // namespace FEXCore::CPU


@@ -1,158 +0,0 @@
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Context/Context.h"
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include "Interface/HLE/Thunks/Thunks.h"
#include <FEXCore/Utils/BitUtils.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <cstdint>
#include <unistd.h>
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
DEF_OP(CallbackReturn) {
Data->State->CurrentFrame->Pointers.Interpreter.CallbackReturn(Data->State, Data->StackEntry);
}
DEF_OP(ExitFunction) {
auto Op = IROp->C<IR::IROp_ExitFunction>();
uint8_t OpSize = IROp->Size;
uintptr_t* ContextPtr = reinterpret_cast<uintptr_t*>(Data->State->CurrentFrame);
void *ContextData = reinterpret_cast<void*>(ContextPtr);
void *Src = GetSrc<void*>(Data->SSAData, Op->NewRIP);
memcpy(ContextData, Src, OpSize);
Data->BlockResults.Quit = true;
}
DEF_OP(Jump) {
auto Op = IROp->C<IR::IROp_Jump>();
const uintptr_t ListBegin = Data->CurrentIR->GetListData();
const uintptr_t DataBegin = Data->CurrentIR->GetData();
Data->BlockIterator = IR::NodeIterator(ListBegin, DataBegin, Op->TargetBlock);
Data->BlockResults.Redo = true;
}
DEF_OP(CondJump) {
auto Op = IROp->C<IR::IROp_CondJump>();
const uintptr_t ListBegin = Data->CurrentIR->GetListData();
const uintptr_t DataBegin = Data->CurrentIR->GetData();
bool CompResult;
const uint64_t Src1 = *GetSrc<uint64_t*>(Data->SSAData, Op->Cmp1);
const uint64_t Src2 = *GetSrc<uint64_t*>(Data->SSAData, Op->Cmp2);
if (Op->CompareSize == 4)
CompResult = IsConditionTrue<uint32_t, int32_t, float>(Op->Cond.Val, Src1, Src2);
else
CompResult = IsConditionTrue<uint64_t, int64_t, double>(Op->Cond.Val, Src1, Src2);
if (CompResult) {
Data->BlockIterator = IR::NodeIterator(ListBegin, DataBegin, Op->TrueBlock);
}
else {
Data->BlockIterator = IR::NodeIterator(ListBegin, DataBegin, Op->FalseBlock);
}
Data->BlockResults.Redo = true;
}
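Note that Jump and CondJump never transfer control themselves: they retarget Data->BlockIterator and raise BlockResults.Redo so the outer dispatch loop re-enters at the new block, while ExitFunction raises Quit to unwind entirely. That loop lived in a file whose diff is suppressed above, so the following is only an assumed sketch of its shape, with a toy stand-in for block execution:

#include <cstdio>

struct BlockResults { bool Redo = false; bool Quit = false; };

int main() {
  BlockResults Results{};
  int BlockIterator = 0;  // toy stand-in for Data->BlockIterator
  while (true) {
    Results = {};
    // "Execute" the current block; its terminator retargets the iterator.
    if (BlockIterator < 3) { BlockIterator += 1; Results.Redo = true; } // like Jump/CondJump
    else                   { Results.Quit = true; }                     // like ExitFunction
    if (Results.Quit) break;    // leave the translated function
    if (Results.Redo) continue; // re-enter at the retargeted block
  }
  std::printf("quit at block %d\n", BlockIterator);
  return 0;
}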
DEF_OP(Syscall) {
auto Op = IROp->C<IR::IROp_Syscall>();
FEXCore::HLE::SyscallArguments Args;
for (size_t j = 0; j < FEXCore::HLE::SyscallArguments::MAX_ARGS; ++j) {
if (Op->Header.Args[j].IsInvalid()) break;
Args.Argument[j] = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[j]);
}
uint64_t Res = FEXCore::Context::HandleSyscall(static_cast<Context::ContextImpl*>(Data->State->CTX)->SyscallHandler, Data->State->CurrentFrame, &Args);
GD = Res;
}
DEF_OP(InlineSyscall) {
auto Op = IROp->C<IR::IROp_InlineSyscall>();
FEXCore::HLE::SyscallArguments Args;
for (size_t j = 0; j < FEXCore::HLE::SyscallArguments::MAX_ARGS; ++j) {
if (Op->Header.Args[j].IsInvalid()) break;
Args.Argument[j] = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[j]);
}
// We don't want the errno handling but I also don't want to write inline ASM atm
uint64_t Res = syscall(
Op->HostSyscallNumber,
Args.Argument[0],
Args.Argument[1],
Args.Argument[2],
Args.Argument[3],
Args.Argument[4],
Args.Argument[5],
Args.Argument[6]
);
if (Res == -1) {
Res = -errno;
}
GD = Res;
}
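The -errno fixup above undoes the libc convention: the raw Linux syscall ABI returns a negative errno value directly, while the libc syscall(2) wrapper folds that into -1 plus errno. Since the dispatcher expects the raw kernel form, the wrapper's translation is reversed. A small sketch of the same round-trip (assumes Linux):

#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <unistd.h>
#include <sys/syscall.h>

int main() {
  int64_t Res = syscall(SYS_close, -1);  // invalid fd: wrapper returns -1, sets errno
  if (Res == -1) {
    Res = -errno;                        // recover the raw kernel form
  }
  std::printf("raw result %ld, expected %d (-EBADF)\n", (long)Res, -EBADF);
  return 0;
}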
DEF_OP(Thunk) {
auto Op = IROp->C<IR::IROp_Thunk>();
auto thunkFn = static_cast<Context::ContextImpl*>(Data->State->CTX)->ThunkHandler->LookupThunk(Op->ThunkNameHash);
thunkFn(*GetSrc<void**>(Data->SSAData, Op->ArgPtr));
}
DEF_OP(ValidateCode) {
auto Op = IROp->C<IR::IROp_ValidateCode>();
auto CodePtr = Data->CurrentEntry + Op->Offset;
if (memcmp((void*)CodePtr, &Op->CodeOriginalLow, Op->CodeLength) != 0) {
GD = 1;
} else {
GD = 0;
}
}
DEF_OP(ThreadRemoveCodeEntry) {
static_cast<Context::ContextImpl*>(Data->State->CTX)->ThreadRemoveCodeEntryFromJit(Data->State->CurrentFrame, Data->CurrentEntry);
}
DEF_OP(CPUID) {
auto Op = IROp->C<IR::IROp_CPUID>();
uint64_t *DstPtr = GetDest<uint64_t*>(Data->SSAData, Node);
const uint64_t Arg = *GetSrc<uint64_t*>(Data->SSAData, Op->Function);
const uint64_t Leaf = *GetSrc<uint64_t*>(Data->SSAData, Op->Leaf);
auto Results = Data->State->CTX->RunCPUIDFunction(Arg, Leaf);
memcpy(DstPtr, &Results, sizeof(uint32_t) * 4);
}
DEF_OP(XGETBV) {
auto Op = IROp->C<IR::IROp_XGetBV>();
uint32_t *DstPtr = GetDest<uint32_t*>(Data->SSAData, Node);
const uint32_t Function = *GetSrc<uint32_t*>(Data->SSAData, Op->Function);
auto Results = Data->State->CTX->RunXCRFunction(Function);
memcpy(DstPtr, &Results, sizeof(uint32_t) * 2);
}
#undef DEF_OP
} // namespace FEXCore::CPU


@@ -1,279 +0,0 @@
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
DEF_OP(VInsGPR) {
const auto Op = IROp->C<IR::IROp_VInsGPR>();
const auto OpSize = IROp->Size;
const auto ElementSize = Op->Header.ElementSize;
const auto ElementSizeBits = ElementSize * 8;
constexpr auto SSEBitSize = Core::CPUState::XMM_SSE_REG_SIZE * 8;
const uint64_t Offset = Op->DestIdx * ElementSizeBits;
const auto InUpperLane = Offset >= SSEBitSize;
__uint128_t Mask = (1ULL << ElementSizeBits) - 1;
if (ElementSize == 8) {
Mask = ~0ULL;
}
const auto Src1 = *GetSrc<InterpVector256*>(Data->SSAData, Op->DestVector);
const auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Src);
const auto Scalar = Src2 & Mask;
const auto ScaledOffset = InUpperLane ? Offset - SSEBitSize
: Offset;
// Now shift into place and set all bits but
// the ones where we're going to insert our value.
Mask <<= ScaledOffset;
Mask = ~Mask;
const auto Dst = [&] {
if (InUpperLane) {
return InterpVector256{
.Lower = Src1.Lower,
.Upper = (Src1.Upper & Mask) | (Scalar << ScaledOffset),
};
} else {
return InterpVector256{
.Lower = (Src1.Lower & Mask) | (Scalar << ScaledOffset),
.Upper = Src1.Upper,
};
}
}();
memcpy(GDP, &Dst, OpSize);
}
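The mask/shift sequence above is ordinary bitfield insertion, just applied per 128-bit lane. A standalone scalar sketch of the same idea (illustrative only; InsertElement is a hypothetical helper, element sizes below 128 bits):

#include <cassert>
#include <cstdint>

// Insert a Bits-wide element at element index Idx of a 128-bit lane.
static __uint128_t InsertElement(__uint128_t Vec, __uint128_t Val, unsigned Bits, unsigned Idx) {
  const __uint128_t Mask = (__uint128_t{1} << Bits) - 1;  // element-sized mask
  const unsigned Offset = Idx * Bits;
  // Clear the destination element, then OR the new value into place.
  return (Vec & ~(Mask << Offset)) | ((Val & Mask) << Offset);
}

int main() {
  __uint128_t V = 0;
  V = InsertElement(V, 0xBEEF, 16, 2);    // element 2 of a u16x8 vector
  assert((uint16_t)(V >> 32) == 0xBEEF);
  return 0;
}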
DEF_OP(VCastFromGPR) {
auto Op = IROp->C<IR::IROp_VCastFromGPR>();
memcpy(GDP, GetSrc<void*>(Data->SSAData, Op->Src), Op->Header.ElementSize);
}
DEF_OP(VDupFromGPR) {
const auto Op = IROp->C<IR::IROp_VDupFromGPR>();
const auto OpSize = IROp->Size;
const auto ElementSize = IROp->ElementSize;
const auto NumElements = OpSize / IROp->ElementSize;
TempVectorDataArray Tmp{};
const auto *Src = GetSrc<void*>(Data->SSAData, Op->Src);
for (size_t i = 0; i < NumElements; i++) {
memcpy(&Tmp[i * ElementSize], Src, ElementSize);
}
memcpy(GDP, Tmp.data(), sizeof(Tmp));
}
DEF_OP(Float_FromGPR_S) {
auto Op = IROp->C<IR::IROp_Float_FromGPR_S>();
const uint16_t Conv = (Op->Header.ElementSize << 8) | Op->SrcElementSize;
switch (Conv) {
case 0x0404: { // Float <- int32_t
const float Dst = (float)*GetSrc<int32_t*>(Data->SSAData, Op->Src);
memcpy(GDP, &Dst, Op->Header.ElementSize);
break;
}
case 0x0408: { // Float <- int64_t
const float Dst = (float)*GetSrc<int64_t*>(Data->SSAData, Op->Src);
memcpy(GDP, &Dst, Op->Header.ElementSize);
break;
}
case 0x0804: { // Double <- int32_t
const double Dst = (double)*GetSrc<int32_t*>(Data->SSAData, Op->Src);
memcpy(GDP, &Dst, Op->Header.ElementSize);
break;
}
case 0x0808: { // Double <- int64_t
const double Dst = (double)*GetSrc<int64_t*>(Data->SSAData, Op->Src);
memcpy(GDP, &Dst, Op->Header.ElementSize);
break;
}
}
}
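The Conv key packs the destination and source element sizes (in bytes) into one switchable value, so 0x0804 reads as "8-byte destination from 4-byte source". The encoding is easy to sanity-check:

static_assert(((8 << 8) | 4) == 0x0804); // Double <- int32_t
static_assert(((4 << 8) | 8) == 0x0408); // Float  <- int64_t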
DEF_OP(Float_FToF) {
auto Op = IROp->C<IR::IROp_Float_FToF>();
const uint16_t Conv = (Op->Header.ElementSize << 8) | Op->SrcElementSize;
switch (Conv) {
case 0x0804: { // Double <- Float
const double Dst = (double)*GetSrc<float*>(Data->SSAData, Op->Scalar);
memcpy(GDP, &Dst, 8);
break;
}
case 0x0408: { // Float <- Double
const float Dst = (float)*GetSrc<double*>(Data->SSAData, Op->Scalar);
memcpy(GDP, &Dst, 4);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown FCVT sizes: 0x{:x}", Conv);
}
}
DEF_OP(Vector_SToF) {
auto Op = IROp->C<IR::IROp_Vector_SToF>();
const uint8_t OpSize = IROp->Size;
void *Src = GetSrc<void*>(Data->SSAData, Op->Vector);
TempVectorDataArray Tmp{};
const uint8_t ElementSize = Op->Header.ElementSize;
const uint8_t Elements = OpSize == ElementSize ? 1 : OpSize / ElementSize;
const auto Func = [](auto a, auto min, auto max) { return a; };
switch (ElementSize) {
DO_VECTOR_1SRC_2TYPE_OP(4, float, int32_t, Func, 0, 0)
DO_VECTOR_1SRC_2TYPE_OP(8, double, int64_t, Func, 0, 0)
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
memcpy(GDP, Tmp.data(), OpSize);
}
DEF_OP(Vector_FToZS) {
const auto Op = IROp->C<IR::IROp_Vector_FToZS>();
const uint8_t OpSize = IROp->Size;
void *Src = GetSrc<void*>(Data->SSAData, Op->Vector);
TempVectorDataArray Tmp{};
const uint8_t ElementSize = Op->Header.ElementSize;
const uint8_t Elements = OpSize == ElementSize ? 1 : OpSize / ElementSize;
const auto Func = [](auto a, auto min, auto max) { return std::trunc(a); };
switch (ElementSize) {
DO_VECTOR_1SRC_2TYPE_OP(4, int32_t, float, Func, 0, 0)
DO_VECTOR_1SRC_2TYPE_OP(8, int64_t, double, Func, 0, 0)
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
memcpy(GDP, Tmp.data(), OpSize);
}
DEF_OP(Vector_FToS) {
const auto Op = IROp->C<IR::IROp_Vector_FToS>();
const uint8_t OpSize = IROp->Size;
void *Src = GetSrc<void*>(Data->SSAData, Op->Vector);
TempVectorDataArray Tmp{};
const uint8_t ElementSize = Op->Header.ElementSize;
const uint8_t Elements = OpSize == ElementSize ? 1 : OpSize / ElementSize;
const auto Func = [](auto a, auto min, auto max) { return std::nearbyint(a); };
switch (ElementSize) {
DO_VECTOR_1SRC_2TYPE_OP(4, int32_t, float, Func, 0, 0)
DO_VECTOR_1SRC_2TYPE_OP(8, int64_t, double, Func, 0, 0)
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
memcpy(GDP, Tmp.data(), OpSize);
}
DEF_OP(Vector_FToF) {
const auto Op = IROp->C<IR::IROp_Vector_FToF>();
const uint8_t OpSize = IROp->Size;
void *Src = GetSrc<void*>(Data->SSAData, Op->Vector);
TempVectorDataArray Tmp{};
const uint16_t ElementSize = Op->Header.ElementSize;
const uint16_t Conv = (ElementSize << 8) | Op->SrcElementSize;
const auto Func = [](auto a, auto min, auto max) { return a; };
switch (Conv) {
case 0x0804: { // Double <- float
// Only the lower elements from the source
// This uses half the source elements
uint8_t Elements = OpSize / 8;
DO_VECTOR_1SRC_2TYPE_OP_NOSIZE(double, float, Func, 0, 0)
break;
}
case 0x0408: { // Float <- Double
// A little bit tricky here:
// sometimes this is used to convert from a 128-bit vector register
// into a 64-bit vector register with different-sized elements
// eg: %5 i32v2 = Vector_FToF %4 i128, #0x8
uint8_t Elements = OpSize == 8 ? 2 : OpSize / Op->SrcElementSize;
DO_VECTOR_1SRC_2TYPE_OP_NOSIZE(float, double, Func, 0, 0)
break;
}
default:
LOGMAN_MSG_A_FMT("Unknown Conversion Type : 0x{:04x}", Conv);
break;
}
memcpy(GDP, Tmp.data(), OpSize);
}
DEF_OP(Vector_FToI) {
const auto Op = IROp->C<IR::IROp_Vector_FToI>();
const uint8_t OpSize = IROp->Size;
void *Src = GetSrc<void*>(Data->SSAData, Op->Vector);
TempVectorDataArray Tmp{};
const uint8_t ElementSize = Op->Header.ElementSize;
const uint8_t Elements = OpSize / ElementSize;
const auto Func_Nearest = [](auto a) { return std::rint(a); };
const auto Func_Neg = [](auto a) { return std::floor(a); };
const auto Func_Pos = [](auto a) { return std::ceil(a); };
const auto Func_Trunc = [](auto a) { return std::trunc(a); };
const auto Func_Host = [](auto a) { return std::rint(a); };
switch (Op->Round) {
case FEXCore::IR::Round_Nearest.Val:
switch (ElementSize) {
DO_VECTOR_1SRC_OP(4, float, Func_Nearest)
DO_VECTOR_1SRC_OP(8, double, Func_Nearest)
}
break;
case FEXCore::IR::Round_Negative_Infinity.Val:
switch (ElementSize) {
DO_VECTOR_1SRC_OP(4, float, Func_Neg)
DO_VECTOR_1SRC_OP(8, double, Func_Neg)
}
break;
case FEXCore::IR::Round_Positive_Infinity.Val:
switch (ElementSize) {
DO_VECTOR_1SRC_OP(4, float, Func_Pos)
DO_VECTOR_1SRC_OP(8, double, Func_Pos)
}
break;
case FEXCore::IR::Round_Towards_Zero.Val:
switch (ElementSize) {
DO_VECTOR_1SRC_OP(4, float, Func_Trunc)
DO_VECTOR_1SRC_OP(8, double, Func_Trunc)
}
break;
case FEXCore::IR::Round_Host.Val:
switch (ElementSize) {
DO_VECTOR_1SRC_OP(4, float, Func_Host)
DO_VECTOR_1SRC_OP(8, double, Func_Host)
}
break;
}
memcpy(GDP, Tmp.data(), OpSize);
}
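The Round_Host case relies on std::rint honouring the process's current floating-point rounding mode, whereas floor/ceil/trunc always round in a fixed direction. A minimal standalone illustration of that distinction (assumes <cfenv> support):

#include <cfenv>
#include <cmath>
#include <cstdio>

int main() {
  std::fesetround(FE_UPWARD);
  std::printf("rint(2.3) upward  = %.1f\n", std::rint(2.3));  // 3.0: follows the mode
  std::fesetround(FE_TONEAREST);
  std::printf("rint(2.3) nearest = %.1f\n", std::rint(2.3));  // 2.0
  std::printf("trunc(2.9)        = %.1f\n", std::trunc(2.9)); // 2.0 regardless of mode
  return 0;
}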
#undef DEF_OP
} // namespace FEXCore::CPU


@@ -1,557 +0,0 @@
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
namespace AES {
static __uint128_t InvShiftRows(uint8_t *State) {
uint8_t Shifted[16] = {
State[0], State[13], State[10], State[7],
State[4], State[1], State[14], State[11],
State[8], State[5], State[2], State[15],
State[12], State[9], State[6], State[3],
};
__uint128_t Res{};
memcpy(&Res, Shifted, 16);
return Res;
}
static __uint128_t InvSubBytes(uint8_t *State) {
// 16x16 matrix table
static const uint8_t InvSubstitutionTable[256] = {
0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
};
// Uses a byte substitution table with a constant set of values
// Needs to do a table look up
uint8_t Substituted[16];
for (size_t i = 0; i < 16; ++i) {
Substituted[i] = InvSubstitutionTable[State[i]];
}
__uint128_t Res{};
memcpy(&Res, Substituted, 16);
return Res;
}
static __uint128_t ShiftRows(uint8_t *State) {
uint8_t Shifted[16] = {
State[0], State[5], State[10], State[15],
State[4], State[9], State[14], State[3],
State[8], State[13], State[2], State[7],
State[12], State[1], State[6], State[11],
};
__uint128_t Res{};
memcpy(&Res, Shifted, 16);
return Res;
}
static __uint128_t SubBytes(uint8_t *State, size_t Bytes) {
// 16x16 matrix table
static const uint8_t SubstitutionTable[256] = {
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
};
// Uses a byte substitution table with a constant set of values
// Needs to do a table look up
uint8_t Substituted[16];
Bytes = std::min(Bytes, (size_t)16);
for (size_t i = 0; i < Bytes; ++i) {
Substituted[i] = SubstitutionTable[State[i]];
}
__uint128_t Res{};
memcpy(&Res, Substituted, Bytes);
return Res;
}
static uint8_t FFMul02(uint8_t in) {
static const uint8_t FFMul02[256] = {
0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e,
0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e,
0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,
0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,
0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05,
0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25,
0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45,
0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65,
0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85,
0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5,
0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5,
0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5,
};
return FFMul02[in];
}
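FFMul02 is multiplication by x (i.e. by 0x02) in GF(2^8) modulo the AES polynomial x^8 + x^4 + x^3 + x + 1 (0x11b); the 256-entry table is equivalent to the classic "xtime" computation, as this self-contained check suggests:

#include <cassert>
#include <cstdint>

// Multiply by 0x02 in GF(2^8) mod 0x11b -- the AES "xtime" operation.
static uint8_t xtime(uint8_t In) {
  return static_cast<uint8_t>((In << 1) ^ ((In & 0x80) ? 0x1b : 0x00));
}

int main() {
  assert(xtime(0x80) == 0x1b);  // matches FFMul02[0x80] in the table above
  assert(xtime(0x57) == 0xae);  // the worked example from FIPS-197
  return 0;
}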
static uint8_t FFMul03(uint8_t in) {
static const uint8_t FFMul03[256] = {
0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11,
0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,
0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41,
0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1,
0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1,
0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81,
0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a,
0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba,
0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea,
0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda,
0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a,
0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a,
0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a,
0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a,
};
return FFMul03[in];
}
static __uint128_t MixColumns(uint8_t *State) {
uint8_t In0[16] = {
State[0], State[4], State[8], State[12],
State[1], State[5], State[9], State[13],
State[2], State[6], State[10], State[14],
State[3], State[7], State[11], State[15],
};
uint8_t Out0[4]{};
uint8_t Out1[4]{};
uint8_t Out2[4]{};
uint8_t Out3[4]{};
for (size_t i = 0; i < 4; ++i) {
Out0[i] = FFMul02(In0[0 + i]) ^ FFMul03(In0[4 + i]) ^ In0[8 + i] ^ In0[12 + i];
Out1[i] = In0[0 + i] ^ FFMul02(In0[4 + i]) ^ FFMul03(In0[8 + i]) ^ In0[12 + i];
Out2[i] = In0[0 + i] ^ In0[4 + i] ^ FFMul02(In0[8 + i]) ^ FFMul03(In0[12 + i]);
Out3[i] = FFMul03(In0[0 + i]) ^ In0[4 + i] ^ In0[8 + i] ^ FFMul02(In0[12 + i]);
}
uint8_t OutArray[16] = {
Out0[0], Out1[0], Out2[0], Out3[0],
Out0[1], Out1[1], Out2[1], Out3[1],
Out0[2], Out1[2], Out2[2], Out3[2],
Out0[3], Out1[3], Out2[3], Out3[3],
};
__uint128_t Res{};
memcpy(&Res, OutArray, 16);
return Res;
}
static uint8_t FFMul09(uint8_t in) {
static const uint8_t FFMul09[256] = {
0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7,
0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc,
0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01,
0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91,
0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a,
0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa,
0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b,
0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b,
0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0,
0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed,
0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d,
0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6,
0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46,
};
return FFMul09[in];
}
static uint8_t FFMul0B(uint8_t in) {
static const uint8_t FFMul0B[256] = {
0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69,
0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9,
0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12,
0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2,
0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f,
0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f,
0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4,
0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54,
0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e,
0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e,
0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5,
0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55,
0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68,
0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8,
0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13,
0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3,
};
return FFMul0B[in];
}
static uint8_t FFMul0D(uint8_t in) {
static const uint8_t FFMul0D[256] = {
0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b,
0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,
0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0,
0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20,
0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26,
0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d,
0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d,
0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41,
0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a,
0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa,
0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc,
0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c,
0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47,
0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97,
};
return FFMul0D[in];
}
static uint8_t FFMul0E(uint8_t in) {
static const uint8_t FFMul0E[256] = {
0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,
0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba,
0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81,
0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61,
0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7,
0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c,
0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc,
0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b,
0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb,
0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0,
0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6,
0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56,
0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d,
};
return FFMul0E[in];
}
static __uint128_t InvMixColumns(uint8_t *State) {
uint8_t In0[16] = {
State[0], State[4], State[8], State[12],
State[1], State[5], State[9], State[13],
State[2], State[6], State[10], State[14],
State[3], State[7], State[11], State[15],
};
uint8_t Out0[4]{};
uint8_t Out1[4]{};
uint8_t Out2[4]{};
uint8_t Out3[4]{};
for (size_t i = 0; i < 4; ++i) {
Out0[i] = FFMul0E(In0[0 + i]) ^ FFMul0B(In0[4 + i]) ^ FFMul0D(In0[8 + i]) ^ FFMul09(In0[12 + i]);
Out1[i] = FFMul09(In0[0 + i]) ^ FFMul0E(In0[4 + i]) ^ FFMul0B(In0[8 + i]) ^ FFMul0D(In0[12 + i]);
Out2[i] = FFMul0D(In0[0 + i]) ^ FFMul09(In0[4 + i]) ^ FFMul0E(In0[8 + i]) ^ FFMul0B(In0[12 + i]);
Out3[i] = FFMul0B(In0[0 + i]) ^ FFMul0D(In0[4 + i]) ^ FFMul09(In0[8 + i]) ^ FFMul0E(In0[12 + i]);
}
uint8_t OutArray[16] = {
Out0[0], Out1[0], Out2[0], Out3[0],
Out0[1], Out1[1], Out2[1], Out3[1],
Out0[2], Out1[2], Out2[2], Out3[2],
Out0[3], Out1[3], Out2[3], Out3[3],
};
__uint128_t Res{};
memcpy(&Res, OutArray, 16);
return Res;
}
}
namespace CRC32 {
// CRC32 per byte lookup table.
constexpr std::array<uint32_t, 256> CRC32CTable = []() consteval {
std::array<uint32_t, 256> Table{};
// Clang 11.x doesn't support bitreverse as a consteval
// constexpr uint32_t Polynomial = 0x1EDC6F41;
constexpr uint32_t PolynomialRev = 0x82F63B78; //__builtin_bitreverse32(Polynomial);
for (size_t Char = 0; Char < std::size(Table); ++Char) {
uint32_t CurrentChar = Char;
for (size_t i = 0; i < 8; ++i) {
if (CurrentChar & 1) {
CurrentChar = (CurrentChar >> 1) ^ PolynomialRev;
}
else {
CurrentChar >>= 1;
}
}
Table[Char] = CurrentChar;
}
return Table;
}();
uint32_t crc32cb(uint32_t Accumulator, uint8_t data) {
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ data] ^ Accumulator >> 8;
return Accumulator;
}
uint32_t crc32ch(uint32_t Accumulator, uint16_t data) {
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 0) & 0xFF)] ^ Accumulator >> 8;
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 8) & 0xFF)] ^ Accumulator >> 8;
return Accumulator;
}
uint32_t crc32cw(uint32_t Accumulator, uint32_t data) {
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 0) & 0xFF)] ^ Accumulator >> 8;
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 8) & 0xFF)] ^ Accumulator >> 8;
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 16) & 0xFF)] ^ Accumulator >> 8;
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 24) & 0xFF)] ^ Accumulator >> 8;
return Accumulator;
}
uint32_t crc32cx(uint32_t Accumulator, uint64_t data) {
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 0) & 0xFF)] ^ Accumulator >> 8;
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 8) & 0xFF)] ^ Accumulator >> 8;
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 16) & 0xFF)] ^ Accumulator >> 8;
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 24) & 0xFF)] ^ Accumulator >> 8;
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 32) & 0xFF)] ^ Accumulator >> 8;
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 40) & 0xFF)] ^ Accumulator >> 8;
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 48) & 0xFF)] ^ Accumulator >> 8;
Accumulator = CRC32CTable[(uint8_t)Accumulator ^ ((data >> 56) & 0xFF)] ^ Accumulator >> 8;
return Accumulator;
}
}
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
DEF_OP(AESImc) {
auto Op = IROp->C<IR::IROp_VAESImc>();
auto Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Vector);
// Pseudo-code
// Dst = InvMixColumns(STATE)
__uint128_t Tmp{};
Tmp = AES::InvMixColumns(reinterpret_cast<uint8_t*>(&Src1));
memcpy(GDP, &Tmp, sizeof(Tmp));
}
DEF_OP(AESEnc) {
auto Op = IROp->C<IR::IROp_VAESEnc>();
auto Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->State);
auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Key);
// Pseudo-code
// STATE = Src1
// RoundKey = Src2
// STATE = ShiftRows(STATE)
// STATE = SubBytes(STATE)
// STATE = MixColumns(STATE)
// Dst = STATE XOR RoundKey
__uint128_t Tmp{};
Tmp = AES::ShiftRows(reinterpret_cast<uint8_t*>(&Src1));
Tmp = AES::SubBytes(reinterpret_cast<uint8_t*>(&Tmp), 16);
Tmp = AES::MixColumns(reinterpret_cast<uint8_t*>(&Tmp));
Tmp = Tmp ^ Src2;
memcpy(GDP, &Tmp, sizeof(Tmp));
}
DEF_OP(AESEncLast) {
auto Op = IROp->C<IR::IROp_VAESEncLast>();
auto Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->State);
auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Key);
// Pseudo-code
// STATE = Src1
// RoundKey = Src2
// STATE = ShiftRows(STATE)
// STATE = SubBytes(STATE)
// Dst = STATE XOR RoundKey
__uint128_t Tmp{};
Tmp = AES::ShiftRows(reinterpret_cast<uint8_t*>(&Src1));
Tmp = AES::SubBytes(reinterpret_cast<uint8_t*>(&Tmp), 16);
Tmp = Tmp ^ Src2;
memcpy(GDP, &Tmp, sizeof(Tmp));
}
DEF_OP(AESDec) {
auto Op = IROp->C<IR::IROp_VAESDec>();
auto Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->State);
auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Key);
// Pseudo-code
// STATE = Src1
// RoundKey = Src2
// STATE = InvShiftRows(STATE)
// STATE = InvSubBytes(STATE)
// STATE = InvMixColumns(STATE)
// Dst = STATE XOR RoundKey
__uint128_t Tmp{};
Tmp = AES::InvShiftRows(reinterpret_cast<uint8_t*>(&Src1));
Tmp = AES::InvSubBytes(reinterpret_cast<uint8_t*>(&Tmp));
Tmp = AES::InvMixColumns(reinterpret_cast<uint8_t*>(&Tmp));
Tmp = Tmp ^ Src2;
memcpy(GDP, &Tmp, sizeof(Tmp));
}
DEF_OP(AESDecLast) {
auto Op = IROp->C<IR::IROp_VAESDecLast>();
auto Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->State);
auto Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Key);
// Pseudo-code
// STATE = Src1
// RoundKey = Src2
// STATE = InvShiftRows(STATE)
// STATE = InvSubBytes(STATE)
// Dst = STATE XOR RoundKey
__uint128_t Tmp{};
Tmp = AES::InvShiftRows(reinterpret_cast<uint8_t*>(&Src1));
Tmp = AES::InvSubBytes(reinterpret_cast<uint8_t*>(&Tmp));
Tmp = Tmp ^ Src2;
memcpy(GDP, &Tmp, sizeof(Tmp));
}
DEF_OP(AESKeyGenAssist) {
auto Op = IROp->C<IR::IROp_VAESKeyGenAssist>();
const uint8_t *Src1 = GetSrc<uint8_t*>(Data->SSAData, Op->Src);
// Pseudo-code
// X3 = Src1[127:96]
// X2 = Src1[95:64]
// X1 = Src1[63:32]
// X0 = Src1[31:0]
// RCON = (Zext)rcon
// Dest[31:0] = SubWord(X1)
// Dest[63:32] = RotWord(SubWord(X1)) XOR RCON
// Dest[95:64] = SubWord(X3)
// Dest[127:96] = RotWord(SubWord(X3)) XOR RCON
__uint128_t Tmp{};
uint32_t X1{};
uint32_t X3{};
memcpy(&X1, &Src1[4], 4);
memcpy(&X3, &Src1[12], 4);
uint32_t SubWord_X1 = AES::SubBytes(reinterpret_cast<uint8_t*>(&X1), 4);
uint32_t SubWord_X3 = AES::SubBytes(reinterpret_cast<uint8_t*>(&X3), 4);
auto Ror = [] (auto In, auto R) {
auto RotateMask = sizeof(In) * 8 - 1;
R &= RotateMask;
return (In >> R) | (In << (sizeof(In) * 8 - R));
};
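// With the word's bytes packed little-endian, rotate-right by 8 is exactly
// AES RotWord: [a0 a1 a2 a3] -> [a1 a2 a3 a0].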
uint32_t Rot_X1 = Ror(SubWord_X1, 8);
uint32_t Rot_X3 = Ror(SubWord_X3, 8);
Tmp = Rot_X3 ^ Op->RCON;
Tmp <<= 32;
Tmp |= SubWord_X3;
Tmp <<= 32;
Tmp |= Rot_X1 ^ Op->RCON;
Tmp <<= 32;
Tmp |= SubWord_X1;
memcpy(GDP, &Tmp, sizeof(Tmp));
}
DEF_OP(CRC32) {
auto Op = IROp->C<IR::IROp_CRC32>();
uint32_t Src1 = *GetSrc<uint32_t*>(Data->SSAData, Op->Src1);
uint8_t *Src2 = GetSrc<uint8_t*>(Data->SSAData, Op->Src2);
uint32_t Tmp{};
switch (Op->SrcSize) {
case 1:
Tmp = CRC32::crc32cb(Src1, *(uint8_t*)Src2);
break;
case 2:
Tmp = CRC32::crc32ch(Src1, *(uint16_t*)Src2);
break;
case 4:
Tmp = CRC32::crc32cw(Src1, *(uint32_t*)Src2);
break;
case 8:
Tmp = CRC32::crc32cx(Src1, *(uint64_t*)Src2);
break;
default:
LOGMAN_MSG_A_FMT("Unknown CRC32C size: {}", Op->SrcSize);
break;
}
memcpy(GDP, &Tmp, sizeof(Tmp));
}
DEF_OP(PCLMUL) {
auto Op = IROp->C<IR::IROp_PCLMUL>();
const auto Selector = Op->Selector;
auto* Dst = GetDest<uint64_t*>(Data->SSAData, Node);
auto* Src1 = GetSrc<uint64_t*>(Data->SSAData, Op->Src1);
auto* Src2 = GetSrc<uint64_t*>(Data->SSAData, Op->Src2);
const uint64_t TMP1 = (Selector & 0x01) == 0 ? Src1[0] : Src1[1];
const uint64_t TMP2 = (Selector & 0x10) == 0 ? Src2[0] : Src2[1];
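// Carry-less (GF(2)) 64x64->128-bit multiply: shift-and-XOR in place of
// shift-and-add. Selector bit 0 picks the half of Src1, bit 4 the half of
// Src2; make_lo/make_hi produce the low and high 64 bits of the product.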
const auto make_lo = [](uint64_t lhs, uint64_t rhs) {
uint64_t result = 0;
for (size_t i = 0; i < 64; i++) {
if ((lhs & (1ULL << i)) != 0) {
result ^= rhs << i;
}
}
return result;
};
const auto make_hi = [](uint64_t lhs, uint64_t rhs) {
uint64_t result = 0;
for (size_t i = 1; i < 64; i++) {
if ((lhs & (1ULL << i)) != 0) {
result ^= rhs >> (64 - i);
}
}
return result;
};
Dst[0] = make_lo(TMP1, TMP2);
Dst[1] = make_hi(TMP1, TMP2);
}
#undef DEF_OP
} // namespace FEXCore::CPU

View File

@@ -1,349 +0,0 @@
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include "Interface/Core/Interpreter/Fallbacks/F80Fallbacks.h"
#include <cstdint>
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
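// The x87 ops below defer to the shared softfloat fallback handlers, passing
// the guest FCW through so rounding and precision control take effect.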
DEF_OP(F80ADD) {
auto Op = IROp->C<IR::IROp_F80Add>();
const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
const auto Tmp = CPU::OpHandlers<IR::OP_F80ADD>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80SUB) {
auto Op = IROp->C<IR::IROp_F80Sub>();
const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
const auto Tmp = CPU::OpHandlers<IR::OP_F80SUB>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80MUL) {
auto Op = IROp->C<IR::IROp_F80Mul>();
const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
const auto Tmp = CPU::OpHandlers<IR::OP_F80MUL>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80DIV) {
auto Op = IROp->C<IR::IROp_F80Div>();
const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
const auto Tmp = CPU::OpHandlers<IR::OP_F80DIV>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80FYL2X) {
auto Op = IROp->C<IR::IROp_F80FYL2X>();
const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
const auto Tmp = CPU::OpHandlers<IR::OP_F80FYL2X>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80ATAN) {
auto Op = IROp->C<IR::IROp_F80ATAN>();
const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
const auto Tmp = CPU::OpHandlers<IR::OP_F80ATAN>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80FPREM1) {
auto Op = IROp->C<IR::IROp_F80FPREM1>();
const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
const auto Tmp = CPU::OpHandlers<IR::OP_F80FPREM1>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80FPREM) {
auto Op = IROp->C<IR::IROp_F80FPREM>();
const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
const auto Tmp = CPU::OpHandlers<IR::OP_F80FPREM>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80SCALE) {
auto Op = IROp->C<IR::IROp_F80SCALE>();
const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
const auto Tmp = CPU::OpHandlers<IR::OP_F80SCALE>::handle(Data->State->CurrentFrame->State.FCW, Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80CVT) {
auto Op = IROp->C<IR::IROp_F80CVT>();
const uint8_t OpSize = IROp->Size;
const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
switch (OpSize) {
case 4: {
const auto Tmp = CPU::OpHandlers<IR::OP_F80CVT>::handle4(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, OpSize);
break;
}
case 8: {
const auto Tmp = CPU::OpHandlers<IR::OP_F80CVT>::handle8(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, OpSize);
break;
}
default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize);
}
}
DEF_OP(F80CVTINT) {
auto Op = IROp->C<IR::IROp_F80CVTInt>();
const uint8_t OpSize = IROp->Size;
const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
switch (OpSize) {
case 2: {
int16_t Tmp = (Op->Truncate? FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle2t : FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle2)(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(Tmp));
break;
}
case 4: {
int32_t Tmp = (Op->Truncate? FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle4t : FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle4)(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(Tmp));
break;
}
case 8: {
int64_t Tmp = (Op->Truncate? FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle8t : FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle8)(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(Tmp));
break;
}
default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize);
}
}
DEF_OP(F80CVTTO) {
auto Op = IROp->C<IR::IROp_F80CVTTo>();
switch (Op->SrcSize) {
case 4: {
float Src = *GetSrc<float *>(Data->SSAData, Op->X80Src);
const auto Tmp = CPU::OpHandlers<IR::OP_F80CVTTO>::handle4(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
break;
}
case 8: {
double Src = *GetSrc<double *>(Data->SSAData, Op->X80Src);
const auto Tmp = CPU::OpHandlers<IR::OP_F80CVTTO>::handle8(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
break;
}
default: LogMan::Msg::DFmt("Unhandled size: {}", Op->SrcSize);
}
}
DEF_OP(F80CVTTOINT) {
auto Op = IROp->C<IR::IROp_F80CVTToInt>();
switch (Op->SrcSize) {
case 2: {
int16_t Src = *GetSrc<int16_t*>(Data->SSAData, Op->Src);
const auto Tmp = CPU::OpHandlers<IR::OP_F80CVTTOINT>::handle2(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
break;
}
case 4: {
int32_t Src = *GetSrc<int32_t*>(Data->SSAData, Op->Src);
const auto Tmp = CPU::OpHandlers<IR::OP_F80CVTTOINT>::handle4(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
break;
}
default: LogMan::Msg::DFmt("Unhandled size: {}", Op->SrcSize);
}
}
DEF_OP(F80ROUND) {
auto Op = IROp->C<IR::IROp_F80Round>();
const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
const auto Tmp = CPU::OpHandlers<IR::OP_F80ROUND>::handle(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80F2XM1) {
auto Op = IROp->C<IR::IROp_F80F2XM1>();
const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
const auto Tmp = CPU::OpHandlers<IR::OP_F80F2XM1>::handle(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80TAN) {
auto Op = IROp->C<IR::IROp_F80TAN>();
const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
const auto Tmp = CPU::OpHandlers<IR::OP_F80TAN>::handle(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80SQRT) {
auto Op = IROp->C<IR::IROp_F80SQRT>();
const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
const auto Tmp = CPU::OpHandlers<IR::OP_F80SQRT>::handle(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80SIN) {
auto Op = IROp->C<IR::IROp_F80SIN>();
const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
const auto Tmp = CPU::OpHandlers<IR::OP_F80SIN>::handle(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80COS) {
auto Op = IROp->C<IR::IROp_F80COS>();
const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
const auto Tmp = CPU::OpHandlers<IR::OP_F80COS>::handle(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80XTRACT_EXP) {
auto Op = IROp->C<IR::IROp_F80XTRACT_EXP>();
const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
const auto Tmp = CPU::OpHandlers<IR::OP_F80XTRACT_EXP>::handle(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80XTRACT_SIG) {
auto Op = IROp->C<IR::IROp_F80XTRACT_SIG>();
const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
const auto Tmp = CPU::OpHandlers<IR::OP_F80XTRACT_SIG>::handle(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80CMP) {
auto Op = IROp->C<IR::IROp_F80Cmp>();
const auto Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src1);
const auto Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src2);
const auto ResultFlags = CPU::OpHandlers<IR::OP_F80CMP>::handle<IR::FCMP_FLAG_LT | IR::FCMP_FLAG_UNORDERED | IR::FCMP_FLAG_EQ>(Data->State->CurrentFrame->State.FCW, Src1, Src2);
GD = ResultFlags;
}
DEF_OP(F80BCDLOAD) {
auto Op = IROp->C<IR::IROp_F80BCDLoad>();
const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
const auto Tmp = CPU::OpHandlers<IR::OP_F80BCDLOAD>::handle(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80BCDSTORE) {
auto Op = IROp->C<IR::IROp_F80BCDStore>();
const auto Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->X80Src);
const auto Tmp = CPU::OpHandlers<IR::OP_F80BCDSTORE>::handle(Data->State->CurrentFrame->State.FCW, Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F64SIN) {
auto Op = IROp->C<IR::IROp_F64SIN>();
const double Src = *GetSrc<double*>(Data->SSAData, Op->Src);
const double Tmp = sin(Src);
memcpy(GDP, &Tmp, sizeof(double));
}
DEF_OP(F64COS) {
auto Op = IROp->C<IR::IROp_F64COS>();
const double Src = *GetSrc<double*>(Data->SSAData, Op->Src);
const double Tmp = cos(Src);
memcpy(GDP, &Tmp, sizeof(double));
}
DEF_OP(F64TAN) {
auto Op = IROp->C<IR::IROp_F64TAN>();
const double Src = *GetSrc<double*>(Data->SSAData, Op->Src);
const double Tmp = tan(Src);
memcpy(GDP, &Tmp, sizeof(double));
}
DEF_OP(F64F2XM1) {
auto Op = IROp->C<IR::IROp_F64F2XM1>();
const double Src = *GetSrc<double*>(Data->SSAData, Op->Src);
const double Tmp = exp2(Src) - 1.0;
memcpy(GDP, &Tmp, sizeof(double));
}
DEF_OP(F64ATAN) {
auto Op = IROp->C<IR::IROp_F64ATAN>();
const double Src1 = *GetSrc<double*>(Data->SSAData, Op->Src1);
const double Src2 = *GetSrc<double*>(Data->SSAData, Op->Src2);
const double Tmp = atan2(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(double));
}
DEF_OP(F64FPREM) {
auto Op = IROp->C<IR::IROp_F64FPREM>();
const double Src1 = *GetSrc<double*>(Data->SSAData, Op->Src1);
const double Src2 = *GetSrc<double*>(Data->SSAData, Op->Src2);
const double Tmp = fmod(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(double));
}
DEF_OP(F64FPREM1) {
auto Op = IROp->C<IR::IROp_F64FPREM1>();
const double Src1 = *GetSrc<double*>(Data->SSAData, Op->Src1);
const double Src2 = *GetSrc<double*>(Data->SSAData, Op->Src2);
const double Tmp = remainder(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(double));
}
DEF_OP(F64FYL2X) {
auto Op = IROp->C<IR::IROp_F64FYL2X>();
const double Src1 = *GetSrc<double*>(Data->SSAData, Op->Src1);
const double Src2 = *GetSrc<double*>(Data->SSAData, Op->Src2);
const double Tmp = Src2 * log2(Src1);
memcpy(GDP, &Tmp, sizeof(double));
}
DEF_OP(F64SCALE) {
auto Op = IROp->C<IR::IROp_F64SCALE>();
const double Src1 = *GetSrc<double*>(Data->SSAData, Op->Src1);
const double Src2 = *GetSrc<double*>(Data->SSAData, Op->Src2);
const double trunc = (double)(int64_t)(Src2); // truncate toward zero, as FSCALE requires
const double Tmp = Src1 * exp2(trunc);
memcpy(GDP, &Tmp, sizeof(double));
}
#undef DEF_OP
} // namespace FEXCore::CPU

View File

@@ -1,22 +0,0 @@
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
DEF_OP(GetHostFlag) {
auto Op = IROp->C<IR::IROp_GetHostFlag>();
GD = (*GetSrc<uint64_t*>(Data->SSAData, Op->Value) >> Op->Flag) & 1;
}
#undef DEF_OP
} // namespace FEXCore::CPU

View File

@@ -1,424 +0,0 @@
// SPDX-License-Identifier: MIT
#include "Interface/Context/Context.h"
#include "Interface/Core/CPUID.h"
#include "InterpreterDefines.h"
#include "InterpreterOps.h"
#include <FEXCore/Core/CPUBackend.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/Utils/BitUtils.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/LogManager.h>
#include "Interface/HLE/Thunks/Thunks.h"
#include <alloca.h>
#include <algorithm>
#include <array>
#include <atomic>
#include <bit>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <limits>
#include <memory>
namespace FEXCore::CPU {
using OpHandler = void (*)(IR::IROp_Header *IROp, InterpreterOps::IROpData *Data, IR::NodeID Node);
using OpHandlerArray = std::array<OpHandler, IR::IROps::OP_LAST + 1>;
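// Build the dispatch table at compile time: every IR op defaults to
// Op_Unhandled and is then overridden by its REGISTER_OP entry below.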
constexpr OpHandlerArray InterpreterOpHandlers = [] {
OpHandlerArray Handlers{};
for (auto& Entry : Handlers) {
Entry = &InterpreterOps::Op_Unhandled;
}
#define REGISTER_OP(op, x) Handlers[IR::IROps::OP_##op] = &InterpreterOps::Op_##x
// ALU ops
REGISTER_OP(TRUNCELEMENTPAIR, TruncElementPair);
REGISTER_OP(CONSTANT, Constant);
REGISTER_OP(ENTRYPOINTOFFSET, EntrypointOffset);
REGISTER_OP(INLINECONSTANT, InlineConstant);
REGISTER_OP(INLINEENTRYPOINTOFFSET, InlineEntrypointOffset);
REGISTER_OP(CYCLECOUNTER, CycleCounter);
REGISTER_OP(ADD, Add);
REGISTER_OP(ADDNZCV, AddNZCV);
REGISTER_OP(TESTNZ, TestNZ);
REGISTER_OP(SUB, Sub);
REGISTER_OP(SUBNZCV, SubNZCV);
REGISTER_OP(NEG, Neg);
REGISTER_OP(ABS, Abs);
REGISTER_OP(MUL, Mul);
REGISTER_OP(UMUL, UMul);
REGISTER_OP(DIV, Div);
REGISTER_OP(UDIV, UDiv);
REGISTER_OP(REM, Rem);
REGISTER_OP(UREM, URem);
REGISTER_OP(MULH, MulH);
REGISTER_OP(UMULH, UMulH);
REGISTER_OP(OR, Or);
REGISTER_OP(ORLSHL, Orlshl);
REGISTER_OP(ORLSHR, Orlshr);
REGISTER_OP(AND, And);
REGISTER_OP(ANDN, Andn);
REGISTER_OP(XOR, Xor);
REGISTER_OP(LSHL, Lshl);
REGISTER_OP(LSHR, Lshr);
REGISTER_OP(ASHR, Ashr);
REGISTER_OP(ROR, Ror);
REGISTER_OP(EXTR, Extr);
REGISTER_OP(PDEP, PDep);
REGISTER_OP(PEXT, PExt);
REGISTER_OP(LDIV, LDiv);
REGISTER_OP(LUDIV, LUDiv);
REGISTER_OP(LREM, LRem);
REGISTER_OP(LUREM, LURem);
REGISTER_OP(NOT, Not);
REGISTER_OP(POPCOUNT, Popcount);
REGISTER_OP(FINDLSB, FindLSB);
REGISTER_OP(FINDMSB, FindMSB);
REGISTER_OP(FINDTRAILINGZEROES, FindTrailingZeroes);
REGISTER_OP(COUNTLEADINGZEROES, CountLeadingZeroes);
REGISTER_OP(REV, Rev);
REGISTER_OP(BFI, Bfi);
REGISTER_OP(BFXIL, Bfxil);
REGISTER_OP(BFE, Bfe);
REGISTER_OP(SBFE, Sbfe);
REGISTER_OP(SELECT, Select);
REGISTER_OP(VEXTRACTTOGPR, VExtractToGPR);
REGISTER_OP(FLOAT_TOGPR_ZS, Float_ToGPR_ZS);
REGISTER_OP(FLOAT_TOGPR_S, Float_ToGPR_S);
REGISTER_OP(FCMP, FCmp);
// Atomic ops
REGISTER_OP(CASPAIR, CASPair);
REGISTER_OP(CAS, CAS);
REGISTER_OP(ATOMICADD, AtomicAdd);
REGISTER_OP(ATOMICSUB, AtomicSub);
REGISTER_OP(ATOMICAND, AtomicAnd);
REGISTER_OP(ATOMICOR, AtomicOr);
REGISTER_OP(ATOMICXOR, AtomicXor);
REGISTER_OP(ATOMICSWAP, AtomicSwap);
REGISTER_OP(ATOMICFETCHADD, AtomicFetchAdd);
REGISTER_OP(ATOMICFETCHSUB, AtomicFetchSub);
REGISTER_OP(ATOMICFETCHAND, AtomicFetchAnd);
REGISTER_OP(ATOMICFETCHOR, AtomicFetchOr);
REGISTER_OP(ATOMICFETCHXOR, AtomicFetchXor);
REGISTER_OP(ATOMICFETCHNEG, AtomicFetchNeg);
REGISTER_OP(TELEMETRYSETVALUE, TelemetrySetValue);
// Branch ops
REGISTER_OP(CALLBACKRETURN, CallbackReturn);
REGISTER_OP(EXITFUNCTION, ExitFunction);
REGISTER_OP(JUMP, Jump);
REGISTER_OP(CONDJUMP, CondJump);
REGISTER_OP(SYSCALL, Syscall);
REGISTER_OP(INLINESYSCALL, InlineSyscall);
REGISTER_OP(THUNK, Thunk);
REGISTER_OP(VALIDATECODE, ValidateCode);
REGISTER_OP(THREADREMOVECODEENTRY, ThreadRemoveCodeEntry);
REGISTER_OP(CPUID, CPUID);
REGISTER_OP(XGETBV, XGETBV);
// Conversion ops
REGISTER_OP(VINSGPR, VInsGPR);
REGISTER_OP(VCASTFROMGPR, VCastFromGPR);
REGISTER_OP(VDUPFROMGPR, VDupFromGPR);
REGISTER_OP(FLOAT_FROMGPR_S, Float_FromGPR_S);
REGISTER_OP(FLOAT_FTOF, Float_FToF);
REGISTER_OP(VECTOR_STOF, Vector_SToF);
REGISTER_OP(VECTOR_FTOZS, Vector_FToZS);
REGISTER_OP(VECTOR_FTOS, Vector_FToS);
REGISTER_OP(VECTOR_FTOF, Vector_FToF);
REGISTER_OP(VECTOR_FTOI, Vector_FToI);
// Flag ops
REGISTER_OP(GETHOSTFLAG, GetHostFlag);
// Memory ops
REGISTER_OP(LOADCONTEXT, LoadContext);
REGISTER_OP(STORECONTEXT, StoreContext);
REGISTER_OP(LOADREGISTER, LoadRegister);
REGISTER_OP(STOREREGISTER, StoreRegister);
REGISTER_OP(LOADCONTEXTINDEXED, LoadContextIndexed);
REGISTER_OP(STORECONTEXTINDEXED, StoreContextIndexed);
REGISTER_OP(SPILLREGISTER, SpillRegister);
REGISTER_OP(FILLREGISTER, FillRegister);
REGISTER_OP(LOADFLAG, LoadFlag);
REGISTER_OP(STOREFLAG, StoreFlag);
REGISTER_OP(LOADMEM, LoadMem);
REGISTER_OP(STOREMEM, StoreMem);
REGISTER_OP(LOADMEMTSO, LoadMem);
REGISTER_OP(STOREMEMTSO, StoreMem);
REGISTER_OP(VLOADVECTORMASKED, VLoadVectorMasked);
REGISTER_OP(VSTOREVECTORMASKED, VStoreVectorMasked);
REGISTER_OP(VLOADVECTORELEMENT, VLoadVectorElement);
REGISTER_OP(VSTOREVECTORELEMENT, VStoreVectorElement);
REGISTER_OP(VBROADCASTFROMMEM, VBroadcastFromMem);
REGISTER_OP(PUSH, Push);
REGISTER_OP(MEMSET, MemSet);
REGISTER_OP(MEMCPY, MemCpy);
REGISTER_OP(CACHELINECLEAR, CacheLineClear);
REGISTER_OP(CACHELINECLEAN, CacheLineClean);
REGISTER_OP(CACHELINEZERO, CacheLineZero);
// Misc ops
REGISTER_OP(DUMMY, NoOp);
REGISTER_OP(IRHEADER, NoOp);
REGISTER_OP(CODEBLOCK, NoOp);
REGISTER_OP(BEGINBLOCK, NoOp);
REGISTER_OP(ENDBLOCK, NoOp);
REGISTER_OP(GUESTOPCODE, NoOp);
REGISTER_OP(FENCE, Fence);
REGISTER_OP(BREAK, Break);
REGISTER_OP(PRINT, Print);
REGISTER_OP(GETROUNDINGMODE, GetRoundingMode);
REGISTER_OP(SETROUNDINGMODE, SetRoundingMode);
REGISTER_OP(INVALIDATEFLAGS, NoOp);
REGISTER_OP(PROCESSORID, ProcessorID);
REGISTER_OP(RDRAND, RDRAND);
REGISTER_OP(YIELD, Yield);
// Move ops
REGISTER_OP(EXTRACTELEMENTPAIR, ExtractElementPair);
REGISTER_OP(CREATEELEMENTPAIR, CreateElementPair);
// Vector ops
REGISTER_OP(VECTORZERO, VectorZero);
REGISTER_OP(VECTORIMM, VectorImm);
REGISTER_OP(LOADNAMEDVECTORCONSTANT, LoadNamedVectorConstant);
REGISTER_OP(LOADNAMEDVECTORINDEXEDCONSTANT, LoadNamedVectorIndexedConstant);
REGISTER_OP(VMOV, VMov);
REGISTER_OP(VAND, VAnd);
REGISTER_OP(VBIC, VBic);
REGISTER_OP(VOR, VOr);
REGISTER_OP(VXOR, VXor);
REGISTER_OP(VADD, VAdd);
REGISTER_OP(VSUB, VSub);
REGISTER_OP(VUQADD, VUQAdd);
REGISTER_OP(VUQSUB, VUQSub);
REGISTER_OP(VSQADD, VSQAdd);
REGISTER_OP(VSQSUB, VSQSub);
REGISTER_OP(VADDP, VAddP);
REGISTER_OP(VADDV, VAddV);
REGISTER_OP(VUMINV, VUMinV);
REGISTER_OP(VURAVG, VURAvg);
REGISTER_OP(VABS, VAbs);
REGISTER_OP(VPOPCOUNT, VPopcount);
REGISTER_OP(VFADD, VFAdd);
REGISTER_OP(VFADDP, VFAddP);
REGISTER_OP(VFSUB, VFSub);
REGISTER_OP(VFMUL, VFMul);
REGISTER_OP(VFDIV, VFDiv);
REGISTER_OP(VFMIN, VFMin);
REGISTER_OP(VFMAX, VFMax);
REGISTER_OP(VFRECP, VFRecp);
REGISTER_OP(VFSQRT, VFSqrt);
REGISTER_OP(VFRSQRT, VFRSqrt);
REGISTER_OP(VNEG, VNeg);
REGISTER_OP(VFNEG, VFNeg);
REGISTER_OP(VNOT, VNot);
REGISTER_OP(VUMIN, VUMin);
REGISTER_OP(VSMIN, VSMin);
REGISTER_OP(VUMAX, VUMax);
REGISTER_OP(VSMAX, VSMax);
REGISTER_OP(VZIP, VZip);
REGISTER_OP(VZIP2, VZip);
REGISTER_OP(VUNZIP, VUnZip);
REGISTER_OP(VUNZIP2, VUnZip);
REGISTER_OP(VTRN, VTrn);
REGISTER_OP(VTRN2, VTrn);
REGISTER_OP(VBSL, VBSL);
REGISTER_OP(VCMPEQ, VCMPEQ);
REGISTER_OP(VCMPEQZ, VCMPEQZ);
REGISTER_OP(VCMPGT, VCMPGT);
REGISTER_OP(VCMPGTZ, VCMPGTZ);
REGISTER_OP(VCMPLTZ, VCMPLTZ);
REGISTER_OP(VFCMPEQ, VFCMPEQ);
REGISTER_OP(VFCMPNEQ, VFCMPNEQ);
REGISTER_OP(VFCMPLT, VFCMPLT);
REGISTER_OP(VFCMPGT, VFCMPGT);
REGISTER_OP(VFCMPLE, VFCMPLE);
REGISTER_OP(VFCMPORD, VFCMPORD);
REGISTER_OP(VFCMPUNO, VFCMPUNO);
REGISTER_OP(VUSHL, VUShl);
REGISTER_OP(VUSHR, VUShr);
REGISTER_OP(VSSHR, VSShr);
REGISTER_OP(VUSHLS, VUShlS);
REGISTER_OP(VUSHRS, VUShrS);
REGISTER_OP(VSSHRS, VSShrS);
REGISTER_OP(VUSHLSWIDE, VUShlSWide);
REGISTER_OP(VUSHRSWIDE, VUShrSWide);
REGISTER_OP(VSSHRSWIDE, VSShrSWide);
REGISTER_OP(VINSELEMENT, VInsElement);
REGISTER_OP(VDUPELEMENT, VDupElement);
REGISTER_OP(VEXTR, VExtr);
REGISTER_OP(VUSHRI, VUShrI);
REGISTER_OP(VSSHRI, VSShrI);
REGISTER_OP(VSHLI, VShlI);
REGISTER_OP(VUSHRNI, VUShrNI);
REGISTER_OP(VUSHRNI2, VUShrNI2);
REGISTER_OP(VSXTL, VSXTL);
REGISTER_OP(VSXTL2, VSXTL2);
REGISTER_OP(VUXTL, VUXTL);
REGISTER_OP(VUXTL2, VUXTL2);
REGISTER_OP(VSQXTN, VSQXTN);
REGISTER_OP(VSQXTN2, VSQXTN2);
REGISTER_OP(VSQXTNPAIR, VSQXTNPair);
REGISTER_OP(VSQXTUN, VSQXTUN);
REGISTER_OP(VSQXTUN2, VSQXTUN2);
REGISTER_OP(VSQXTUNPAIR, VSQXTUNPair);
REGISTER_OP(VUMUL, VUMul);
REGISTER_OP(VSMUL, VSMul);
REGISTER_OP(VUMULL, VUMull);
REGISTER_OP(VSMULL, VSMull);
REGISTER_OP(VUMULL2, VUMull2);
REGISTER_OP(VSMULL2, VSMull2);
REGISTER_OP(VUMULH, VUMulH);
REGISTER_OP(VSMULH, VSMulH);
REGISTER_OP(VUABDL, VUABDL);
REGISTER_OP(VUABDL2, VUABDL2);
REGISTER_OP(VTBL1, VTBL1);
REGISTER_OP(VTBL2, VTBL2);
REGISTER_OP(VREV32, VRev32);
REGISTER_OP(VREV64, VRev64);
REGISTER_OP(VPCMPESTRX, VPCMPESTRX);
REGISTER_OP(VPCMPISTRX, VPCMPISTRX);
REGISTER_OP(VFCADD, VFCADD);
// Encryption ops
REGISTER_OP(VAESIMC, AESImc);
REGISTER_OP(VAESENC, AESEnc);
REGISTER_OP(VAESENCLAST, AESEncLast);
REGISTER_OP(VAESDEC, AESDec);
REGISTER_OP(VAESDECLAST, AESDecLast);
REGISTER_OP(VAESKEYGENASSIST, AESKeyGenAssist);
REGISTER_OP(CRC32, CRC32);
REGISTER_OP(PCLMUL, PCLMUL);
// F80 ops
REGISTER_OP(F80ADD, F80ADD);
REGISTER_OP(F80SUB, F80SUB);
REGISTER_OP(F80MUL, F80MUL);
REGISTER_OP(F80DIV, F80DIV);
REGISTER_OP(F80FYL2X, F80FYL2X);
REGISTER_OP(F80ATAN, F80ATAN);
REGISTER_OP(F80FPREM1, F80FPREM1);
REGISTER_OP(F80FPREM, F80FPREM);
REGISTER_OP(F80SCALE, F80SCALE);
REGISTER_OP(F80CVT, F80CVT);
REGISTER_OP(F80CVTINT, F80CVTINT);
REGISTER_OP(F80CVTTO, F80CVTTO);
REGISTER_OP(F80CVTTOINT, F80CVTTOINT);
REGISTER_OP(F80ROUND, F80ROUND);
REGISTER_OP(F80F2XM1, F80F2XM1);
REGISTER_OP(F80TAN, F80TAN);
REGISTER_OP(F80SQRT, F80SQRT);
REGISTER_OP(F80SIN, F80SIN);
REGISTER_OP(F80COS, F80COS);
REGISTER_OP(F80XTRACT_EXP, F80XTRACT_EXP);
REGISTER_OP(F80XTRACT_SIG, F80XTRACT_SIG);
REGISTER_OP(F80CMP, F80CMP);
REGISTER_OP(F80BCDLOAD, F80BCDLOAD);
REGISTER_OP(F80BCDSTORE, F80BCDSTORE);
// F64 ops
REGISTER_OP(F64SIN, F64SIN);
REGISTER_OP(F64COS, F64COS);
REGISTER_OP(F64TAN, F64TAN);
REGISTER_OP(F64F2XM1, F64F2XM1);
REGISTER_OP(F64ATAN, F64ATAN);
REGISTER_OP(F64FPREM, F64FPREM);
REGISTER_OP(F64FPREM1, F64FPREM1);
REGISTER_OP(F64FYL2X, F64FYL2X);
REGISTER_OP(F64SCALE, F64SCALE);
return Handlers;
}();
void InterpreterOps::Op_Unhandled(FEXCore::IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) {
LOGMAN_MSG_A_FMT("Unhandled IR Op: {}", FEXCore::IR::GetName(IROp->Op));
}
void InterpreterOps::Op_NoOp(FEXCore::IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node) {
}
void InterpreterOps::InterpretIR(FEXCore::Core::CpuStateFrame *Frame, FEXCore::IR::IRListView const *CurrentIR) {
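// alloca(0) records the current stack position; ops receive it through
// OpData.StackEntry (e.g. for callback re-entry).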
volatile void *StackEntry = alloca(0);
const uintptr_t ListSize = CurrentIR->GetSSACount();
static_assert(sizeof(FEXCore::IR::OrderedNode) == 16);
auto BlockEnd = CurrentIR->GetBlocks().end();
// SSA data elements must be able to accommodate data that would
// fit inside the largest vector size (otherwise vector operations
// go kaboom, and we don't want that).
const size_t SSADataSize = ListSize * MaxInterpeterVectorSize;
InterpreterOps::IROpData OpData{
.State = Frame->Thread,
.CurrentEntry = Frame->State.rip,
.CurrentIR = CurrentIR,
.StackEntry = StackEntry,
.SSAData = alloca(SSADataSize),
.BlockResults = {},
.BlockIterator = CurrentIR->GetBlocks().begin(),
};
// Clear all SSAData entries to zero. Required for Zero-extend semantics
memset(OpData.SSAData, 0, SSADataSize);
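// Walk the IR a block at a time; branch handlers steer control flow by
// updating BlockIterator and BlockResults.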
while (1) {
using namespace FEXCore::IR;
auto [BlockNode, BlockHeader] = OpData.BlockIterator();
auto BlockIROp = BlockHeader->CW<IROp_CodeBlock>();
LOGMAN_THROW_AA_FMT(BlockIROp->Header.Op == IR::OP_CODEBLOCK, "IR type failed to be a code block");
// Reset the block results per block
memset(&OpData.BlockResults, 0, sizeof(OpData.BlockResults));
auto CodeBegin = CurrentIR->at(BlockIROp->Begin);
auto CodeLast = CurrentIR->at(BlockIROp->Last);
for (auto [CodeNode, IROp] : CurrentIR->GetCode(BlockNode)) {
const auto ID = CurrentIR->GetID(CodeNode);
const uint32_t Op = IROp->Op;
// Execute handler
OpHandler Handler = InterpreterOpHandlers[Op];
Handler(IROp, &OpData, ID);
if (OpData.BlockResults.Quit ||
OpData.BlockResults.Redo ||
CodeBegin == CodeLast) {
break;
}
++CodeBegin;
}
// Iterator will have been set, go again
if (OpData.BlockResults.Redo) {
continue;
}
// If we have set to early exit or at the end block then leave
if (OpData.BlockResults.Quit || ++OpData.BlockIterator == BlockEnd) {
break;
}
}
}
}

View File

@@ -1,859 +0,0 @@
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/CPUID.h"
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
namespace FEXCore::CPU {
static inline void CacheLineFlush(char *Addr) {
#ifdef _M_X86_64
__asm volatile (
"clflush (%[Addr]);"
:: [Addr] "r" (Addr)
: "memory");
#else
__builtin___clear_cache(Addr, Addr+64);
#endif
}
static inline void CacheLineClean(char *Addr) {
#ifdef _M_X86_64
__asm volatile (
"clwb (%[Addr]);"
:: [Addr] "r" (Addr)
: "memory");
#elif _M_ARM_64
__asm volatile (
"dc cvac, %[Addr]"
:: [Addr] "r" (Addr)
: "memory");
#else
LOGMAN_THROW_A_FMT("Unsupported architecture with cacheline clean");
#endif
}
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
DEF_OP(LoadContext) {
const auto Op = IROp->C<IR::IROp_LoadContext>();
const auto OpSize = IROp->Size;
const auto ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
const auto Src = ContextPtr + Op->Offset;
#define LOAD_CTX(x, y) \
case x: { \
y const *MemData = reinterpret_cast<y const*>(Src); \
GD = *MemData; \
break; \
}
switch (OpSize) {
LOAD_CTX(1, uint8_t)
LOAD_CTX(2, uint16_t)
LOAD_CTX(4, uint32_t)
LOAD_CTX(8, uint64_t)
case 16:
case 32: {
void const *MemData = reinterpret_cast<void const*>(Src);
memcpy(GDP, MemData, OpSize);
break;
}
default:
LOGMAN_MSG_A_FMT("Unhandled LoadContext size: {}", OpSize);
break;
}
#undef LOAD_CTX
}
DEF_OP(StoreContext) {
const auto Op = IROp->C<IR::IROp_StoreContext>();
const auto OpSize = IROp->Size;
const auto ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
const auto Dst = ContextPtr + Op->Offset;
void *MemData = reinterpret_cast<void*>(Dst);
void *Src = GetSrc<void*>(Data->SSAData, Op->Value);
memcpy(MemData, Src, OpSize);
}
DEF_OP(LoadRegister) {
const auto Op = IROp->C<IR::IROp_LoadRegister>();
const auto OpSize = IROp->Size;
const auto ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
const auto Src = ContextPtr + Op->Offset;
#define LOAD_CTX(x, y) \
case x: { \
y const *MemData = reinterpret_cast<y const*>(Src); \
GD = *MemData; \
break; \
}
switch (OpSize) {
LOAD_CTX(1, uint8_t)
LOAD_CTX(2, uint16_t)
LOAD_CTX(4, uint32_t)
LOAD_CTX(8, uint64_t)
case 16:
case 32: {
void const *MemData = reinterpret_cast<void const*>(Src);
memcpy(GDP, MemData, OpSize);
break;
}
default:
LOGMAN_MSG_A_FMT("Unhandled LoadContext size: {}", OpSize);
break;
}
#undef LOAD_CTX
}
DEF_OP(StoreRegister) {
const auto Op = IROp->C<IR::IROp_StoreRegister>();
const auto OpSize = IROp->Size;
const auto ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
const auto Dst = ContextPtr + Op->Offset;
void *MemData = reinterpret_cast<void*>(Dst);
void *Src = GetSrc<void*>(Data->SSAData, Op->Value);
memcpy(MemData, Src, OpSize);
}
DEF_OP(LoadContextIndexed) {
const auto Op = IROp->C<IR::IROp_LoadContextIndexed>();
const auto OpSize = IROp->Size;
const auto Index = *GetSrc<uint64_t*>(Data->SSAData, Op->Index);
const auto ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
const auto Src = ContextPtr + Op->BaseOffset + (Index * Op->Stride);
#define LOAD_CTX(x, y) \
case x: { \
y const *MemData = reinterpret_cast<y const*>(Src); \
GD = *MemData; \
break; \
}
switch (OpSize) {
LOAD_CTX(1, uint8_t)
LOAD_CTX(2, uint16_t)
LOAD_CTX(4, uint32_t)
LOAD_CTX(8, uint64_t)
case 16:
case 32: {
void const *MemData = reinterpret_cast<void const*>(Src);
memcpy(GDP, MemData, OpSize);
break;
}
default:
LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed size: {}", OpSize);
break;
}
#undef LOAD_CTX
}
DEF_OP(StoreContextIndexed) {
const auto Op = IROp->C<IR::IROp_StoreContextIndexed>();
const auto OpSize = IROp->Size;
const auto Index = *GetSrc<uint64_t*>(Data->SSAData, Op->Index);
const auto ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
const auto Dst = ContextPtr + Op->BaseOffset + (Index * Op->Stride);
void *MemData = reinterpret_cast<void*>(Dst);
void *Src = GetSrc<void*>(Data->SSAData, Op->Value);
memcpy(MemData, Src, OpSize);
}
DEF_OP(SpillRegister) {
LOGMAN_MSG_A_FMT("Unimplemented");
}
DEF_OP(FillRegister) {
LOGMAN_MSG_A_FMT("Unimplemented");
}
DEF_OP(LoadFlag) {
auto Op = IROp->C<IR::IROp_LoadFlag>();
uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
ContextPtr += offsetof(FEXCore::Core::CPUState, flags[0]);
ContextPtr += Op->Flag;
if (Op->Flag == 24 /* NZCV */) {
uint32_t const *MemData = reinterpret_cast<uint32_t const*>(ContextPtr);
GD = *MemData;
} else {
uint8_t const *MemData = reinterpret_cast<uint8_t const*>(ContextPtr);
GD = *MemData;
}
}
DEF_OP(StoreFlag) {
auto Op = IROp->C<IR::IROp_StoreFlag>();
uint32_t Arg = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
ContextPtr += offsetof(FEXCore::Core::CPUState, flags[0]);
ContextPtr += Op->Flag;
if (Op->Flag == 24 /* NZCV */) {
uint32_t *MemData = reinterpret_cast<uint32_t*>(ContextPtr);
*MemData = Arg;
} else {
uint8_t *MemData = reinterpret_cast<uint8_t*>(ContextPtr);
*MemData = Arg;
}
}
DEF_OP(LoadMem) {
const auto Op = IROp->C<IR::IROp_LoadMem>();
const auto OpSize = IROp->Size;
uint8_t const *MemData = *GetSrc<uint8_t const**>(Data->SSAData, Op->Addr);
if (!Op->Offset.IsInvalid()) {
auto Offset = *GetSrc<uintptr_t const*>(Data->SSAData, Op->Offset) * Op->OffsetScale;
switch(Op->OffsetType.Val) {
case IR::MEM_OFFSET_SXTX.Val: MemData += Offset; break;
case IR::MEM_OFFSET_UXTW.Val: MemData += (uint32_t)Offset; break;
case IR::MEM_OFFSET_SXTW.Val: MemData += (int32_t)Offset; break;
}
}
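// Zero the full AVX-sized destination so narrower loads are zero-extended.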
memset(GDP, 0, Core::CPUState::XMM_AVX_REG_SIZE);
switch (OpSize) {
case 1: {
auto D = reinterpret_cast<const std::atomic<uint8_t>*>(MemData);
GD = D->load();
break;
}
case 2: {
auto D = reinterpret_cast<const std::atomic<uint16_t>*>(MemData);
GD = D->load();
break;
}
case 4: {
auto D = reinterpret_cast<const std::atomic<uint32_t>*>(MemData);
GD = D->load();
break;
}
case 8: {
auto D = reinterpret_cast<const std::atomic<uint64_t>*>(MemData);
GD = D->load();
break;
}
default:
memcpy(GDP, MemData, OpSize);
break;
}
}
DEF_OP(StoreMem) {
const auto Op = IROp->C<IR::IROp_StoreMem>();
const auto OpSize = IROp->Size;
uint8_t *MemData = *GetSrc<uint8_t **>(Data->SSAData, Op->Addr);
if (!Op->Offset.IsInvalid()) {
auto Offset = *GetSrc<uintptr_t const*>(Data->SSAData, Op->Offset) * Op->OffsetScale;
switch(Op->OffsetType.Val) {
case IR::MEM_OFFSET_SXTX.Val: MemData += Offset; break;
case IR::MEM_OFFSET_UXTW.Val: MemData += (uint32_t)Offset; break;
case IR::MEM_OFFSET_SXTW.Val: MemData += (int32_t)Offset; break;
}
}
switch (OpSize) {
case 1: {
reinterpret_cast<std::atomic<uint8_t>*>(MemData)->store(*GetSrc<uint8_t*>(Data->SSAData, Op->Value));
break;
}
case 2: {
reinterpret_cast<std::atomic<uint16_t>*>(MemData)->store(*GetSrc<uint16_t*>(Data->SSAData, Op->Value));
break;
}
case 4: {
reinterpret_cast<std::atomic<uint32_t>*>(MemData)->store(*GetSrc<uint32_t*>(Data->SSAData, Op->Value));
break;
}
case 8: {
reinterpret_cast<std::atomic<uint64_t>*>(MemData)->store(*GetSrc<uint64_t*>(Data->SSAData, Op->Value));
break;
}
default:
memcpy(MemData, GetSrc<void*>(Data->SSAData, Op->Value), OpSize);
break;
}
}
DEF_OP(VLoadVectorMasked) {
const auto Op = IROp->C<IR::IROp_VLoadVectorMasked>();
const auto OpSize = IROp->Size;
const auto ElementSize = IROp->ElementSize;
const auto NumElements = OpSize / ElementSize;
const auto *MemData = *GetSrc<uint8_t const**>(Data->SSAData, Op->Addr);
const auto *Mask = GetSrc<uint8_t const*>(Data->SSAData, Op->Mask);
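// AVX-style predication: an element is loaded only when the sign bit (MSB)
// of its corresponding mask element is set; unselected elements stay zero.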
const auto SetElements = [NumElements]<typename T>(void* Dst, const T* MaskValues, const T* MemoryData) {
const auto SignBit = 1ULL << ((sizeof(T) * 8) - 1);
for (size_t i = 0; i < NumElements; i++) {
if ((MaskValues[i] & SignBit) != 0) {
std::memcpy(static_cast<uint8_t*>(Dst) + (i * sizeof(T)), MemoryData + i, sizeof(T));
}
}
};
if (!Op->Offset.IsInvalid()) {
auto Offset = *GetSrc<uintptr_t const*>(Data->SSAData, Op->Offset) * Op->OffsetScale;
switch(Op->OffsetType.Val) {
case IR::MEM_OFFSET_SXTX.Val: MemData += Offset; break;
case IR::MEM_OFFSET_UXTW.Val: MemData += (uint32_t)Offset; break;
case IR::MEM_OFFSET_SXTW.Val: MemData += (int32_t)Offset; break;
}
}
memset(GDP, 0, Core::CPUState::XMM_AVX_REG_SIZE);
switch (ElementSize) {
case 1: {
SetElements(GDP, Mask, MemData);
return;
}
case 2: {
SetElements(GDP,
reinterpret_cast<const uint16_t*>(Mask),
reinterpret_cast<const uint16_t*>(MemData));
return;
}
case 4: {
SetElements(GDP,
reinterpret_cast<const uint32_t*>(Mask),
reinterpret_cast<const uint32_t*>(MemData));
return;
}
case 8: {
SetElements(GDP,
reinterpret_cast<const uint64_t*>(Mask),
reinterpret_cast<const uint64_t*>(MemData));
return;
}
default:
LOGMAN_MSG_A_FMT("Unhandled VLoadVectorMasked element size: {}", ElementSize);
return;
}
}
DEF_OP(VStoreVectorMasked) {
const auto Op = IROp->C<IR::IROp_VStoreVectorMasked>();
const auto OpSize = IROp->Size;
const auto ElementSize = IROp->ElementSize;
const auto NumElements = OpSize / ElementSize;
auto *Dst = *GetSrc<uint8_t**>(Data->SSAData, Op->Addr);
const auto *RegData = GetSrc<uint8_t const*>(Data->SSAData, Op->Data);
const auto *Mask = GetSrc<uint8_t const*>(Data->SSAData, Op->Mask);
const auto SetElements = [NumElements]<typename T>(void* Dst, const T* MaskValues, const T* DataVals) {
const auto SignBit = 1ULL << ((sizeof(T) * 8) - 1);
for (size_t i = 0; i < NumElements; i++) {
if ((MaskValues[i] & SignBit) != 0) {
std::memcpy(static_cast<uint8_t*>(Dst) + (i * sizeof(T)), DataVals + i, sizeof(T));
}
}
};
if (!Op->Offset.IsInvalid()) {
auto Offset = *GetSrc<uintptr_t const*>(Data->SSAData, Op->Offset) * Op->OffsetScale;
switch(Op->OffsetType.Val) {
case IR::MEM_OFFSET_SXTX.Val: Dst += Offset; break;
case IR::MEM_OFFSET_UXTW.Val: Dst += (uint32_t)Offset; break;
case IR::MEM_OFFSET_SXTW.Val: Dst += (int32_t)Offset; break;
}
}
switch (ElementSize) {
case 1: {
SetElements(Dst, Mask, RegData);
return;
}
case 2: {
SetElements(Dst,
reinterpret_cast<const uint16_t*>(Mask),
reinterpret_cast<const uint16_t*>(RegData));
return;
}
case 4: {
SetElements(Dst,
reinterpret_cast<const uint32_t*>(Mask),
reinterpret_cast<const uint32_t*>(RegData));
return;
}
case 8: {
SetElements(Dst,
reinterpret_cast<const uint64_t*>(Mask),
reinterpret_cast<const uint64_t*>(RegData));
return;
}
default:
LOGMAN_MSG_A_FMT("Unhandled VStoreVectorMasked element size: {}", ElementSize);
return;
}
}
DEF_OP(VLoadVectorElement) {
const auto Op = IROp->C<IR::IROp_VLoadVectorElement>();
const auto OpSize = IROp->Size;
const auto ElementSize = IROp->ElementSize;
auto *Mem = *GetSrc<uint8_t**>(Data->SSAData, Op->Addr);
const auto *DstSrc = GetSrc<uint8_t const*>(Data->SSAData, Op->DstSrc);
const auto SetElements = []<typename T>(void* Dst, const T* MemPtr, const auto Index) {
std::memcpy(static_cast<uint8_t*>(Dst) + (Index * sizeof(T)), MemPtr, sizeof(T));
};
// Copy the source data first.
memcpy(GDP, DstSrc, OpSize);
switch (ElementSize) {
case 1: {
SetElements(GDP,
Mem,
Op->Index);
return;
}
case 2: {
SetElements(GDP,
reinterpret_cast<const uint16_t*>(Mem),
Op->Index);
return;
}
case 4: {
SetElements(GDP,
reinterpret_cast<const uint32_t*>(Mem),
Op->Index);
return;
}
case 8: {
SetElements(GDP,
reinterpret_cast<const uint64_t*>(Mem),
Op->Index);
return;
}
default:
LOGMAN_MSG_A_FMT("Unhandled {} element size: {}", __func__, ElementSize);
return;
}
}
DEF_OP(VStoreVectorElement) {
const auto Op = IROp->C<IR::IROp_VStoreVectorElement>();
const auto ElementSize = IROp->ElementSize;
auto *Mem = *GetSrc<uint8_t**>(Data->SSAData, Op->Addr);
const auto *Value = GetSrc<uint8_t const*>(Data->SSAData, Op->Value);
const auto StoreElements = []<typename T>(void* MemPtr, const T* Src, const auto Index) {
std::memcpy(MemPtr, reinterpret_cast<const uint8_t*>(Src) + (Index * sizeof(T)), sizeof(T));
};
switch (ElementSize) {
case 1: {
StoreElements(Mem,
Value,
Op->Index);
return;
}
case 2: {
StoreElements(Mem,
reinterpret_cast<const uint16_t*>(Value),
Op->Index);
return;
}
case 4: {
StoreElements(Mem,
reinterpret_cast<const uint32_t*>(Value),
Op->Index);
return;
}
case 8: {
StoreElements(Mem,
reinterpret_cast<const uint64_t*>(Value),
Op->Index);
return;
}
default:
LOGMAN_MSG_A_FMT("Unhandled {} element size: {}", __func__, ElementSize);
return;
}
}
DEF_OP(VBroadcastFromMem) {
const auto Op = IROp->C<IR::IROp_VBroadcastFromMem>();
const auto OpSize = IROp->Size;
const auto ElementSize = IROp->ElementSize;
const auto NumElements = OpSize / ElementSize;
const auto *MemData = *GetSrc<const uint8_t**>(Data->SSAData, Op->Address);
const auto BroadcastElement = [NumElements]<typename T>(void* Dst, const T* MemPtr) {
auto* DstU8 = static_cast<uint8_t*>(Dst);
for (size_t i = 0; i < NumElements; i++) {
std::memcpy(DstU8 + (i * sizeof(T)), MemPtr, sizeof(T));
}
};
switch (ElementSize) {
case 1:
BroadcastElement(GDP, MemData);
break;
case 2:
BroadcastElement(GDP, reinterpret_cast<const uint16_t*>(MemData));
break;
case 4:
BroadcastElement(GDP, reinterpret_cast<const uint32_t*>(MemData));
break;
case 8:
BroadcastElement(GDP, reinterpret_cast<const uint64_t*>(MemData));
break;
case 16:
BroadcastElement(GDP, reinterpret_cast<const __uint128_t*>(MemData));
break;
default:
LOGMAN_MSG_A_FMT("Unhandled VBroadcastFromMem element size: {}", ElementSize);
break;
}
}
DEF_OP(Push) {
const auto Op = IROp->C<IR::IROp_Push>();
const auto ValueSize = Op->ValueSize;
uint64_t MemData = *GetSrc<uint64_t*>(Data->SSAData, Op->Addr);
switch (ValueSize) {
case 1: {
*reinterpret_cast<uint8_t*>(MemData - ValueSize) = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
break;
}
case 2: {
*reinterpret_cast<uint16_t*>(MemData - ValueSize) = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
break;
}
case 4: {
*reinterpret_cast<uint32_t*>(MemData - ValueSize) = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
break;
}
case 8: {
*reinterpret_cast<uint64_t*>(MemData - ValueSize) = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
break;
}
default:
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ValueSize);
break;
}
GD = MemData - ValueSize;
}
DEF_OP(MemSet) {
const auto Op = IROp->C<IR::IROp_MemSet>();
const int32_t Size = Op->Size;
char *MemData = *GetSrc<char **>(Data->SSAData, Op->Addr);
uint64_t MemPrefix{};
if (!Op->Prefix.IsInvalid()) {
MemPrefix = *GetSrc<uint64_t*>(Data->SSAData, Op->Prefix);
}
const auto Value = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
const auto Length = *GetSrc<uint64_t*>(Data->SSAData, Op->Length);
const auto Direction = *GetSrc<uint8_t*>(Data->SSAData, Op->Direction);
auto MemSetElements = [](auto* Memory, uint64_t Value, size_t Length) {
for (size_t i = 0; i < Length; ++i) {
Memory[i] = Value;
}
};
auto MemSetElementsInverse = [](auto* Memory, uint64_t Value, size_t Length) {
for (size_t i = 0; i < Length; ++i) {
Memory[-i] = Value;
}
};
if (Direction == 0) { // Forward
if (Op->IsAtomic) {
switch (Size) {
case 1:
MemSetElements(reinterpret_cast<std::atomic<uint8_t>*>(MemData + MemPrefix), Value, Length);
break;
case 2:
MemSetElements(reinterpret_cast<std::atomic<uint16_t>*>(MemData + MemPrefix), Value, Length);
break;
case 4:
MemSetElements(reinterpret_cast<std::atomic<uint32_t>*>(MemData + MemPrefix), Value, Length);
break;
case 8:
MemSetElements(reinterpret_cast<std::atomic<uint64_t>*>(MemData + MemPrefix), Value, Length);
break;
default:
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size);
break;
}
}
else {
switch (Size) {
case 1:
MemSetElements(reinterpret_cast<uint8_t*>(MemData + MemPrefix), Value, Length);
break;
case 2:
MemSetElements(reinterpret_cast<uint16_t*>(MemData + MemPrefix), Value, Length);
break;
case 4:
MemSetElements(reinterpret_cast<uint32_t*>(MemData + MemPrefix), Value, Length);
break;
case 8:
MemSetElements(reinterpret_cast<uint64_t*>(MemData + MemPrefix), Value, Length);
break;
default:
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size);
break;
}
}
GD = reinterpret_cast<uint64_t>(MemData + (Length * Size));
}
else { // Backward
if (Op->IsAtomic) {
switch (Size) {
case 1:
MemSetElementsInverse(reinterpret_cast<std::atomic<uint8_t>*>(MemData + MemPrefix), Value, Length);
break;
case 2:
MemSetElementsInverse(reinterpret_cast<std::atomic<uint16_t>*>(MemData + MemPrefix), Value, Length);
break;
case 4:
MemSetElementsInverse(reinterpret_cast<std::atomic<uint32_t>*>(MemData + MemPrefix), Value, Length);
break;
case 8:
MemSetElementsInverse(reinterpret_cast<std::atomic<uint64_t>*>(MemData + MemPrefix), Value, Length);
break;
default:
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size);
break;
}
}
else {
switch (Size) {
case 1:
MemSetElementsInverse(reinterpret_cast<uint8_t*>(MemData + MemPrefix), Value, Length);
break;
case 2:
MemSetElementsInverse(reinterpret_cast<uint16_t*>(MemData + MemPrefix), Value, Length);
break;
case 4:
MemSetElementsInverse(reinterpret_cast<uint32_t*>(MemData + MemPrefix), Value, Length);
break;
case 8:
MemSetElementsInverse(reinterpret_cast<uint64_t*>(MemData + MemPrefix), Value, Length);
break;
default:
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size);
break;
}
}
GD = reinterpret_cast<uint64_t>(MemData - (Length * Size));
}
}
DEF_OP(MemCpy) {
const auto Op = IROp->C<IR::IROp_MemCpy>();
const int32_t Size = Op->Size;
uint64_t *DstPtr = GetDest<uint64_t*>(Data->SSAData, Node);
char *MemDataDest = *GetSrc<char **>(Data->SSAData, Op->AddrDest);
char *MemDataSrc = *GetSrc<char **>(Data->SSAData, Op->AddrSrc);
uint64_t DestPrefix{};
uint64_t SrcPrefix{};
if (!Op->PrefixDest.IsInvalid()) {
DestPrefix = *GetSrc<uint64_t*>(Data->SSAData, Op->PrefixDest);
}
if (!Op->PrefixSrc.IsInvalid()) {
SrcPrefix = *GetSrc<uint64_t*>(Data->SSAData, Op->PrefixSrc);
}
const auto Length = *GetSrc<uint64_t*>(Data->SSAData, Op->Length);
const auto Direction = *GetSrc<uint8_t*>(Data->SSAData, Op->Direction);
auto MemSetElementsAtomic = [](auto* MemDst, auto* MemSrc, size_t Length) {
for (size_t i = 0; i < Length; ++i) {
MemDst[i].store(MemSrc[i].load());
}
};
auto MemSetElementsAtomicInverse = [](auto* MemDst, auto* MemSrc, size_t Length) {
for (size_t i = 0; i < Length; ++i) {
MemDst[-i].store(MemSrc[-i].load());
}
};
auto MemSetElements = [](auto* MemDst, auto* MemSrc, size_t Length) {
for (size_t i = 0; i < Length; ++i) {
MemDst[i] = MemSrc[i];
}
};
auto MemSetElementsInverse = [](auto* MemDst, auto* MemSrc, size_t Length) {
for (size_t i = 0; i < Length; ++i) {
MemDst[-i] = MemSrc[-i];
}
};
if (Direction == 0) { // Forward
if (Op->IsAtomic) {
switch (Size) {
case 1:
MemSetElementsAtomic(reinterpret_cast<std::atomic<uint8_t>*>(MemDataDest + DestPrefix), reinterpret_cast<std::atomic<uint8_t>*>(MemDataSrc + SrcPrefix), Length);
break;
case 2:
MemSetElementsAtomic(reinterpret_cast<std::atomic<uint16_t>*>(MemDataDest + DestPrefix), reinterpret_cast<std::atomic<uint16_t>*>(MemDataSrc + SrcPrefix), Length);
break;
case 4:
MemSetElementsAtomic(reinterpret_cast<std::atomic<uint32_t>*>(MemDataDest + DestPrefix), reinterpret_cast<std::atomic<uint32_t>*>(MemDataSrc + SrcPrefix), Length);
break;
case 8:
MemSetElementsAtomic(reinterpret_cast<std::atomic<uint64_t>*>(MemDataDest + DestPrefix), reinterpret_cast<std::atomic<uint64_t>*>(MemDataSrc + SrcPrefix), Length);
break;
default:
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size);
break;
}
}
else {
switch (Size) {
case 1:
MemSetElements(reinterpret_cast<uint8_t*>(MemDataDest + DestPrefix), reinterpret_cast<uint8_t*>(MemDataSrc + SrcPrefix), Length);
break;
case 2:
MemSetElements(reinterpret_cast<uint16_t*>(MemDataDest + DestPrefix), reinterpret_cast<uint16_t*>(MemDataSrc + SrcPrefix), Length);
break;
case 4:
MemSetElements(reinterpret_cast<uint32_t*>(MemDataDest + DestPrefix), reinterpret_cast<uint32_t*>(MemDataSrc + SrcPrefix), Length);
break;
case 8:
MemSetElements(reinterpret_cast<uint64_t*>(MemDataDest + DestPrefix), reinterpret_cast<uint64_t*>(MemDataSrc + SrcPrefix), Length);
break;
default:
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size);
break;
}
}
DstPtr[0] = reinterpret_cast<uint64_t>(MemDataDest + (Length * Size));
DstPtr[1] = reinterpret_cast<uint64_t>(MemDataSrc + (Length * Size));
}
else { // Backward
if (Op->IsAtomic) {
switch (Size) {
case 1:
MemSetElementsAtomicInverse(reinterpret_cast<std::atomic<uint8_t>*>(MemDataDest + DestPrefix), reinterpret_cast<std::atomic<uint8_t>*>(MemDataSrc + SrcPrefix), Length);
break;
case 2:
MemSetElementsAtomicInverse(reinterpret_cast<std::atomic<uint16_t>*>(MemDataDest + DestPrefix), reinterpret_cast<std::atomic<uint16_t>*>(MemDataSrc + SrcPrefix), Length);
break;
case 4:
MemSetElementsAtomicInverse(reinterpret_cast<std::atomic<uint32_t>*>(MemDataDest + DestPrefix), reinterpret_cast<std::atomic<uint32_t>*>(MemDataSrc + SrcPrefix), Length);
break;
case 8:
MemSetElementsAtomicInverse(reinterpret_cast<std::atomic<uint64_t>*>(MemDataDest + DestPrefix), reinterpret_cast<std::atomic<uint64_t>*>(MemDataSrc + SrcPrefix), Length);
break;
default:
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size);
break;
}
}
else {
switch (Size) {
case 1:
MemSetElementsInverse(reinterpret_cast<uint8_t*>(MemDataDest + DestPrefix), reinterpret_cast<uint8_t*>(MemDataSrc + SrcPrefix), Length);
break;
case 2:
MemSetElementsInverse(reinterpret_cast<uint16_t*>(MemDataDest + DestPrefix), reinterpret_cast<uint16_t*>(MemDataSrc + SrcPrefix), Length);
break;
case 4:
MemSetElementsInverse(reinterpret_cast<uint32_t*>(MemDataDest + DestPrefix), reinterpret_cast<uint32_t*>(MemDataSrc + SrcPrefix), Length);
break;
case 8:
MemSetElementsInverse(reinterpret_cast<uint64_t*>(MemDataDest + DestPrefix), reinterpret_cast<uint64_t*>(MemDataSrc + SrcPrefix), Length);
break;
default:
LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size);
break;
}
}
DstPtr[0] = reinterpret_cast<uint64_t>(MemDataDest - (Length * Size));
DstPtr[1] = reinterpret_cast<uint64_t>(MemDataSrc - (Length * Size));
}
}
DEF_OP(CacheLineClear) {
auto Op = IROp->C<IR::IROp_CacheLineClear>();
char *MemData = *GetSrc<char **>(Data->SSAData, Op->Addr);
// 64-byte cache line clear
CacheLineFlush(MemData);
}
DEF_OP(CacheLineClean) {
auto Op = IROp->C<IR::IROp_CacheLineClean>();
char *MemData = *GetSrc<char **>(Data->SSAData, Op->Addr);
// 64-byte cache line clean
CacheLineClean(MemData);
}
DEF_OP(CacheLineZero) {
auto Op = IROp->C<IR::IROp_CacheLineZero>();
uintptr_t MemData = *GetSrc<uintptr_t*>(Data->SSAData, Op->Addr);
// Force cacheline alignment
MemData = MemData & ~(CPUIDEmu::CACHELINE_SIZE - 1);
using DataType = uint64_t;
DataType *MemData64 = reinterpret_cast<DataType*>(MemData);
// 64-byte cache line zero
for (size_t i = 0; i < (CPUIDEmu::CACHELINE_SIZE / sizeof(DataType)); ++i) {
MemData64[i] = 0;
}
}
#undef DEF_OP
} // namespace FEXCore::CPU

View File

@@ -1,175 +0,0 @@
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Context/Context.h"
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <FEXHeaderUtils/Syscalls.h>
#include <cstdint>
#ifdef _M_X86_64
#include <xmmintrin.h>
#endif
#include <sys/random.h>
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
DEF_OP(Fence) {
auto Op = IROp->C<IR::IROp_Fence>();
switch (Op->Fence) {
case IR::Fence_Load.Val:
std::atomic_thread_fence(std::memory_order_acquire);
break;
case IR::Fence_LoadStore.Val:
std::atomic_thread_fence(std::memory_order_seq_cst);
break;
case IR::Fence_Store.Val:
std::atomic_thread_fence(std::memory_order_release);
break;
default: LOGMAN_MSG_A_FMT("Unknown Fence: {}", Op->Fence); break;
}
}
DEF_OP(Break) {
auto Op = IROp->C<IR::IROp_Break>();
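// Record the synchronous fault metadata, then raise the signal on this
// thread so the frontend's signal handler can take over delivery.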
Data->State->CurrentFrame->SynchronousFaultData.FaultToTopAndGeneratedException = 1;
Data->State->CurrentFrame->SynchronousFaultData.Signal = Op->Reason.Signal;
Data->State->CurrentFrame->SynchronousFaultData.TrapNo = Op->Reason.TrapNumber;
Data->State->CurrentFrame->SynchronousFaultData.err_code = Op->Reason.ErrorRegister;
Data->State->CurrentFrame->SynchronousFaultData.si_code = Op->Reason.si_code;
switch (Op->Reason.Signal) {
case SIGILL:
FHU::Syscalls::tgkill(Data->State->ThreadManager.PID, Data->State->ThreadManager.TID, SIGILL);
break;
case SIGTRAP:
FHU::Syscalls::tgkill(Data->State->ThreadManager.PID, Data->State->ThreadManager.TID, SIGTRAP);
break;
case SIGSEGV:
FHU::Syscalls::tgkill(Data->State->ThreadManager.PID, Data->State->ThreadManager.TID, SIGSEGV);
break;
default:
FHU::Syscalls::tgkill(Data->State->ThreadManager.PID, Data->State->ThreadManager.TID, SIGTRAP);
break;
}
}
DEF_OP(GetRoundingMode) {
uint32_t GuestRounding{};
#ifdef _M_ARM_64
uint64_t Tmp{};
__asm(R"(
mrs %[Tmp], FPCR;
)"
: [Tmp] "=r" (Tmp));
// Extract the rounding
// On ARM the ordering is different than on x86
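// FPCR: FZ is bit 24, RMode is bits [23:22] (0=nearest, 1=+inf, 2=-inf, 3=zero).
// MXCSR rounding is 0=nearest, 1=-inf, 2=+inf, 3=zero, so modes 1 and 2 swap.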
GuestRounding |= ((Tmp >> 24) & 1) ? IR::ROUND_MODE_FLUSH_TO_ZERO : 0;
uint8_t RoundingMode = (Tmp >> 22) & 0b11;
if (RoundingMode == 0)
GuestRounding |= IR::ROUND_MODE_NEAREST;
else if (RoundingMode == 1)
GuestRounding |= IR::ROUND_MODE_POSITIVE_INFINITY;
else if (RoundingMode == 2)
GuestRounding |= IR::ROUND_MODE_NEGATIVE_INFINITY;
else if (RoundingMode == 3)
GuestRounding |= IR::ROUND_MODE_TOWARDS_ZERO;
#else
GuestRounding = _mm_getcsr();
// Extract the rounding mode (RC in bits 13-14) plus flush-to-zero (bit 15)
GuestRounding = (GuestRounding >> 13) & 0b111;
#endif
memcpy(GDP, &GuestRounding, sizeof(GuestRounding));
}
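// Aside: on x86, MXCSR keeps the rounding control in bits 13-14 and
// flush-to-zero in bit 15, so the shift-and-mask above grabs all three at
// once. A self-checking sketch of that extraction:
static constexpr uint32_t ExtractMXCSRRoundingSketch(uint32_t MXCSR) {
  return (MXCSR >> 13) & 0b111;
}
static_assert(ExtractMXCSRRoundingSketch(0x1F80) == 0b000, "default MXCSR: round-to-nearest");
static_assert(ExtractMXCSRRoundingSketch(0xFF80) == 0b111, "RC=toward-zero with FTZ set");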
DEF_OP(SetRoundingMode) {
auto Op = IROp->C<IR::IROp_SetRoundingMode>();
const auto GuestRounding = *GetSrc<uint8_t*>(Data->SSAData, Op->RoundMode);
#ifdef _M_ARM_64
uint64_t HostRounding{};
__asm volatile(R"(
mrs %[Tmp], FPCR;
)"
: [Tmp] "=r" (HostRounding));
// Mask out the rounding mode (bits 22-23) and flush-to-zero (bit 24)
HostRounding &= ~(0b111 << 22);
HostRounding |= (GuestRounding & IR::ROUND_MODE_FLUSH_TO_ZERO) ? (1U << 24) : 0;
uint8_t RoundingMode = GuestRounding & 0b11;
if (RoundingMode == IR::ROUND_MODE_NEAREST)
HostRounding |= (0b00U << 22);
else if (RoundingMode == IR::ROUND_MODE_POSITIVE_INFINITY)
HostRounding |= (0b01U << 22);
else if (RoundingMode == IR::ROUND_MODE_NEGATIVE_INFINITY)
HostRounding |= (0b10U << 22);
else if (RoundingMode == IR::ROUND_MODE_TOWARDS_ZERO)
HostRounding |= (0b11U << 22);
__asm volatile(R"(
msr FPCR, %[Tmp];
)"
:: [Tmp] "r" (HostRounding));
#else
uint32_t HostRounding = _mm_getcsr();
// Cut out the host rounding mode
HostRounding &= ~(0b111 << 13);
// Insert our new rounding mode
HostRounding |= GuestRounding << 13;
_mm_setcsr(HostRounding);
#endif
}
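// Aside: the branchy translation exists because AArch64's FPCR RMode
// (bits 22-23) and x86's MXCSR RC disagree on the middle encodings: x86 uses
// 0=nearest, 1=-inf, 2=+inf, 3=zero while ARM uses 0=nearest, 1=+inf,
// 2=-inf, 3=zero. A table-driven sketch of the same mapping:
static constexpr uint8_t X86RCToFPCRRModeSketch(uint8_t RC) {
  constexpr uint8_t Table[4] = {0b00, 0b10, 0b01, 0b11};
  return Table[RC & 0b11];
}
static_assert(X86RCToFPCRRModeSketch(1) == 0b10, "x86 round-down -> ARM RM");
static_assert(X86RCToFPCRRModeSketch(2) == 0b01, "x86 round-up -> ARM RP");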
DEF_OP(Print) {
auto Op = IROp->C<IR::IROp_Print>();
const uint8_t OpSize = IROp->Size;
if (OpSize <= 8) {
const auto Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
LogMan::Msg::IFmt(">>>> Value in Arg: 0x{:x}, {}", Src, Src);
}
else if (OpSize == 16) {
const auto Src = *GetSrc<__uint128_t*>(Data->SSAData, Op->Value);
const uint64_t Src0 = Src;
const uint64_t Src1 = Src >> 64;
LogMan::Msg::IFmt(">>>> Value[0] in Arg: 0x{:x}, {}", Src0, Src0);
LogMan::Msg::IFmt(" Value[1] in Arg: 0x{:x}, {}", Src1, Src1);
}
else
LOGMAN_MSG_A_FMT("Unknown value size: {}", OpSize);
}
DEF_OP(ProcessorID) {
uint32_t CPU, CPUNode;
FHU::Syscalls::getcpu(&CPU, &CPUNode);
GD = (CPUNode << 12) | CPU;
}
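// Aside: the (Node << 12) | CPU packing mirrors the encoding Linux programs
// into IA32_TSC_AUX, which is what guest RDTSCP/RDPID read back. A sketch
// using the same helper as above (the function name is illustrative):
static uint32_t ProcessorIDSketch() {
  uint32_t CPU{}, Node{};
  FHU::Syscalls::getcpu(&CPU, &Node);
  return (Node << 12) | CPU;
}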
DEF_OP(RDRAND) {
// We are ignoring Op->GetReseeded in the interpreter
uint64_t *DstPtr = GetDest<uint64_t*>(Data->SSAData, Node);
ssize_t Result = ::getrandom(&DstPtr[0], 8, 0);
// The second result indicates whether we managed to read a valid random number
DstPtr[1] = Result == 8 ? 1 : 0;
}
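// Aside: the second destination models RDRAND's carry flag: 1 only when a
// full 8 bytes of entropy came back. An equivalent standalone sketch using
// the same getrandom(2) call (the function name is illustrative):
static uint64_t RdrandSketch(uint64_t *Valid) {
  uint64_t Value{};
  const ssize_t Got = ::getrandom(&Value, sizeof(Value), 0);
  *Valid = (Got == static_cast<ssize_t>(sizeof(Value))) ? 1 : 0;
  return Value;
}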
DEF_OP(Yield) {
// Nop implementation
}
#undef DEF_OP
} // namespace FEXCore::CPU

View File

@@ -1,36 +0,0 @@
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
DEF_OP(ExtractElementPair) {
auto Op = IROp->C<IR::IROp_ExtractElementPair>();
const auto Src = GetSrc<uintptr_t>(Data->SSAData, Op->Pair);
memcpy(GDP, reinterpret_cast<void*>(Src + Op->Header.Size * Op->Element), Op->Header.Size);
}
DEF_OP(CreateElementPair) {
auto Op = IROp->C<IR::IROp_CreateElementPair>();
const void *Src_Lower = GetSrc<void*>(Data->SSAData, Op->Lower);
const void *Src_Upper = GetSrc<void*>(Data->SSAData, Op->Upper);
uint8_t *Dst = GetDest<uint8_t*>(Data->SSAData, Node);
memcpy(Dst, Src_Lower, IROp->ElementSize);
memcpy(Dst + IROp->ElementSize, Src_Upper, IROp->ElementSize);
}
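// Aside: a pair value is simply two ElementSize-wide chunks laid out back to
// back, which is why both ops reduce to pointer arithmetic plus memcpy. A
// sketch for 8-byte elements; the struct layout is an illustrative assumption:
struct PairSketch { uint64_t Lower, Upper; }; // byte offsets 0 and 8
static uint64_t ExtractPairElementSketch(const PairSketch &P, unsigned Element) {
  uint64_t Out{};
  memcpy(&Out, reinterpret_cast<const uint8_t*>(&P) + Element * sizeof(uint64_t), sizeof(Out));
  return Out;
}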
#undef DEF_OP
} // namespace FEXCore::CPU

File diff suppressed because it is too large

View File

@@ -19,15 +19,13 @@
namespace FEXCore::Config {
namespace Handler {
static inline std::optional<fextl::string> CoreHandler(std::string_view Value) {
-if (Value == "irint")
+if (Value == "irjit")
return "0";
-else if (Value == "irjit")
-return "1";
#ifdef _M_X86_64
else if (Value == "host")
-return "2";
+return "1";
#endif
-return "1";
+return "0";
}
static inline std::optional<fextl::string> SMCCheckHandler(std::string_view Value) {
@@ -61,7 +59,6 @@ namespace Handler {
#include <FEXCore/Config/ConfigOptions.inl>
enum ConfigCore {
-CONFIG_INTERPRETER,
CONFIG_IRJIT,
CONFIG_CUSTOM,
};

View File

@@ -246,21 +246,6 @@ namespace {
if (ImGui::BeginTabItem("CPU")) {
std::optional<fextl::string*> Value{};
#ifdef INTERPRETER_ENABLED
ImGui::Text("Core:");
Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_CORE);
ImGui::SameLine();
if (ImGui::RadioButton("Int", Value.has_value() && **Value == "0")) {
LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_CORE, "0");
ConfigChanged = true;
}
ImGui::SameLine();
if (ImGui::RadioButton("JIT", Value.has_value() && **Value == "1")) {
LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_CORE, "1");
ConfigChanged = true;
}
#endif
Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_MAXINST);
if (Value.has_value() && !(*Value)->empty()) {
strncpy(BlockSize, &(*Value)->at(0), 32);

View File

@@ -1735,26 +1735,7 @@ namespace FEX::HLE {
return Result.first;
};
-const auto SigbusHandlerInterpreter = [](FEXCore::Core::InternalThreadState *Thread, int Signal, void *_info, void *ucontext) -> bool {
-const auto PC = ArchHelpers::Context::GetPc(ucontext);
-siginfo_t* info = reinterpret_cast<siginfo_t*>(_info);
-if (info->si_code != BUS_ADRALN) {
-// This only handles alignment problems
-return false;
-}
-const auto Result = FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(true, PC, ArchHelpers::Context::GetArmGPRs(ucontext));
-ArchHelpers::Context::SetPc(ucontext, PC + Result.second);
-return Result.first;
-};
-if (Core == FEXCore::Config::CONFIG_INTERPRETER) {
-RegisterHostSignalHandler(SIGBUS, SigbusHandlerInterpreter, true);
-}
-else {
-RegisterHostSignalHandler(SIGBUS, SigbusHandler, true);
-}
+RegisterHostSignalHandler(SIGBUS, SigbusHandler, true);
#endif
// Register pause signal handler.
RegisterHostSignalHandler(SignalDelegator::SIGNAL_FOR_PAUSE, PauseHandler, true);

View File

@@ -61,14 +61,6 @@ foreach(ASM_SRC ${ASM_SOURCES})
)
endif()
if (ENABLE_INTERPRETER)
list(APPEND TEST_ARGS
"--no-silent -g -c irint -n 1 --no-multiblock" "int_1" "int"
"--no-silent -g -c irint -n 500 --no-multiblock" "int_500" "int"
"--no-silent -g -c irint -n 500 --multiblock" "int_500_m" "int"
)
endif()
if (NOT MINGW_BUILD)
set (LAUNCH_PROGRAM "${CMAKE_BINARY_DIR}/Bin/TestHarnessRunner")
else()

View File

@@ -54,13 +54,6 @@ foreach(ASM_SRC ${ASM_SOURCES})
"--no-silent -g -c irjit -n 500 --no-multiblock" "jit_500" "jit"
"--no-silent -g -c irjit -n 500 --multiblock" "jit_500_m" "jit"
)
if (ENABLE_INTERPRETER)
list(APPEND TEST_ARGS
"--no-silent -g -c irint -n 1 --no-multiblock" "int_1" "int"
"--no-silent -g -c irint -n 500 --no-multiblock" "int_500" "int"
"--no-silent -g -c irint -n 500 --multiblock" "int_500_m" "int"
)
endif()
if (ENABLE_VIXL_SIMULATOR)
set(CPU_CLASS Simulator)

View File

@@ -25,12 +25,6 @@ foreach(IR_SRC ${IR_SOURCES})
"--no-silent -c irjit -n 500" "ir_jit" "jit"
)
if (ENABLE_INTERPRETER)
list(APPEND TEST_ARGS
"--no-silent -c irint -n 500" "ir_int" "int"
)
endif()
set (RUNNER_DISABLED "${CMAKE_SOURCE_DIR}/unittests/ASM/Disabled_Tests")
if (DEFINED ENV{runner_label})
set (RUNNER_DISABLED "${CMAKE_SOURCE_DIR}/unittests/ASM/Disabled_Tests_$ENV{runner_label}")

View File

@@ -9,20 +9,6 @@ foreach(POSIX_TEST ${POSIX_TESTS})
list(GET TEST_NAME_LIST 1 TEST_NAME)
string(REPLACE "/" "-" TEST_NAME ${TEST_NAME})
if (ENABLE_INTERPRETER)
add_test(NAME "${TEST_NAME}.int.posix"
COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py"
"${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures"
"${CMAKE_CURRENT_SOURCE_DIR}/Expected_Output"
"${CMAKE_CURRENT_SOURCE_DIR}/Disabled_Tests"
"${CMAKE_CURRENT_SOURCE_DIR}/Flake_Tests"
"${TEST_NAME}"
"guest"
"${CMAKE_BINARY_DIR}/Bin/FEXLoader"
"-o" "stderr" "--no-silent" "-c" "irint" "-n" "500" "--"
"${POSIX_TEST}")
endif()
add_test(NAME "${TEST_NAME}.jit.posix"
COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py"
"${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures"

View File

@@ -9,8 +9,6 @@ foreach(TEST ${TESTS})
list(GET TEST_NAME_LIST 1 TEST_NAME)
string(REPLACE "/" "-" TEST_NAME ${TEST_NAME})
# The interpreter is too slow to run these tests; only generate them for the jit
add_test(NAME "${TEST_NAME}.jit.gcc-target-32"
COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py"
"${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures"

View File

@@ -9,8 +9,6 @@ foreach(TEST ${TESTS})
list(GET TEST_NAME_LIST 1 TEST_NAME)
string(REPLACE "/" "-" TEST_NAME ${TEST_NAME})
# The interpreter is too slow to run these tests; only generate them for the jit
add_test(NAME "${TEST_NAME}.jit.gcc-target-64"
COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py"
"${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures"

View File

@@ -9,8 +9,6 @@ foreach(TEST ${TESTS})
list(GET TEST_NAME_LIST 1 TEST_NAME)
string(REPLACE "/" "-" TEST_NAME ${TEST_NAME})
# The interpreter is too slow to run these tests; only generate them for the jit
add_test(NAME "${TEST_NAME}.jit.gvisor"
COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py"
"${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures"