mirror of https://github.com/FEX-Emu/FEX.git
synced 2025-03-01 11:18:42 +00:00

Merge pull request #1307 from Sonicadvance1/InterpreterDispatcher

Interpreter: Splits ops into separate files

This commit is contained in:
commit e9937d9a85
External/FEXCore/Source/CMakeLists.txt (vendored): 11 changes

@@ -97,6 +97,17 @@ set (SRCS
  Interface/Core/Dispatcher/Arm64Dispatcher.cpp
  Interface/Core/Interpreter/InterpreterCore.cpp
  Interface/Core/Interpreter/InterpreterOps.cpp
  Interface/Core/Interpreter/ALUOps.cpp
  Interface/Core/Interpreter/AtomicOps.cpp
  Interface/Core/Interpreter/BranchOps.cpp
  Interface/Core/Interpreter/ConversionOps.cpp
  Interface/Core/Interpreter/EncryptionOps.cpp
  Interface/Core/Interpreter/F80Ops.cpp
  Interface/Core/Interpreter/FlagOps.cpp
  Interface/Core/Interpreter/MemoryOps.cpp
  Interface/Core/Interpreter/MiscOps.cpp
  Interface/Core/Interpreter/MoveOps.cpp
  Interface/Core/Interpreter/VectorOps.cpp
  Interface/Core/X86Tables/BaseTables.cpp
  Interface/Core/X86Tables/DDDTables.cpp
  Interface/Core/X86Tables/EVEXTables.cpp
@@ -249,6 +249,8 @@ namespace FEXCore::Context {

  LocalLoader = Loader;
  using namespace FEXCore::Core;

  FEXCore::CPU::InitializeInterpreterOpHandlers();
  FEXCore::Core::CPUState NewThreadState = CreateDefaultCPUState();
  FEXCore::Core::InternalThreadState *Thread = CreateThread(&NewThreadState, 0);
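FEXCore::CPU::InitializeInterpreterOpHandlers() is the hook this commit threads through context creation: it fills a single function-pointer table that each of the split-out files registers its handlers into. A minimal sketch of that dispatch-table pattern, using hypothetical stand-in types (the real OpHandlers table, IROps enum, and handler signature live in the FEX sources; only the shape is shown here):

#include <array>
#include <cstddef>
#include <cstdint>

// Hypothetical stand-ins for FEXCore's IR op enum and handler signature.
enum class IROps : uint32_t { OP_CAS, OP_JUMP, OP_MAX };
struct IROp_Header;
struct IROpData;
using OpHandler = void (*)(IROp_Header *IROp, IROpData *Data, uint32_t Node);

static std::array<OpHandler, static_cast<size_t>(IROps::OP_MAX)> OpHandlers{};

static void Op_CAS(IROp_Header *, IROpData *, uint32_t) { /* interpret a CAS */ }

// Each *Ops.cpp file contributes one Register function, mirroring the
// REGISTER_OP macros that appear later in this diff.
void RegisterAtomicHandlers() {
  OpHandlers[static_cast<size_t>(IROps::OP_CAS)] = &Op_CAS;
}

// InitializeInterpreterOpHandlers() then simply calls every Register*
// function once, before any guest thread starts executing.
void InitializeInterpreterOpHandlers() {
  RegisterAtomicHandlers();
}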
External/FEXCore/Source/Interface/Core/Interpreter/ALUOps.cpp (vendored, new file): 1007 lines
File diff suppressed because it is too large.

External/FEXCore/Source/Interface/Core/Interpreter/AtomicOps.cpp (vendored, new file): 793 lines

@@ -0,0 +1,793 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/

#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"

#include <FEXCore/Utils/BitUtils.h>

#include <cstdint>

namespace FEXCore::CPU {

#ifdef _M_X86_64
static uint8_t AtomicFetchNeg(uint8_t *Addr) {
  using Type = uint8_t;
  std::atomic<Type> *MemData = reinterpret_cast<std::atomic<Type>*>(Addr);
  Type Expected = MemData->load();
  Type Desired = -Expected;
  do {
    Desired = -Expected;
  } while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst));

  return Expected;
}

static uint16_t AtomicFetchNeg(uint16_t *Addr) {
  using Type = uint16_t;
  std::atomic<Type> *MemData = reinterpret_cast<std::atomic<Type>*>(Addr);
  Type Expected = MemData->load();
  Type Desired = -Expected;
  do {
    Desired = -Expected;
  } while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst));

  return Expected;
}

static uint32_t AtomicFetchNeg(uint32_t *Addr) {
  using Type = uint32_t;
  std::atomic<Type> *MemData = reinterpret_cast<std::atomic<Type>*>(Addr);
  Type Expected = MemData->load();
  Type Desired = -Expected;
  do {
    Desired = -Expected;
  } while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst));

  return Expected;
}

static uint64_t AtomicFetchNeg(uint64_t *Addr) {
  using Type = uint64_t;
  std::atomic<Type> *MemData = reinterpret_cast<std::atomic<Type>*>(Addr);
  Type Expected = MemData->load();
  Type Desired = -Expected;
  do {
    Desired = -Expected;
  } while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst));

  return Expected;
}

template<typename T>
static T AtomicCompareAndSwap(T expected, T desired, T *addr)
{
  std::atomic<T> *MemData = reinterpret_cast<std::atomic<T>*>(addr);

  T Src1 = expected;
  T Src2 = desired;

  T Expected = Src1;
  bool Result = MemData->compare_exchange_strong(Expected, Src2);

  return Result ? Src1 : Expected;
}
#else
// Needs to match what the AArch64 JIT and unaligned signal handler expects
uint8_t AtomicFetchNeg(uint8_t *Addr) {
  using Type = uint8_t;
  Type Result{};
  Type Tmp{};
  Type TmpStatus{};

  __asm__ volatile(
  R"(
  1:
    ldaxrb %w[Result], [%[Memory]];
    neg %w[Tmp], %w[Result];
    stlxrb %w[TmpStatus], %w[Tmp], [%[Memory]];
    cbnz %w[TmpStatus], 1b;
  )"
  : [Result] "=r" (Result)
  , [Tmp] "=r" (Tmp)
  , [TmpStatus] "=r" (TmpStatus)
  , [Memory] "+r" (Addr)
  :: "memory"
  );
  return Result;
}

uint16_t AtomicFetchNeg(uint16_t *Addr) {
  using Type = uint16_t;
  Type Result{};
  Type Tmp{};
  Type TmpStatus{};

  __asm__ volatile(
  R"(
  1:
    ldaxrh %w[Result], [%[Memory]];
    neg %w[Tmp], %w[Result];
    stlxrh %w[TmpStatus], %w[Tmp], [%[Memory]];
    cbnz %w[TmpStatus], 1b;
  )"
  : [Result] "=r" (Result)
  , [Tmp] "=r" (Tmp)
  , [TmpStatus] "=r" (TmpStatus)
  , [Memory] "+r" (Addr)
  :: "memory"
  );
  return Result;
}

uint32_t AtomicFetchNeg(uint32_t *Addr) {
  using Type = uint32_t;
  Type Result{};
  Type Tmp{};
  Type TmpStatus{};

  __asm__ volatile(
  R"(
  1:
    ldaxr %w[Result], [%[Memory]];
    neg %w[Tmp], %w[Result];
    stlxr %w[TmpStatus], %w[Tmp], [%[Memory]];
    cbnz %w[TmpStatus], 1b;
  )"
  : [Result] "=r" (Result)
  , [Tmp] "=r" (Tmp)
  , [TmpStatus] "=r" (TmpStatus)
  , [Memory] "+r" (Addr)
  :: "memory"
  );
  return Result;
}

uint64_t AtomicFetchNeg(uint64_t *Addr) {
  using Type = uint64_t;
  Type Result{};
  Type Tmp{};
  Type TmpStatus{};

  __asm__ volatile(
  R"(
  1:
    ldaxr %[Result], [%[Memory]];
    neg %[Tmp], %[Result];
    stlxr %w[TmpStatus], %[Tmp], [%[Memory]];
    cbnz %w[TmpStatus], 1b;
  )"
  : [Result] "=r" (Result)
  , [Tmp] "=r" (Tmp)
  , [TmpStatus] "=r" (TmpStatus)
  , [Memory] "+r" (Addr)
  :: "memory"
  );
  return Result;
}

template<typename T>
static T AtomicCompareAndSwap(T expected, T desired, T *addr);

template<>
uint8_t AtomicCompareAndSwap(uint8_t expected, uint8_t desired, uint8_t *addr) {
  using Type = uint8_t;
  // force Result to r9 (scratch register) or clang spills to stack
  register Type Result asm("r9"){};
  Type Tmp{};
  Type Tmp2{};
  __asm__ volatile(
  R"(
  1:
    ldaxrb %w[Tmp], [%[Memory]];
    cmp %w[Tmp], %w[Expected], uxtb;
    b.ne 2f;
    stlxrb %w[Tmp2], %w[Desired], [%[Memory]];
    cbnz %w[Tmp2], 1b;
    mov %w[Result], %w[Expected];
    b 3f;
  2:
    mov %w[Result], %w[Tmp];
    clrex;
  3:
  )"
  : [Tmp] "=r" (Tmp)
  , [Tmp2] "=r" (Tmp2)
  , [Desired] "+r" (desired)
  , [Expected] "+r" (expected)
  , [Result] "=r" (Result)
  , [Memory] "+r" (addr)
  :: "memory"
  );
  return Result;
}

template<>
uint16_t AtomicCompareAndSwap(uint16_t expected, uint16_t desired, uint16_t *addr) {
  using Type = uint16_t;
  // force Result to r9 (scratch register) or clang spills to stack
  register Type Result asm("r9"){};
  Type Tmp{};
  Type Tmp2{};
  __asm__ volatile(
  R"(
  1:
    ldaxrh %w[Tmp], [%[Memory]];
    cmp %w[Tmp], %w[Expected], uxth;
    b.ne 2f;
    stlxrh %w[Tmp2], %w[Desired], [%[Memory]];
    cbnz %w[Tmp2], 1b;
    mov %w[Result], %w[Expected];
    b 3f;
  2:
    mov %w[Result], %w[Tmp];
    clrex;
  3:
  )"
  : [Tmp] "=r" (Tmp)
  , [Tmp2] "=r" (Tmp2)
  , [Desired] "+r" (desired)
  , [Expected] "+r" (expected)
  , [Result] "=r" (Result)
  , [Memory] "+r" (addr)
  :: "memory"
  );
  return Result;
}

template<>
uint32_t AtomicCompareAndSwap(uint32_t expected, uint32_t desired, uint32_t *addr) {
  using Type = uint32_t;
  // force Result to r9 (scratch register) or clang spills to stack
  register Type Result asm("r9"){};
  Type Tmp{};
  Type Tmp2{};
  __asm__ volatile(
  R"(
  1:
    ldaxr %w[Tmp], [%[Memory]];
    cmp %w[Tmp], %w[Expected];
    b.ne 2f;
    stlxr %w[Tmp2], %w[Desired], [%[Memory]];
    cbnz %w[Tmp2], 1b;
    mov %w[Result], %w[Expected];
    b 3f;
  2:
    mov %w[Result], %w[Tmp];
    clrex;
  3:
  )"
  : [Tmp] "=r" (Tmp)
  , [Tmp2] "=r" (Tmp2)
  , [Desired] "+r" (desired)
  , [Expected] "+r" (expected)
  , [Result] "=r" (Result)
  , [Memory] "+r" (addr)
  :: "memory"
  );
  return Result;
}

template<>
uint64_t AtomicCompareAndSwap(uint64_t expected, uint64_t desired, uint64_t *addr) {
  using Type = uint64_t;
  // force Result to r9 (scratch register) or clang spills to stack
  register Type Result asm("r9"){};
  Type Tmp{};
  Type Tmp2{};
  __asm__ volatile(
  R"(
  1:
    ldaxr %[Tmp], [%[Memory]];
    cmp %[Tmp], %[Expected];
    b.ne 2f;
    stlxr %w[Tmp2], %[Desired], [%[Memory]];
    cbnz %w[Tmp2], 1b;
    mov %[Result], %[Expected];
    b 3f;
  2:
    mov %[Result], %[Tmp];
    clrex;
  3:
  )"
  : [Tmp] "=r" (Tmp)
  , [Tmp2] "=r" (Tmp2)
  , [Desired] "+r" (desired)
  , [Expected] "+r" (expected)
  , [Result] "=r" (Result)
  , [Memory] "+r" (addr)
  :: "memory"
  );
  return Result;
}

#endif
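On AArch64 the loops above are written in inline assembly so the exact ldaxrb/stlxrb (and wider) sequences match what the AArch64 JIT and the unaligned-access signal handler expect, per the comment in the file. Semantically, each one is an atomic fetch-negate; a portable sketch of the same behavior (illustrative only, not what this commit compiles on ARM):

#include <atomic>
#include <cstdint>

// Portable equivalent of the AtomicFetchNeg loops above (sketch only).
template <typename T>
T AtomicFetchNegPortable(T *Addr) {
  auto *MemData = reinterpret_cast<std::atomic<T>*>(Addr);
  T Expected = MemData->load();
  // On failure, compare_exchange_strong reloads Expected with the value it
  // observed, so each retry negates the freshly observed value.
  while (!MemData->compare_exchange_strong(Expected, static_cast<T>(-Expected),
                                           std::memory_order_seq_cst)) {
  }
  return Expected; // the value that was in memory before the negation
}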
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
DEF_OP(CASPair) {
  auto Op = IROp->C<IR::IROp_CASPair>();
  uint8_t OpSize = IROp->Size;

  // Size is the size of each pair element
  switch (OpSize) {
    case 4: {
      GD = AtomicCompareAndSwap(
        *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]),
        *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]),
        *GetSrc<uint64_t**>(Data->SSAData, Op->Header.Args[2])
      );
      break;
    }
    case 8: {
      std::atomic<__uint128_t> *MemData = *GetSrc<std::atomic<__uint128_t> **>(Data->SSAData, Op->Header.Args[2]);

      __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
      __uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);

      __uint128_t Expected = Src1;
      bool Result = MemData->compare_exchange_strong(Expected, Src2);
      memcpy(GDP, Result ? &Src1 : &Expected, 16);
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unknown CAS size: {}", OpSize); break;
  }
}

DEF_OP(CAS) {
  auto Op = IROp->C<IR::IROp_CAS>();
  uint8_t OpSize = IROp->Size;

  switch (OpSize) {
    case 1: {
      GD = AtomicCompareAndSwap(
        *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]),
        *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]),
        *GetSrc<uint8_t**>(Data->SSAData, Op->Header.Args[2])
      );
      break;
    }
    case 2: {
      GD = AtomicCompareAndSwap(
        *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[0]),
        *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]),
        *GetSrc<uint16_t**>(Data->SSAData, Op->Header.Args[2])
      );
      break;
    }
    case 4: {
      GD = AtomicCompareAndSwap(
        *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[0]),
        *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]),
        *GetSrc<uint32_t**>(Data->SSAData, Op->Header.Args[2])
      );
      break;
    }
    case 8: {
      GD = AtomicCompareAndSwap(
        *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]),
        *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]),
        *GetSrc<uint64_t**>(Data->SSAData, Op->Header.Args[2])
      );
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unknown CAS size: {}", OpSize); break;
  }
}

DEF_OP(AtomicAdd) {
  auto Op = IROp->C<IR::IROp_AtomicAdd>();
  switch (Op->Size) {
    case 1: {
      std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData += Src;
      break;
    }
    case 2: {
      std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData += Src;
      break;
    }
    case 4: {
      std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData += Src;
      break;
    }
    case 8: {
      std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData += Src;
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
  }
}

DEF_OP(AtomicSub) {
  auto Op = IROp->C<IR::IROp_AtomicSub>();
  switch (Op->Size) {
    case 1: {
      std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData -= Src;
      break;
    }
    case 2: {
      std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData -= Src;
      break;
    }
    case 4: {
      std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData -= Src;
      break;
    }
    case 8: {
      std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData -= Src;
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
  }
}

DEF_OP(AtomicAnd) {
  auto Op = IROp->C<IR::IROp_AtomicAnd>();
  switch (Op->Size) {
    case 1: {
      std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData &= Src;
      break;
    }
    case 2: {
      std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData &= Src;
      break;
    }
    case 4: {
      std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData &= Src;
      break;
    }
    case 8: {
      std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData &= Src;
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
  }
}

DEF_OP(AtomicOr) {
  auto Op = IROp->C<IR::IROp_AtomicOr>();
  switch (Op->Size) {
    case 1: {
      std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData |= Src;
      break;
    }
    case 2: {
      std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData |= Src;
      break;
    }
    case 4: {
      std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData |= Src;
      break;
    }
    case 8: {
      std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData |= Src;
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
  }
}

DEF_OP(AtomicXor) {
  auto Op = IROp->C<IR::IROp_AtomicXor>();
  switch (Op->Size) {
    case 1: {
      std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData ^= Src;
      break;
    }
    case 2: {
      std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData ^= Src;
      break;
    }
    case 4: {
      std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData ^= Src;
      break;
    }
    case 8: {
      std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
      *MemData ^= Src;
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
  }
}

DEF_OP(AtomicSwap) {
  auto Op = IROp->C<IR::IROp_AtomicSwap>();
  switch (Op->Size) {
    case 1: {
      std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
      uint8_t Previous = MemData->exchange(Src);
      GD = Previous;
      break;
    }
    case 2: {
      std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
      uint16_t Previous = MemData->exchange(Src);
      GD = Previous;
      break;
    }
    case 4: {
      std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
      uint32_t Previous = MemData->exchange(Src);
      GD = Previous;
      break;
    }
    case 8: {
      std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
      uint64_t Previous = MemData->exchange(Src);
      GD = Previous;
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
  }
}

DEF_OP(AtomicFetchAdd) {
  auto Op = IROp->C<IR::IROp_AtomicFetchAdd>();
  switch (Op->Size) {
    case 1: {
      std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
      uint8_t Previous = MemData->fetch_add(Src);
      GD = Previous;
      break;
    }
    case 2: {
      std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
      uint16_t Previous = MemData->fetch_add(Src);
      GD = Previous;
      break;
    }
    case 4: {
      std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
      uint32_t Previous = MemData->fetch_add(Src);
      GD = Previous;
      break;
    }
    case 8: {
      std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
      uint64_t Previous = MemData->fetch_add(Src);
      GD = Previous;
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
  }
}

DEF_OP(AtomicFetchSub) {
  auto Op = IROp->C<IR::IROp_AtomicFetchSub>();
  switch (Op->Size) {
    case 1: {
      std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
      uint8_t Previous = MemData->fetch_sub(Src);
      GD = Previous;
      break;
    }
    case 2: {
      std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
      uint16_t Previous = MemData->fetch_sub(Src);
      GD = Previous;
      break;
    }
    case 4: {
      std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
      uint32_t Previous = MemData->fetch_sub(Src);
      GD = Previous;
      break;
    }
    case 8: {
      std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
      uint64_t Previous = MemData->fetch_sub(Src);
      GD = Previous;
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
  }
}

DEF_OP(AtomicFetchAnd) {
  auto Op = IROp->C<IR::IROp_AtomicFetchAnd>();
  switch (Op->Size) {
    case 1: {
      std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
      uint8_t Previous = MemData->fetch_and(Src);
      GD = Previous;
      break;
    }
    case 2: {
      std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
      uint16_t Previous = MemData->fetch_and(Src);
      GD = Previous;
      break;
    }
    case 4: {
      std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
      uint32_t Previous = MemData->fetch_and(Src);
      GD = Previous;
      break;
    }
    case 8: {
      std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
      uint64_t Previous = MemData->fetch_and(Src);
      GD = Previous;
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
  }
}

DEF_OP(AtomicFetchOr) {
  auto Op = IROp->C<IR::IROp_AtomicFetchOr>();
  switch (Op->Size) {
    case 1: {
      std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
      uint8_t Previous = MemData->fetch_or(Src);
      GD = Previous;
      break;
    }
    case 2: {
      std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
      uint16_t Previous = MemData->fetch_or(Src);
      GD = Previous;
      break;
    }
    case 4: {
      std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
      uint32_t Previous = MemData->fetch_or(Src);
      GD = Previous;
      break;
    }
    case 8: {
      std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
      uint64_t Previous = MemData->fetch_or(Src);
      GD = Previous;
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
  }
}

DEF_OP(AtomicFetchXor) {
  auto Op = IROp->C<IR::IROp_AtomicFetchXor>();
  switch (Op->Size) {
    case 1: {
      std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
      uint8_t Previous = MemData->fetch_xor(Src);
      GD = Previous;
      break;
    }
    case 2: {
      std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
      uint16_t Previous = MemData->fetch_xor(Src);
      GD = Previous;
      break;
    }
    case 4: {
      std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
      uint32_t Previous = MemData->fetch_xor(Src);
      GD = Previous;
      break;
    }
    case 8: {
      std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
      uint64_t Previous = MemData->fetch_xor(Src);
      GD = Previous;
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
  }
}

DEF_OP(AtomicFetchNeg) {
  auto Op = IROp->C<IR::IROp_AtomicFetchNeg>();
  switch (Op->Size) {
    case 1: {
      using Type = uint8_t;
      GD = AtomicFetchNeg(*GetSrc<Type**>(Data->SSAData, Op->Header.Args[0]));
      break;
    }
    case 2: {
      using Type = uint16_t;
      GD = AtomicFetchNeg(*GetSrc<Type**>(Data->SSAData, Op->Header.Args[0]));
      break;
    }
    case 4: {
      using Type = uint32_t;
      GD = AtomicFetchNeg(*GetSrc<Type**>(Data->SSAData, Op->Header.Args[0]));
      break;
    }
    case 8: {
      using Type = uint64_t;
      GD = AtomicFetchNeg(*GetSrc<Type**>(Data->SSAData, Op->Header.Args[0]));
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
  }
}

#undef DEF_OP
void InterpreterOps::RegisterAtomicHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
  REGISTER_OP(CASPAIR, CASPair);
  REGISTER_OP(CAS, CAS);
  REGISTER_OP(ATOMICADD, AtomicAdd);
  REGISTER_OP(ATOMICSUB, AtomicSub);
  REGISTER_OP(ATOMICAND, AtomicAnd);
  REGISTER_OP(ATOMICOR, AtomicOr);
  REGISTER_OP(ATOMICXOR, AtomicXor);
  REGISTER_OP(ATOMICSWAP, AtomicSwap);
  REGISTER_OP(ATOMICFETCHADD, AtomicFetchAdd);
  REGISTER_OP(ATOMICFETCHSUB, AtomicFetchSub);
  REGISTER_OP(ATOMICFETCHAND, AtomicFetchAnd);
  REGISTER_OP(ATOMICFETCHOR, AtomicFetchOr);
  REGISTER_OP(ATOMICFETCHXOR, AtomicFetchXor);
  REGISTER_OP(ATOMICFETCHNEG, AtomicFetchNeg);
#undef REGISTER_OP
}
}
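One convention worth calling out: every CAS path returns "Result ? Src1 : Expected", i.e. the value that was in memory before the operation, whether or not the swap happened; that is exactly what a guest CMPXCHG needs in order to update its accumulator on failure. A self-contained check of the convention (Cas here is a hypothetical stand-in, not the FEX helper):

#include <atomic>
#include <cassert>
#include <cstdint>

template <typename T>
T Cas(T expected, T desired, std::atomic<T> *mem) {
  T observed = expected;
  bool ok = mem->compare_exchange_strong(observed, desired);
  return ok ? expected : observed;   // always the pre-op memory value
}

int main() {
  std::atomic<uint32_t> mem{5};
  assert(Cas<uint32_t>(5, 9, &mem) == 5 && mem.load() == 9); // swap happened
  assert(Cas<uint32_t>(5, 7, &mem) == 9 && mem.load() == 9); // reports observed 9
}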
External/FEXCore/Source/Interface/Core/Interpreter/BranchOps.cpp (vendored, new file): 157 lines

@@ -0,0 +1,157 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/

#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include "Interface/HLE/Thunks/Thunks.h"

#include <FEXCore/Utils/BitUtils.h>
#include <FEXCore/HLE/SyscallHandler.h>

#include <cstdint>

namespace FEXCore::CPU {
[[noreturn]]
static void SignalReturn(FEXCore::Core::InternalThreadState *Thread) {
  Thread->CTX->SignalThread(Thread, FEXCore::Core::SignalEvent::Return);

  LOGMAN_MSG_A_FMT("unreachable");
  FEX_UNREACHABLE;
}

#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
DEF_OP(GuestCallDirect) {
  LogMan::Msg::DFmt("Unimplemented");
}

DEF_OP(GuestCallIndirect) {
  LogMan::Msg::DFmt("Unimplemented");
}

DEF_OP(GuestReturn) {
  LogMan::Msg::DFmt("Unimplemented");
}

DEF_OP(SignalReturn) {
  SignalReturn(Data->State);
}

DEF_OP(CallbackReturn) {
  Data->State->CTX->InterpreterCallbackReturn(Data->State, Data->StackEntry);
}

DEF_OP(ExitFunction) {
  auto Op = IROp->C<IR::IROp_ExitFunction>();
  uint8_t OpSize = IROp->Size;

  uintptr_t* ContextPtr = reinterpret_cast<uintptr_t*>(Data->State->CurrentFrame);

  void *ContextData = reinterpret_cast<void*>(ContextPtr);
  void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);

  memcpy(ContextData, Src, OpSize);

  Data->BlockResults.Quit = true;
}

DEF_OP(Jump) {
  auto Op = IROp->C<IR::IROp_Jump>();
  uintptr_t ListBegin = Data->CurrentIR->GetListData();
  uintptr_t DataBegin = Data->CurrentIR->GetData();

  Data->BlockIterator = IR::NodeIterator(ListBegin, DataBegin, Op->Header.Args[0]);
  Data->BlockResults.Redo = true;
}

DEF_OP(CondJump) {
  auto Op = IROp->C<IR::IROp_CondJump>();
  uintptr_t ListBegin = Data->CurrentIR->GetListData();
  uintptr_t DataBegin = Data->CurrentIR->GetData();

  bool CompResult;

  uint64_t Src1 = *GetSrc<uint64_t*>(Data->SSAData, Op->Cmp1);
  uint64_t Src2 = *GetSrc<uint64_t*>(Data->SSAData, Op->Cmp2);

  if (Op->CompareSize == 4)
    CompResult = IsConditionTrue<uint32_t, int32_t, float>(Op->Cond.Val, Src1, Src2);
  else
    CompResult = IsConditionTrue<uint64_t, int64_t, double>(Op->Cond.Val, Src1, Src2);

  if (CompResult) {
    Data->BlockIterator = IR::NodeIterator(ListBegin, DataBegin, Op->TrueBlock);
  }
  else {
    Data->BlockIterator = IR::NodeIterator(ListBegin, DataBegin, Op->FalseBlock);
  }
  Data->BlockResults.Redo = true;
}

DEF_OP(Syscall) {
  auto Op = IROp->C<IR::IROp_Syscall>();

  FEXCore::HLE::SyscallArguments Args;
  for (size_t j = 0; j < FEXCore::HLE::SyscallArguments::MAX_ARGS; ++j) {
    if (Op->Header.Args[j].IsInvalid()) break;
    Args.Argument[j] = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[j]);
  }

  uint64_t Res = FEXCore::Context::HandleSyscall(Data->State->CTX->SyscallHandler, Data->State->CurrentFrame, &Args);
  GD = Res;
}

DEF_OP(Thunk) {
  auto Op = IROp->C<IR::IROp_Thunk>();

  auto thunkFn = Data->State->CTX->ThunkHandler->LookupThunk(Op->ThunkNameHash);
  thunkFn(*GetSrc<void**>(Data->SSAData, Op->Header.Args[0]));
}

DEF_OP(ValidateCode) {
  auto Op = IROp->C<IR::IROp_ValidateCode>();

  auto CodePtr = Data->CurrentEntry + Op->Offset;
  if (memcmp((void*)CodePtr, &Op->CodeOriginalLow, Op->CodeLength) != 0) {
    GD = 1;
  } else {
    GD = 0;
  }
}

DEF_OP(RemoveCodeEntry) {
  Data->State->CTX->RemoveCodeEntry(Data->State, Data->CurrentEntry);
}

DEF_OP(CPUID) {
  auto Op = IROp->C<IR::IROp_CPUID>();
  uint64_t *DstPtr = GetDest<uint64_t*>(Data->SSAData, Node);
  uint64_t Arg = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]);
  uint64_t Leaf = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);

  auto Results = Data->State->CTX->CPUID.RunFunction(Arg, Leaf);
  memcpy(DstPtr, &Results, sizeof(uint32_t) * 4);
}

#undef DEF_OP
void InterpreterOps::RegisterBranchHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
  REGISTER_OP(GUESTCALLDIRECT, GuestCallDirect);
  REGISTER_OP(GUESTCALLINDIRECT, GuestCallIndirect);
  REGISTER_OP(GUESTRETURN, GuestReturn);
  REGISTER_OP(SIGNALRETURN, SignalReturn);
  REGISTER_OP(CALLBACKRETURN, CallbackReturn);
  REGISTER_OP(EXITFUNCTION, ExitFunction);
  REGISTER_OP(JUMP, Jump);
  REGISTER_OP(CONDJUMP, CondJump);
  REGISTER_OP(SYSCALL, Syscall);
  REGISTER_OP(THUNK, Thunk);
  REGISTER_OP(VALIDATECODE, ValidateCode);
  REGISTER_OP(REMOVECODEENTRY, RemoveCodeEntry);
  REGISTER_OP(CPUID, CPUID);
#undef REGISTER_OP
}
}
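The branch ops never transfer control themselves: Jump and CondJump retarget Data->BlockIterator and set BlockResults.Redo, while ExitFunction writes the new guest RIP into the frame and sets BlockResults.Quit. A toy model of the outer loop this protocol implies (an assumption for illustration; the real loop lives in InterpreterOps.cpp, which this diff does not show):

#include <cstdio>

// Hypothetical miniature of the Redo/Quit protocol used by the branch ops.
struct BlockResults { bool Redo = false; bool Quit = false; };

struct MiniInterp {
  int BlockIterator = 0;            // stands in for IR::NodeIterator
  BlockResults Results;

  void ExecuteBlock() {             // stands in for running a block's ops
    if (BlockIterator < 3) {
      BlockIterator += 1;           // "Jump": retarget and ask to redo
      Results.Redo = true;
    } else {
      Results.Quit = true;          // "ExitFunction": stop interpreting
    }
  }
};

int main() {
  MiniInterp I;
  for (;;) {
    I.Results = {};
    I.ExecuteBlock();
    if (I.Results.Quit) break;
    // Redo: loop again at the block the branch op selected.
  }
  std::printf("stopped at block %d\n", I.BlockIterator);
}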
External/FEXCore/Source/Interface/Core/Interpreter/ConversionOps.cpp (vendored, new file): 237 lines

@@ -0,0 +1,237 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/

#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"

#include <cstdint>

namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
DEF_OP(VInsGPR) {
  auto Op = IROp->C<IR::IROp_VInsGPR>();
  uint8_t OpSize = IROp->Size;

  __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
  __uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);

  uint64_t Offset = Op->Index * Op->Header.ElementSize * 8;
  __uint128_t Mask = (1ULL << (Op->Header.ElementSize * 8)) - 1;
  if (Op->Header.ElementSize == 8) {
    Mask = ~0ULL;
  }
  Src2 = Src2 & Mask;
  Mask <<= Offset;
  Mask = ~Mask;
  __uint128_t Dst = Src1 & Mask;
  Dst |= Src2 << Offset;

  memcpy(GDP, &Dst, OpSize);
}

DEF_OP(VCastFromGPR) {
  auto Op = IROp->C<IR::IROp_VCastFromGPR>();
  memcpy(GDP, GetSrc<void*>(Data->SSAData, Op->Header.Args[0]), Op->Header.ElementSize);
}

DEF_OP(Float_FromGPR_S) {
  auto Op = IROp->C<IR::IROp_Float_FromGPR_S>();

  uint16_t Conv = (Op->Header.ElementSize << 8) | Op->SrcElementSize;
  switch (Conv) {
    case 0x0404: { // Float <- int32_t
      float Dst = (float)*GetSrc<int32_t*>(Data->SSAData, Op->Header.Args[0]);
      memcpy(GDP, &Dst, Op->Header.ElementSize);
      break;
    }
    case 0x0408: { // Float <- int64_t
      float Dst = (float)*GetSrc<int64_t*>(Data->SSAData, Op->Header.Args[0]);
      memcpy(GDP, &Dst, Op->Header.ElementSize);
      break;
    }
    case 0x0804: { // Double <- int32_t
      double Dst = (double)*GetSrc<int32_t*>(Data->SSAData, Op->Header.Args[0]);
      memcpy(GDP, &Dst, Op->Header.ElementSize);
      break;
    }
    case 0x0808: { // Double <- int64_t
      double Dst = (double)*GetSrc<int64_t*>(Data->SSAData, Op->Header.Args[0]);
      memcpy(GDP, &Dst, Op->Header.ElementSize);
      break;
    }
  }
}

DEF_OP(Float_FToF) {
  auto Op = IROp->C<IR::IROp_Float_FToF>();
  uint16_t Conv = (Op->Header.ElementSize << 8) | Op->SrcElementSize;
  switch (Conv) {
    case 0x0804: { // Double <- Float
      double Dst = (double)*GetSrc<float*>(Data->SSAData, Op->Header.Args[0]);
      memcpy(GDP, &Dst, 8);
      break;
    }
    case 0x0408: { // Float <- Double
      float Dst = (float)*GetSrc<double*>(Data->SSAData, Op->Header.Args[0]);
      memcpy(GDP, &Dst, 4);
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unknown FCVT sizes: 0x{:x}", Conv);
  }
}

DEF_OP(Vector_SToF) {
  auto Op = IROp->C<IR::IROp_Vector_SToF>();
  uint8_t OpSize = IROp->Size;

  void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
  uint8_t Tmp[16]{};

  uint8_t Elements = OpSize / Op->Header.ElementSize;

  auto Func = [](auto a, auto min, auto max) { return a; };
  switch (Op->Header.ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(4, float, int32_t, Func, 0, 0)
    DO_VECTOR_1SRC_2TYPE_OP(8, double, int64_t, Func, 0, 0)
    default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
  }
  memcpy(GDP, Tmp, OpSize);
}

DEF_OP(Vector_FToZS) {
  auto Op = IROp->C<IR::IROp_Vector_FToZS>();
  uint8_t OpSize = IROp->Size;

  void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
  uint8_t Tmp[16]{};

  uint8_t Elements = OpSize / Op->Header.ElementSize;

  auto Func = [](auto a, auto min, auto max) { return std::trunc(a); };
  switch (Op->Header.ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(4, int32_t, float, Func, 0, 0)
    DO_VECTOR_1SRC_2TYPE_OP(8, int64_t, double, Func, 0, 0)
    default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
  }
  memcpy(GDP, Tmp, OpSize);
}

DEF_OP(Vector_FToS) {
  auto Op = IROp->C<IR::IROp_Vector_FToS>();
  uint8_t OpSize = IROp->Size;

  void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
  uint8_t Tmp[16]{};

  uint8_t Elements = OpSize / Op->Header.ElementSize;

  auto Func = [](auto a, auto min, auto max) { return std::nearbyint(a); };
  switch (Op->Header.ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(4, int32_t, float, Func, 0, 0)
    DO_VECTOR_1SRC_2TYPE_OP(8, int64_t, double, Func, 0, 0)
    default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
  }
  memcpy(GDP, Tmp, OpSize);
}

DEF_OP(Vector_FToF) {
  auto Op = IROp->C<IR::IROp_Vector_FToF>();
  uint8_t OpSize = IROp->Size;

  void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
  uint8_t Tmp[16]{};

  uint16_t Conv = (Op->Header.ElementSize << 8) | Op->SrcElementSize;

  auto Func = [](auto a, auto min, auto max) { return a; };
  switch (Conv) {
    case 0x0804: { // Double <- float
      // Only the lower elements from the source
      // This uses half the source elements
      uint8_t Elements = OpSize / 8;
      DO_VECTOR_1SRC_2TYPE_OP_NOSIZE(double, float, Func, 0, 0)
      break;
    }
    case 0x0408: { // Float <- Double
      // Little bit tricky here
      // Sometimes is used to convert from a 128bit vector register
      // in to a 64bit vector register with different sized elements
      // eg: %ssa5 i32v2 = Vector_FToF %ssa4 i128, #0x8
      uint8_t Elements = (OpSize << 1) / Op->SrcElementSize;
      DO_VECTOR_1SRC_2TYPE_OP_NOSIZE(float, double, Func, 0, 0)
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unknown Conversion Type : 0x{:04x}", Conv); break;
  }
  memcpy(GDP, Tmp, OpSize);
}
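The narrowing case's element count is worth a worked example: for "%ssa5 i32v2 = Vector_FToF %ssa4 i128, #0x8", the destination register is 8 bytes (OpSize = 8) while each source element is 8 bytes, so doubling OpSize recovers the 16 source bytes and (OpSize << 1) / SrcElementSize yields 2 conversions. Restated as a compile-time check (illustrative helper, not FEX code):

#include <cstdint>

// Narrowing Vector_FToF: dest is OpSize bytes, source spans twice that.
constexpr uint8_t NarrowElements(uint8_t OpSize, uint8_t SrcElementSize) {
  return static_cast<uint8_t>((OpSize << 1) / SrcElementSize);
}
static_assert(NarrowElements(8, 8) == 2, "i128 of doubles -> i32v2 of floats");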
DEF_OP(Vector_FToI) {
  auto Op = IROp->C<IR::IROp_Vector_FToI>();
  uint8_t OpSize = IROp->Size;

  void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
  uint8_t Tmp[16]{};

  uint8_t Elements = OpSize / Op->Header.ElementSize;
  auto Func_Nearest = [](auto a) { return std::rint(a); };
  auto Func_Neg = [](auto a) { return std::floor(a); };
  auto Func_Pos = [](auto a) { return std::ceil(a); };
  auto Func_Trunc = [](auto a) { return std::trunc(a); };
  auto Func_Host = [](auto a) { return std::rint(a); };

  switch (Op->Round) {
    case FEXCore::IR::Round_Nearest.Val:
      switch (Op->Header.ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Nearest)
        DO_VECTOR_1SRC_OP(8, double, Func_Nearest)
      }
      break;
    case FEXCore::IR::Round_Negative_Infinity.Val:
      switch (Op->Header.ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Neg)
        DO_VECTOR_1SRC_OP(8, double, Func_Neg)
      }
      break;
    case FEXCore::IR::Round_Positive_Infinity.Val:
      switch (Op->Header.ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Pos)
        DO_VECTOR_1SRC_OP(8, double, Func_Pos)
      }
      break;
    case FEXCore::IR::Round_Towards_Zero.Val:
      switch (Op->Header.ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Trunc)
        DO_VECTOR_1SRC_OP(8, double, Func_Trunc)
      }
      break;
    case FEXCore::IR::Round_Host.Val:
      switch (Op->Header.ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Host)
        DO_VECTOR_1SRC_OP(8, double, Func_Host)
      }
      break;
  }
  memcpy(GDP, Tmp, OpSize);
}

#undef DEF_OP
void InterpreterOps::RegisterConversionHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
  REGISTER_OP(VINSGPR, VInsGPR);
  REGISTER_OP(VCASTFROMGPR, VCastFromGPR);
  REGISTER_OP(FLOAT_FROMGPR_S, Float_FromGPR_S);
  REGISTER_OP(FLOAT_FTOF, Float_FToF);
  REGISTER_OP(VECTOR_STOF, Vector_SToF);
  REGISTER_OP(VECTOR_FTOZS, Vector_FToZS);
  REGISTER_OP(VECTOR_FTOS, Vector_FToS);
  REGISTER_OP(VECTOR_FTOF, Vector_FToF);
  REGISTER_OP(VECTOR_FTOI, Vector_FToI);
#undef REGISTER_OP
}

}
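Float_FromGPR_S, Float_FToF, and Vector_FToF all switch on a 16-bit key packing destination and source element sizes as (DstSize << 8) | SrcSize. A quick compile-time restatement of the encodings used in this file (illustrative helper, not FEX code):

#include <cstdint>

constexpr uint16_t ConvKey(uint8_t DstSize, uint8_t SrcSize) {
  return static_cast<uint16_t>((DstSize << 8) | SrcSize);
}

static_assert(ConvKey(4, 4) == 0x0404, "float  <- int32_t");
static_assert(ConvKey(4, 8) == 0x0408, "float  <- int64_t / double");
static_assert(ConvKey(8, 4) == 0x0804, "double <- int32_t / float");
static_assert(ConvKey(8, 8) == 0x0808, "double <- int64_t");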
External/FEXCore/Source/Interface/Core/Interpreter/EncryptionOps.cpp (vendored, new file): 443 lines

@@ -0,0 +1,443 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/

#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"

#include <cstdint>

namespace AES {
static __uint128_t InvShiftRows(uint8_t *State) {
  uint8_t Shifted[16] = {
    State[0], State[13], State[10], State[7],
    State[4], State[1], State[14], State[11],
    State[8], State[5], State[2], State[15],
    State[12], State[9], State[6], State[3],
  };
  __uint128_t Res{};
  memcpy(&Res, Shifted, 16);
  return Res;
}

static __uint128_t InvSubBytes(uint8_t *State) {
  // 16x16 matrix table
  static const uint8_t InvSubstitutionTable[256] = {
    0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
    0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
    0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
    0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
    0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
    0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
    0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
    0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
    0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
    0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
    0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
    0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
    0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
    0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
    0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
    0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
  };

  // Uses a byte substitution table with a constant set of values
  // Needs to do a table look up
  uint8_t Substituted[16];
  for (size_t i = 0; i < 16; ++i) {
    Substituted[i] = InvSubstitutionTable[State[i]];
  }

  __uint128_t Res{};
  memcpy(&Res, Substituted, 16);
  return Res;
}

static __uint128_t ShiftRows(uint8_t *State) {
  uint8_t Shifted[16] = {
    State[0], State[5], State[10], State[15],
    State[4], State[9], State[14], State[3],
    State[8], State[13], State[2], State[7],
    State[12], State[1], State[6], State[11],
  };
  __uint128_t Res{};
  memcpy(&Res, Shifted, 16);
  return Res;
}
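ShiftRows and InvShiftRows are inverse byte permutations: composing their Shifted[] index mappings returns every byte of the state to its original slot. A self-contained check (illustrative, not part of the commit):

#include <cassert>

int main() {
  // Index maps taken from the Shifted[] initializers above:
  // Shifted[i] = State[SR[i]] for ShiftRows, State[ISR[i]] for InvShiftRows.
  static const int SR[16]  = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11};
  static const int ISR[16] = {0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3};
  for (int i = 0; i < 16; ++i) {
    assert(SR[ISR[i]] == i); // InvShiftRows after ShiftRows is the identity
  }
}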
static __uint128_t SubBytes(uint8_t *State, size_t Bytes) {
  // 16x16 matrix table
  static const uint8_t SubstitutionTable[256] = {
    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
  };
  // Uses a byte substitution table with a constant set of values
  // Needs to do a table look up
  uint8_t Substituted[16];
  Bytes = std::min(Bytes, (size_t)16);
  for (size_t i = 0; i < Bytes; ++i) {
    Substituted[i] = SubstitutionTable[State[i]];
  }

  __uint128_t Res{};
  memcpy(&Res, Substituted, Bytes);
  return Res;
}

static uint8_t FFMul02(uint8_t in) {
  static const uint8_t FFMul02[256] = {
    0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
    0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
    0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
    0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e,
    0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e,
    0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
    0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,
    0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,
    0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05,
    0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25,
    0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45,
    0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65,
    0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85,
    0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5,
    0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5,
    0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5,
  };
  return FFMul02[in];
}

static uint8_t FFMul03(uint8_t in) {
  static const uint8_t FFMul03[256] = {
    0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11,
    0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,
    0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
    0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41,
    0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1,
    0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
    0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1,
    0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81,
    0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a,
    0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba,
    0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea,
    0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda,
    0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a,
    0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a,
    0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a,
    0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a,
  };
  return FFMul03[in];
}

static __uint128_t MixColumns(uint8_t *State) {
  uint8_t In0[16] = {
    State[0], State[4], State[8], State[12],
    State[1], State[5], State[9], State[13],
    State[2], State[6], State[10], State[14],
    State[3], State[7], State[11], State[15],
  };

  uint8_t Out0[4]{};
  uint8_t Out1[4]{};
  uint8_t Out2[4]{};
  uint8_t Out3[4]{};

  for (size_t i = 0; i < 4; ++i) {
    Out0[i] = FFMul02(In0[0 + i]) ^ FFMul03(In0[4 + i]) ^ In0[8 + i] ^ In0[12 + i];
    Out1[i] = In0[0 + i] ^ FFMul02(In0[4 + i]) ^ FFMul03(In0[8 + i]) ^ In0[12 + i];
    Out2[i] = In0[0 + i] ^ In0[4 + i] ^ FFMul02(In0[8 + i]) ^ FFMul03(In0[12 + i]);
    Out3[i] = FFMul03(In0[0 + i]) ^ In0[4 + i] ^ In0[8 + i] ^ FFMul02(In0[12 + i]);
  }

  uint8_t OutArray[16] = {
    Out0[0], Out1[0], Out2[0], Out3[0],
    Out0[1], Out1[1], Out2[1], Out3[1],
    Out0[2], Out1[2], Out2[2], Out3[2],
    Out0[3], Out1[3], Out2[3], Out3[3],
  };
  __uint128_t Res{};
  memcpy(&Res, OutArray, 16);
  return Res;
}
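FFMul02 tabulates the AES xtime operation, multiplication by 2 in GF(2^8) reduced by the AES polynomial x^8 + x^4 + x^3 + x + 1, and FFMul03[x] equals xtime(x) ^ x; MixColumns above then multiplies each state column by the fixed AES matrix {2,3,1,1; 1,2,3,1; 1,1,2,3; 3,1,1,2}. The computed form the tables bake in (illustrative, not part of the commit):

#include <cstdint>

// xtime: multiply by 2 in GF(2^8), reducing by 0x1b when the top bit carries out.
constexpr uint8_t xtime(uint8_t in) {
  return static_cast<uint8_t>((in << 1) ^ ((in & 0x80) ? 0x1b : 0x00));
}
constexpr uint8_t Mul03(uint8_t in) { return static_cast<uint8_t>(xtime(in) ^ in); }

static_assert(xtime(0x01) == 0x02, "matches FFMul02[0x01]");
static_assert(xtime(0x80) == 0x1b, "matches FFMul02[0x80]");
static_assert(Mul03(0x01) == 0x03, "matches FFMul03[0x01]");
static_assert(Mul03(0xff) == 0x1a, "matches FFMul03[0xff]");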
static uint8_t FFMul09(uint8_t in) {
  static const uint8_t FFMul09[256] = {
    0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
    0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7,
    0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
    0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc,
    0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01,
    0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91,
    0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a,
    0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa,
    0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b,
    0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b,
    0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0,
    0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
    0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed,
    0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d,
    0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6,
    0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46,
  };
  return FFMul09[in];
}

static uint8_t FFMul0B(uint8_t in) {
  static const uint8_t FFMul0B[256] = {
    0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69,
    0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9,
    0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12,
    0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2,
    0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f,
    0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f,
    0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4,
    0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54,
    0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e,
    0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e,
    0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5,
    0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55,
    0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68,
    0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8,
    0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13,
    0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3,
  };
  return FFMul0B[in];
}

static uint8_t FFMul0D(uint8_t in) {
  static const uint8_t FFMul0D[256] = {
    0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b,
    0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,
    0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0,
    0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20,
    0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26,
    0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
    0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d,
    0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d,
    0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
    0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41,
    0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a,
    0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa,
    0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc,
    0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c,
    0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47,
    0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97,
  };

  return FFMul0D[in];
}

static uint8_t FFMul0E(uint8_t in) {
  static const uint8_t FFMul0E[256] = {
    0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,
    0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba,
    0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81,
    0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61,
    0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7,
    0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
    0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c,
    0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc,
    0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b,
    0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb,
    0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0,
|
||||
0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
|
||||
0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6,
|
||||
0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56,
|
||||
0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
|
||||
0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d,
|
||||
};
|
||||
|
||||
return FFMul0E[in];
|
||||
}
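
These four functions are lookup tables for multiplication by 0x09, 0x0B, 0x0D, and 0x0E in GF(2^8), the constants of the AES InvMixColumns matrix. As a hedged aside (not part of this commit), the tables can be regenerated with a shift-and-xor GF(2^8) multiply:

#include <cstdint>
#include <cstdio>

// Multiply two bytes in GF(2^8) with the AES reduction polynomial
// x^8 + x^4 + x^3 + x + 1 (0x11b), using shift-and-xor multiplication.
static uint8_t GFMul(uint8_t a, uint8_t b) {
  uint8_t p = 0;
  for (int i = 0; i < 8; ++i) {
    if (b & 1) p ^= a;         // Add (xor) a for each set bit of b
    bool carry = a & 0x80;
    a <<= 1;
    if (carry) a ^= 0x1b;      // Reduce modulo the AES polynomial
    b >>= 1;
  }
  return p;
}

int main() {
  // Prints the FFMul09 table; 0x0b, 0x0d, 0x0e generate the other three.
  for (int i = 0; i < 256; ++i) {
    printf("0x%02x,%s", GFMul(static_cast<uint8_t>(i), 0x09), (i % 16 == 15) ? "\n" : " ");
  }
}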

static __uint128_t InvMixColumns(uint8_t *State) {
  uint8_t In0[16] = {
    State[0], State[4], State[8], State[12],
    State[1], State[5], State[9], State[13],
    State[2], State[6], State[10], State[14],
    State[3], State[7], State[11], State[15],
  };

  uint8_t Out0[4]{};
  uint8_t Out1[4]{};
  uint8_t Out2[4]{};
  uint8_t Out3[4]{};

  for (size_t i = 0; i < 4; ++i) {
    Out0[i] = FFMul0E(In0[0 + i]) ^ FFMul0B(In0[4 + i]) ^ FFMul0D(In0[8 + i]) ^ FFMul09(In0[12 + i]);
    Out1[i] = FFMul09(In0[0 + i]) ^ FFMul0E(In0[4 + i]) ^ FFMul0B(In0[8 + i]) ^ FFMul0D(In0[12 + i]);
    Out2[i] = FFMul0D(In0[0 + i]) ^ FFMul09(In0[4 + i]) ^ FFMul0E(In0[8 + i]) ^ FFMul0B(In0[12 + i]);
    Out3[i] = FFMul0B(In0[0 + i]) ^ FFMul0D(In0[4 + i]) ^ FFMul09(In0[8 + i]) ^ FFMul0E(In0[12 + i]);
  }

  uint8_t OutArray[16] = {
    Out0[0], Out1[0], Out2[0], Out3[0],
    Out0[1], Out1[1], Out2[1], Out3[1],
    Out0[2], Out1[2], Out2[2], Out3[2],
    Out0[3], Out1[3], Out2[3], Out3[3],
  };
  __uint128_t Res{};
  memcpy(&Res, OutArray, 16);
  return Res;
}
}

namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)

DEF_OP(AESImc) {
  auto Op = IROp->C<IR::IROp_VAESImc>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);

  // Pseudo-code
  // Dst = InvMixColumns(STATE)
  __uint128_t Tmp{};
  Tmp = AES::InvMixColumns(reinterpret_cast<uint8_t*>(&Src1));
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

DEF_OP(AESEnc) {
  auto Op = IROp->C<IR::IROp_VAESEnc>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
  __uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);

  // Pseudo-code
  // STATE = Src1
  // RoundKey = Src2
  // STATE = ShiftRows(STATE)
  // STATE = SubBytes(STATE)
  // STATE = MixColumns(STATE)
  // Dst = STATE XOR RoundKey
  __uint128_t Tmp{};
  Tmp = AES::ShiftRows(reinterpret_cast<uint8_t*>(&Src1));
  Tmp = AES::SubBytes(reinterpret_cast<uint8_t*>(&Tmp), 16);
  Tmp = AES::MixColumns(reinterpret_cast<uint8_t*>(&Tmp));
  Tmp = Tmp ^ Src2;
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

DEF_OP(AESEncLast) {
  auto Op = IROp->C<IR::IROp_VAESEncLast>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
  __uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);

  // Pseudo-code
  // STATE = Src1
  // RoundKey = Src2
  // STATE = ShiftRows(STATE)
  // STATE = SubBytes(STATE)
  // Dst = STATE XOR RoundKey
  __uint128_t Tmp{};
  Tmp = AES::ShiftRows(reinterpret_cast<uint8_t*>(&Src1));
  Tmp = AES::SubBytes(reinterpret_cast<uint8_t*>(&Tmp), 16);
  Tmp = Tmp ^ Src2;
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

DEF_OP(AESDec) {
  auto Op = IROp->C<IR::IROp_VAESDec>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
  __uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);

  // Pseudo-code
  // STATE = Src1
  // RoundKey = Src2
  // STATE = InvShiftRows(STATE)
  // STATE = InvSubBytes(STATE)
  // STATE = InvMixColumns(STATE)
  // Dst = STATE XOR RoundKey
  __uint128_t Tmp{};
  Tmp = AES::InvShiftRows(reinterpret_cast<uint8_t*>(&Src1));
  Tmp = AES::InvSubBytes(reinterpret_cast<uint8_t*>(&Tmp));
  Tmp = AES::InvMixColumns(reinterpret_cast<uint8_t*>(&Tmp));
  Tmp = Tmp ^ Src2;
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

DEF_OP(AESDecLast) {
  auto Op = IROp->C<IR::IROp_VAESDecLast>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
  __uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);

  // Pseudo-code
  // STATE = Src1
  // RoundKey = Src2
  // STATE = InvShiftRows(STATE)
  // STATE = InvSubBytes(STATE)
  // Dst = STATE XOR RoundKey
  __uint128_t Tmp{};
  Tmp = AES::InvShiftRows(reinterpret_cast<uint8_t*>(&Src1));
  Tmp = AES::InvSubBytes(reinterpret_cast<uint8_t*>(&Tmp));
  Tmp = Tmp ^ Src2;
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

DEF_OP(AESKeyGenAssist) {
  auto Op = IROp->C<IR::IROp_VAESKeyGenAssist>();
  uint8_t *Src1 = GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]);

  // Pseudo-code
  // X3 = Src1[127:96]
  // X2 = Src1[95:64]
  // X1 = Src1[63:32]
  // X0 = Src1[31:0]
  // RCON = (Zext)rcon
  // Dest[31:0] = SubWord(X1)
  // Dest[63:32] = RotWord(SubWord(X1)) XOR RCON
  // Dest[95:64] = SubWord(X3)
  // Dest[127:96] = RotWord(SubWord(X3)) XOR RCON
  __uint128_t Tmp{};
  uint32_t X1{};
  uint32_t X3{};
  memcpy(&X1, &Src1[4], 4);
  memcpy(&X3, &Src1[12], 4);
  uint32_t SubWord_X1 = AES::SubBytes(reinterpret_cast<uint8_t*>(&X1), 4);
  uint32_t SubWord_X3 = AES::SubBytes(reinterpret_cast<uint8_t*>(&X3), 4);

  auto Ror = [] (auto In, auto R) {
    auto RotateMask = sizeof(In) * 8 - 1;
    R &= RotateMask;
    return (In >> R) | (In << (sizeof(In) * 8 - R));
  };

  uint32_t Rot_X1 = Ror(SubWord_X1, 8);
  uint32_t Rot_X3 = Ror(SubWord_X3, 8);

  Tmp = Rot_X3 ^ Op->RCON;
  Tmp <<= 32;
  Tmp |= SubWord_X3;
  Tmp <<= 32;
  Tmp |= Rot_X1 ^ Op->RCON;
  Tmp <<= 32;
  Tmp |= SubWord_X1;
  memcpy(GDP, &Tmp, sizeof(Tmp));
}
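
For context on how guests use this op (a hedged sketch, not code from this commit): the RotWord(SubWord(X3)) ^ RCON lane computed above is what drives AES-128 key expansion. With the real AES-NI intrinsics, where ExpandKey128 is a hypothetical helper name, the guest-side pattern looks roughly like this:

#include <wmmintrin.h> // AES-NI intrinsics; compile with -maes

// One AES-128 key-expansion step: combine round key N with the
// AESKEYGENASSIST result to produce round key N+1.
static __m128i ExpandKey128(__m128i Prev, __m128i Assist) {
  // Broadcast the RotWord(SubWord(X3)) ^ RCON lane to all four lanes.
  Assist = _mm_shuffle_epi32(Assist, 0xFF);
  // Fold the previous key into itself word by word.
  Prev = _mm_xor_si128(Prev, _mm_slli_si128(Prev, 4));
  Prev = _mm_xor_si128(Prev, _mm_slli_si128(Prev, 4));
  Prev = _mm_xor_si128(Prev, _mm_slli_si128(Prev, 4));
  return _mm_xor_si128(Prev, Assist);
}

// Usage: __m128i K1 = ExpandKey128(K0, _mm_aeskeygenassist_si128(K0, 0x01));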

#undef DEF_OP
void InterpreterOps::RegisterEncryptionHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
  REGISTER_OP(VAESIMC, AESImc);
  REGISTER_OP(VAESENC, AESEnc);
  REGISTER_OP(VAESENCLAST, AESEncLast);
  REGISTER_OP(VAESDEC, AESDec);
  REGISTER_OP(VAESDECLAST, AESDecLast);
  REGISTER_OP(VAESKEYGENASSIST, AESKeyGenAssist);
#undef REGISTER_OP
}
}
389 External/FEXCore/Source/Interface/Core/Interpreter/F80Ops.cpp vendored Normal file
@ -0,0 +1,389 @
/*
$info$
tags: backend|interpreter
$end_info$
*/

#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"

#include "F80Ops.h"

#include <cstdint>

namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
DEF_OP(F80LOADFCW) {
  FEXCore::CPU::OpHandlers<IR::OP_F80LOADFCW>::handle(*GetSrc<uint16_t*>(Data->SSAData, IROp->Args[0]));
}

DEF_OP(F80ADD) {
  auto Op = IROp->C<IR::IROp_F80Add>();
  X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::FADD(Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80SUB) {
  auto Op = IROp->C<IR::IROp_F80Sub>();
  X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::FSUB(Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80MUL) {
  auto Op = IROp->C<IR::IROp_F80Mul>();
  X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::FMUL(Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80DIV) {
  auto Op = IROp->C<IR::IROp_F80Div>();
  X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::FDIV(Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80FYL2X) {
  auto Op = IROp->C<IR::IROp_F80FYL2X>();
  X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::FYL2X(Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80ATAN) {
  auto Op = IROp->C<IR::IROp_F80ATAN>();
  X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::FATAN(Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80FPREM1) {
  auto Op = IROp->C<IR::IROp_F80FPREM1>();
  X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::FREM1(Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80FPREM) {
  auto Op = IROp->C<IR::IROp_F80FPREM>();
  X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::FREM(Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80SCALE) {
  auto Op = IROp->C<IR::IROp_F80SCALE>();
  X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::FSCALE(Src1, Src2);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80CVT) {
  auto Op = IROp->C<IR::IROp_F80CVT>();
  uint8_t OpSize = IROp->Size;

  X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);

  switch (OpSize) {
    case 4: {
      float Tmp = Src;
      memcpy(GDP, &Tmp, OpSize);
      break;
    }
    case 8: {
      double Tmp = Src;
      memcpy(GDP, &Tmp, OpSize);
      break;
    }
    default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize);
  }
}

DEF_OP(F80CVTINT) {
  auto Op = IROp->C<IR::IROp_F80CVTInt>();
  uint8_t OpSize = IROp->Size;

  X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);

  switch (OpSize) {
    case 2: {
      int16_t Tmp = (Op->Truncate ? FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle2t : FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle2)(Src);
      memcpy(GDP, &Tmp, sizeof(Tmp));
      break;
    }
    case 4: {
      int32_t Tmp = (Op->Truncate ? FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle4t : FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle4)(Src);
      memcpy(GDP, &Tmp, sizeof(Tmp));
      break;
    }
    case 8: {
      int64_t Tmp = (Op->Truncate ? FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle8t : FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle8)(Src);
      memcpy(GDP, &Tmp, sizeof(Tmp));
      break;
    }
    default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize);
  }
}

DEF_OP(F80CVTTO) {
  auto Op = IROp->C<IR::IROp_F80CVTTo>();

  switch (Op->Size) {
    case 4: {
      float Src = *GetSrc<float *>(Data->SSAData, Op->Header.Args[0]);
      X80SoftFloat Tmp = Src;
      memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
      break;
    }
    case 8: {
      double Src = *GetSrc<double *>(Data->SSAData, Op->Header.Args[0]);
      X80SoftFloat Tmp = Src;
      memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
      break;
    }
    default: LogMan::Msg::DFmt("Unhandled size: {}", Op->Size);
  }
}

DEF_OP(F80CVTTOINT) {
  auto Op = IROp->C<IR::IROp_F80CVTToInt>();

  switch (Op->Size) {
    case 2: {
      int16_t Src = *GetSrc<int16_t*>(Data->SSAData, Op->Header.Args[0]);
      X80SoftFloat Tmp = Src;
      memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
      break;
    }
    case 4: {
      int32_t Src = *GetSrc<int32_t*>(Data->SSAData, Op->Header.Args[0]);
      X80SoftFloat Tmp = Src;
      memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
      break;
    }
    default: LogMan::Msg::DFmt("Unhandled size: {}", Op->Size);
  }
}

DEF_OP(F80ROUND) {
  auto Op = IROp->C<IR::IROp_F80Round>();
  X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::FRNDINT(Src);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80F2XM1) {
  auto Op = IROp->C<IR::IROp_F80F2XM1>();
  X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::F2XM1(Src);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80TAN) {
  auto Op = IROp->C<IR::IROp_F80TAN>();
  X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::FTAN(Src);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80SQRT) {
  auto Op = IROp->C<IR::IROp_F80SQRT>();
  X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::FSQRT(Src);

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80SIN) {
  auto Op = IROp->C<IR::IROp_F80SIN>();
  X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::FSIN(Src);
  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80COS) {
  auto Op = IROp->C<IR::IROp_F80COS>();
  X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::FCOS(Src);
  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80XTRACT_EXP) {
  auto Op = IROp->C<IR::IROp_F80XTRACT_EXP>();
  X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::FXTRACT_EXP(Src);
  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80XTRACT_SIG) {
  auto Op = IROp->C<IR::IROp_F80XTRACT_SIG>();
  X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Tmp;
  Tmp = X80SoftFloat::FXTRACT_SIG(Src);
  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}

DEF_OP(F80CMP) {
  auto Op = IROp->C<IR::IROp_F80Cmp>();
  uint32_t ResultFlags{};
  X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
  bool eq, lt, nan;
  X80SoftFloat::FCMP(Src1, Src2, &eq, &lt, &nan);
  if (Op->Flags & (1 << IR::FCMP_FLAG_LT) && lt) {
    ResultFlags |= (1 << IR::FCMP_FLAG_LT);
  }
  if (Op->Flags & (1 << IR::FCMP_FLAG_UNORDERED) && nan) {
    ResultFlags |= (1 << IR::FCMP_FLAG_UNORDERED);
  }
  if (Op->Flags & (1 << IR::FCMP_FLAG_EQ) && eq) {
    ResultFlags |= (1 << IR::FCMP_FLAG_EQ);
  }

  GD = ResultFlags;
}

DEF_OP(F80BCDLOAD) {
  auto Op = IROp->C<IR::IROp_F80BCDLoad>();
  uint8_t *Src1 = GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]);
  uint64_t BCD{};
  // We walk through each uint8_t and pull out the BCD encoding
  // Each 4-bit split is a digit
  // Only 0-9 is supported, A-F results in undefined data
  // | 4 bit     | 4 bit    |
  // | 10s place | 1s place |
  // EG 0x48 = 48
  // EG 0x4847 = 4847
  // This gives us an 18-digit value encoded in BCD
  // The last byte tells us whether the value is negative
  for (size_t i = 0; i < 9; ++i) {
    uint8_t Digit = Src1[8 - i];
    // First shift our last value over
    BCD *= 100;

    // Add the tens place digit
    BCD += (Digit >> 4) * 10;

    // Add the ones place digit
    BCD += Digit & 0xF;
  }

  // Set negative flag once converted to x87
  bool Negative = Src1[9] & 0x80;
  X80SoftFloat Tmp;

  Tmp = BCD;
  Tmp.Sign = Negative;

  memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
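
A standalone worked example of the packed-BCD decode loop above (hedged; DecodeBCD and Num are illustrative names, not part of the diff):

#include <cstdint>
#include <cstdio>

// Decode a 9-byte packed-BCD significand (least significant byte first),
// mirroring the loop in F80BCDLOAD.
static uint64_t DecodeBCD(const uint8_t *Bytes) {
  uint64_t Value = 0;
  for (int i = 0; i < 9; ++i) {
    uint8_t Digit = Bytes[8 - i];  // Walk from the most significant byte down
    Value = Value * 100            // Shift previous digits over two places
          + (Digit >> 4) * 10      // Tens-place nibble
          + (Digit & 0xF);         // Ones-place nibble
  }
  return Value;
}

int main() {
  const uint8_t Num[9] = {0x47, 0x48, 0, 0, 0, 0, 0, 0, 0}; // Encodes 4847
  printf("%llu\n", (unsigned long long)DecodeBCD(Num));     // Prints 4847
}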

DEF_OP(F80BCDSTORE) {
  auto Op = IROp->C<IR::IROp_F80BCDStore>();
  X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
  bool Negative = Src1.Sign;

  // Clear the Sign bit
  Src1.Sign = 0;

  uint64_t Tmp = Src1;
  uint8_t BCD[10]{};

  for (size_t i = 0; i < 9; ++i) {
    if (Tmp == 0) {
      // Nothing left? Just leave
      break;
    }
    // Extract the lowest two decimal digits
    uint8_t Digit = Tmp % 100;

    // Now divide it for the next iteration
    Tmp /= 100;

    uint8_t UpperNibble = Digit / 10;
    uint8_t LowerNibble = Digit % 10;

    // Now store the BCD
    BCD[i] = (UpperNibble << 4) | LowerNibble;
  }

  // Set the negative flag in the final byte
  BCD[9] = Negative ? 0x80 : 0;

  memcpy(GDP, BCD, 10);
}

#undef DEF_OP
void InterpreterOps::RegisterF80Handlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
  REGISTER_OP(F80LOADFCW, F80LOADFCW);
  REGISTER_OP(F80ADD, F80ADD);
  REGISTER_OP(F80SUB, F80SUB);
  REGISTER_OP(F80MUL, F80MUL);
  REGISTER_OP(F80DIV, F80DIV);
  REGISTER_OP(F80FYL2X, F80FYL2X);
  REGISTER_OP(F80ATAN, F80ATAN);
  REGISTER_OP(F80FPREM1, F80FPREM1);
  REGISTER_OP(F80FPREM, F80FPREM);
  REGISTER_OP(F80SCALE, F80SCALE);
  REGISTER_OP(F80CVT, F80CVT);
  REGISTER_OP(F80CVTINT, F80CVTINT);
  REGISTER_OP(F80CVTTO, F80CVTTO);
  REGISTER_OP(F80CVTTOINT, F80CVTTOINT);
  REGISTER_OP(F80ROUND, F80ROUND);
  REGISTER_OP(F80F2XM1, F80F2XM1);
  REGISTER_OP(F80TAN, F80TAN);
  REGISTER_OP(F80SQRT, F80SQRT);
  REGISTER_OP(F80SIN, F80SIN);
  REGISTER_OP(F80COS, F80COS);
  REGISTER_OP(F80XTRACT_EXP, F80XTRACT_EXP);
  REGISTER_OP(F80XTRACT_SIG, F80XTRACT_SIG);
  REGISTER_OP(F80CMP, F80CMP);
  REGISTER_OP(F80BCDLOAD, F80BCDLOAD);
  REGISTER_OP(F80BCDSTORE, F80BCDSTORE);
#undef REGISTER_OP
}
}
330 External/FEXCore/Source/Interface/Core/Interpreter/F80Ops.h vendored Normal file
@ -0,0 +1,330 @
#pragma once
#include "Common/SoftFloat.h"
#include "Common/SoftFloat-3e/softfloat.h"

#include <FEXCore/IR/IR.h>

namespace FEXCore::CPU {
template<IR::IROps Op>
struct OpHandlers {
};

template<>
struct OpHandlers<IR::OP_F80CVTTO> {
  static X80SoftFloat handle4(float src) {
    return src;
  }

  static X80SoftFloat handle8(double src) {
    return src;
  }
};

template<>
struct OpHandlers<IR::OP_F80CMP> {
  template<uint32_t Flags>
  static uint64_t handle(X80SoftFloat Src1, X80SoftFloat Src2) {
    bool eq, lt, nan;
    uint64_t ResultFlags = 0;

    X80SoftFloat::FCMP(Src1, Src2, &eq, &lt, &nan);
    if (Flags & (1 << IR::FCMP_FLAG_LT) && lt) {
      ResultFlags |= (1 << IR::FCMP_FLAG_LT);
    }
    if (Flags & (1 << IR::FCMP_FLAG_UNORDERED) && nan) {
      ResultFlags |= (1 << IR::FCMP_FLAG_UNORDERED);
    }
    if (Flags & (1 << IR::FCMP_FLAG_EQ) && eq) {
      ResultFlags |= (1 << IR::FCMP_FLAG_EQ);
    }
    return ResultFlags;
  }
};

template<>
struct OpHandlers<IR::OP_F80CVT> {
  static float handle4(X80SoftFloat src) {
    return src;
  }

  static double handle8(X80SoftFloat src) {
    return src;
  }
};

template<>
struct OpHandlers<IR::OP_F80CVTINT> {
  static int16_t handle2(X80SoftFloat src) {
    return src;
  }

  static int32_t handle4(X80SoftFloat src) {
    return src;
  }

  static int64_t handle8(X80SoftFloat src) {
    return src;
  }

  static int16_t handle2t(X80SoftFloat src) {
    auto rv = extF80_to_i32(src, softfloat_round_minMag, false);

    if (rv > INT16_MAX) {
      return INT16_MAX;
    } else if (rv < INT16_MIN) {
      return INT16_MIN;
    } else {
      return rv;
    }
  }

  static int32_t handle4t(X80SoftFloat src) {
    return extF80_to_i32(src, softfloat_round_minMag, false);
  }

  static int64_t handle8t(X80SoftFloat src) {
    return extF80_to_i64(src, softfloat_round_minMag, false);
  }
};
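
The t variants implement x87 truncating stores: softfloat_round_minMag always rounds toward zero regardless of the current rounding mode, and handle2t additionally saturates to the int16_t range. A hedged plain-double illustration of the same behavior (not code from this commit):

#include <cstdint>
#include <cstdio>
#include <cmath>

// Round toward zero (what softfloat_round_minMag does), then saturate to the
// int16_t range like handle2t.
static int16_t TruncateToI16(double v) {
  double t = std::trunc(v);              // Round toward zero
  if (t > INT16_MAX) return INT16_MAX;   // Saturate on overflow
  if (t < INT16_MIN) return INT16_MIN;
  return static_cast<int16_t>(t);
}

int main() {
  printf("%d\n", TruncateToI16(1.9));    // 1, not 2
  printf("%d\n", TruncateToI16(-1.9));   // -1, not -2
  printf("%d\n", TruncateToI16(1e6));    // 32767 (saturated)
}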

template<>
struct OpHandlers<IR::OP_F80CVTTOINT> {
  static X80SoftFloat handle2(int16_t src) {
    return src;
  }

  static X80SoftFloat handle4(int32_t src) {
    return src;
  }
};

template<>
struct OpHandlers<IR::OP_F80ROUND> {
  static X80SoftFloat handle(X80SoftFloat Src1) {
    return X80SoftFloat::FRNDINT(Src1);
  }
};

template<>
struct OpHandlers<IR::OP_F80F2XM1> {
  static X80SoftFloat handle(X80SoftFloat Src1) {
    return X80SoftFloat::F2XM1(Src1);
  }
};

template<>
struct OpHandlers<IR::OP_F80TAN> {
  static X80SoftFloat handle(X80SoftFloat Src1) {
    return X80SoftFloat::FTAN(Src1);
  }
};

template<>
struct OpHandlers<IR::OP_F80SQRT> {
  static X80SoftFloat handle(X80SoftFloat Src1) {
    return X80SoftFloat::FSQRT(Src1);
  }
};

template<>
struct OpHandlers<IR::OP_F80SIN> {
  static X80SoftFloat handle(X80SoftFloat Src1) {
    return X80SoftFloat::FSIN(Src1);
  }
};

template<>
struct OpHandlers<IR::OP_F80COS> {
  static X80SoftFloat handle(X80SoftFloat Src1) {
    return X80SoftFloat::FCOS(Src1);
  }
};

template<>
struct OpHandlers<IR::OP_F80XTRACT_EXP> {
  static X80SoftFloat handle(X80SoftFloat Src1) {
    return X80SoftFloat::FXTRACT_EXP(Src1);
  }
};

template<>
struct OpHandlers<IR::OP_F80XTRACT_SIG> {
  static X80SoftFloat handle(X80SoftFloat Src1) {
    return X80SoftFloat::FXTRACT_SIG(Src1);
  }
};

template<>
struct OpHandlers<IR::OP_F80ADD> {
  static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
    return X80SoftFloat::FADD(Src1, Src2);
  }
};

template<>
struct OpHandlers<IR::OP_F80SUB> {
  static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
    return X80SoftFloat::FSUB(Src1, Src2);
  }
};

template<>
struct OpHandlers<IR::OP_F80MUL> {
  static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
    return X80SoftFloat::FMUL(Src1, Src2);
  }
};

template<>
struct OpHandlers<IR::OP_F80DIV> {
  static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
    return X80SoftFloat::FDIV(Src1, Src2);
  }
};

template<>
struct OpHandlers<IR::OP_F80FYL2X> {
  static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
    return X80SoftFloat::FYL2X(Src1, Src2);
  }
};

template<>
struct OpHandlers<IR::OP_F80ATAN> {
  static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
    return X80SoftFloat::FATAN(Src1, Src2);
  }
};

template<>
struct OpHandlers<IR::OP_F80FPREM1> {
  static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
    return X80SoftFloat::FREM1(Src1, Src2);
  }
};

template<>
struct OpHandlers<IR::OP_F80FPREM> {
  static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
    return X80SoftFloat::FREM(Src1, Src2);
  }
};

template<>
struct OpHandlers<IR::OP_F80SCALE> {
  static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
    return X80SoftFloat::FSCALE(Src1, Src2);
  }
};

template<>
struct OpHandlers<IR::OP_F80BCDSTORE> {
  static X80SoftFloat handle(X80SoftFloat Src1) {
    bool Negative = Src1.Sign;

    // Clear the Sign bit
    Src1.Sign = 0;

    uint64_t Tmp = Src1;
    X80SoftFloat Rv;
    uint8_t *BCD = reinterpret_cast<uint8_t*>(&Rv);
    memset(BCD, 0, 10);

    for (size_t i = 0; i < 9; ++i) {
      if (Tmp == 0) {
        // Nothing left? Just leave
        break;
      }
      // Extract the lowest two decimal digits
      uint8_t Digit = Tmp % 100;

      // Now divide it for the next iteration
      Tmp /= 100;

      uint8_t UpperNibble = Digit / 10;
      uint8_t LowerNibble = Digit % 10;

      // Now store the BCD
      BCD[i] = (UpperNibble << 4) | LowerNibble;
    }

    // Set the negative flag in the final byte
    BCD[9] = Negative ? 0x80 : 0;

    return Rv;
  }
};

template<>
struct OpHandlers<IR::OP_F80BCDLOAD> {
  static X80SoftFloat handle(X80SoftFloat Src) {
    uint8_t *Src1 = reinterpret_cast<uint8_t *>(&Src);
    uint64_t BCD{};
    // We walk through each uint8_t and pull out the BCD encoding
    // Each 4-bit split is a digit
    // Only 0-9 is supported, A-F results in undefined data
    // | 4 bit     | 4 bit    |
    // | 10s place | 1s place |
    // EG 0x48 = 48
    // EG 0x4847 = 4847
    // This gives us an 18-digit value encoded in BCD
    // The last byte tells us whether the value is negative
    for (size_t i = 0; i < 9; ++i) {
      uint8_t Digit = Src1[8 - i];
      // First shift our last value over
      BCD *= 100;

      // Add the tens place digit
      BCD += (Digit >> 4) * 10;

      // Add the ones place digit
      BCD += Digit & 0xF;
    }

    // Set negative flag once converted to x87
    bool Negative = Src1[9] & 0x80;
    X80SoftFloat Tmp;

    Tmp = BCD;
    Tmp.Sign = Negative;
    return Tmp;
  }
};

template<>
struct OpHandlers<IR::OP_F80LOADFCW> {
  static void handle(uint16_t NewFCW) {
    auto PC = (NewFCW >> 8) & 3;
    switch (PC) {
      case 0: extF80_roundingPrecision = 32; break;
      case 2: extF80_roundingPrecision = 64; break;
      case 3: extF80_roundingPrecision = 80; break;
      case 1: LOGMAN_MSG_A_FMT("Invalid x87 precision mode, {}", PC);
    }

    auto RC = (NewFCW >> 10) & 3;
    switch (RC) {
      case 0:
        softfloat_roundingMode = softfloat_round_near_even;
        break;
      case 1:
        softfloat_roundingMode = softfloat_round_min;
        break;
      case 2:
        softfloat_roundingMode = softfloat_round_max;
        break;
      case 3:
        softfloat_roundingMode = softfloat_round_minMag;
        break;
    }
  }
};
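
For reference, the handler above decodes two x87 FCW fields: precision control in bits 8-9 and rounding control in bits 10-11. A hedged standalone decoder using the same shifts:

#include <cstdint>
#include <cstdio>

// Pull the precision-control and rounding-control fields out of an x87 FCW,
// matching the shifts in OpHandlers<IR::OP_F80LOADFCW>::handle.
int main() {
  uint16_t FCW = 0x037F;            // x87 power-on default control word
  unsigned PC = (FCW >> 8) & 3;     // 0=single, 2=double, 3=extended precision
  unsigned RC = (FCW >> 10) & 3;    // 0=nearest, 1=down, 2=up, 3=toward zero
  printf("PC=%u RC=%u\n", PC, RC);  // PC=3 RC=0 for the default FCW
}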

}
27 External/FEXCore/Source/Interface/Core/Interpreter/FlagOps.cpp vendored Normal file
@ -0,0 +1,27 @
/*
$info$
tags: backend|interpreter
$end_info$
*/

#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"

#include <cstdint>

namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
DEF_OP(GetHostFlag) {
  auto Op = IROp->C<IR::IROp_GetHostFlag>();
  GD = (*GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]) >> Op->Flag) & 1;
}

#undef DEF_OP
void InterpreterOps::RegisterFlagHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
  REGISTER_OP(GETHOSTFLAG, GetHostFlag);
#undef REGISTER_OP
}
}
@ -32,19 +32,16 @ public:

  bool HandleSIGBUS(int Signal, void *info, void *ucontext);

  static void InitializeInterpreterOpHandlers();

private:
  FEXCore::Context::Context *CTX;
  FEXCore::Core::InternalThreadState *State;

  uint32_t AllocateTmpSpace(size_t Size);

  template<typename Res>
  Res GetDest(void* SSAData, IR::OrderedNodeWrapper Op);

  template<typename Res>
  Res GetSrc(void* SSAData, IR::OrderedNodeWrapper Src);

  std::unique_ptr<Dispatcher> Dispatcher{};
};

}
@ -95,10 +95,27 @ bool InterpreterCore::HandleSIGBUS(int Signal, void *info, void *ucontext) {
  return false;
}

void InitializeInterpreterOpHandlers() {
  for (uint32_t i = 0; i <= FEXCore::IR::IROps::OP_LAST; ++i) {
    InterpreterOps::OpHandlers[i] = &InterpreterOps::Op_Unhandled;
  }

  InterpreterOps::RegisterALUHandlers();
  InterpreterOps::RegisterAtomicHandlers();
  InterpreterOps::RegisterBranchHandlers();
  InterpreterOps::RegisterConversionHandlers();
  InterpreterOps::RegisterFlagHandlers();
  InterpreterOps::RegisterMemoryHandlers();
  InterpreterOps::RegisterMiscHandlers();
  InterpreterOps::RegisterMoveHandlers();
  InterpreterOps::RegisterVectorHandlers();
  InterpreterOps::RegisterEncryptionHandlers();
  InterpreterOps::RegisterF80Handlers();
}
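
The table makes interpretation a per-node indirect call. A hedged, simplified sketch of the dispatch shape (the real loop lives in the suppressed InterpreterOps.cpp; DispatchBlock, GetAllCode, and GetID are assumed names here):

// Hypothetical, simplified dispatch: walk each IR node in a block and invoke
// the registered handler; opcodes nobody registered fall through to
// Op_Unhandled. The real InterpretIR additionally checks Data->BlockResults
// to quit or redo blocks.
static void DispatchBlock(FEXCore::IR::IRListView *IR, InterpreterOps::IROpData *Data) {
  for (auto [Node, IROp] : IR->GetAllCode()) {
    InterpreterOps::OpHandlers[IROp->Op](IROp, Data, IR->GetID(Node));
  }
}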

InterpreterCore::InterpreterCore(FEXCore::Context::Context *ctx, FEXCore::Core::InternalThreadState *Thread, bool CompileThread)
  : CTX {ctx}
  , State {Thread} {
  // Grab our space for temporary data

  if (!CompileThread &&
      CTX->Config.Core == FEXCore::Config::CONFIG_INTERPRETER) {

@ -13,6 +13,7 @ namespace FEXCore::Core {
namespace FEXCore::CPU {
  class CPUBackend;

  void InitializeInterpreterOpHandlers();
  std::unique_ptr<CPUBackend> CreateInterpreterCore(FEXCore::Context::Context *ctx, FEXCore::Core::InternalThreadState *Thread, bool CompileThread);

}
179 External/FEXCore/Source/Interface/Core/Interpreter/InterpreterDefines.h vendored Normal file
@ -0,0 +1,179 @
#pragma once

#include <FEXCore/IR/IR.h>

#define GD *GetDest<uint64_t*>(Data->SSAData, Node)
#define GDP GetDest<void*>(Data->SSAData, Node)

#define DO_OP(size, type, func) \
  case size: { \
    auto *Dst_d = reinterpret_cast<type*>(GDP); \
    auto *Src1_d = reinterpret_cast<type*>(Src1); \
    auto *Src2_d = reinterpret_cast<type*>(Src2); \
    *Dst_d = func(*Src1_d, *Src2_d); \
    break; \
  }
#define DO_SCALAR_COMPARE_OP(size, type, type2, func) \
  case size: { \
    auto *Dst_d = reinterpret_cast<type2*>(Tmp); \
    auto *Src1_d = reinterpret_cast<type*>(Src1); \
    auto *Src2_d = reinterpret_cast<type*>(Src2); \
    Dst_d[0] = func(Src1_d[0], Src2_d[0]); \
    break; \
  }

#define DO_VECTOR_COMPARE_OP(size, type, type2, func) \
  case size: { \
    auto *Dst_d = reinterpret_cast<type2*>(Tmp); \
    auto *Src1_d = reinterpret_cast<type*>(Src1); \
    auto *Src2_d = reinterpret_cast<type*>(Src2); \
    for (uint8_t i = 0; i < Elements; ++i) { \
      Dst_d[i] = func(Src1_d[i], Src2_d[i]); \
    } \
    break; \
  }
#define DO_VECTOR_OP(size, type, func) \
  case size: { \
    auto *Dst_d = reinterpret_cast<type*>(Tmp); \
    auto *Src1_d = reinterpret_cast<type*>(Src1); \
    auto *Src2_d = reinterpret_cast<type*>(Src2); \
    for (uint8_t i = 0; i < Elements; ++i) { \
      Dst_d[i] = func(Src1_d[i], Src2_d[i]); \
    } \
    break; \
  }
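
To make the pattern concrete: a hypothetical use DO_VECTOR_OP(4, uint32_t, Add) inside a handler's switch over the element size expands to roughly the fragment below, where Tmp, Src1, Src2, Elements, and Add all come from the surrounding op handler:

// Hand-expanded form of DO_VECTOR_OP(4, uint32_t, Add), for illustration.
// Elements is OpSize / ElementSize in the surrounding handler.
case 4: {
  auto *Dst_d = reinterpret_cast<uint32_t*>(Tmp);
  auto *Src1_d = reinterpret_cast<uint32_t*>(Src1);
  auto *Src2_d = reinterpret_cast<uint32_t*>(Src2);
  for (uint8_t i = 0; i < Elements; ++i) {
    Dst_d[i] = Add(Src1_d[i], Src2_d[i]); // Element-wise over the vector
  }
  break;
}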
#define DO_VECTOR_PAIR_OP(size, type, func) \
  case size: { \
    auto *Dst_d = reinterpret_cast<type*>(Tmp); \
    auto *Src1_d = reinterpret_cast<type*>(Src1); \
    auto *Src2_d = reinterpret_cast<type*>(Src2); \
    for (uint8_t i = 0; i < Elements; ++i) { \
      Dst_d[i] = func(Src1_d[i*2], Src1_d[i*2 + 1]); \
      Dst_d[i+Elements] = func(Src2_d[i*2], Src2_d[i*2 + 1]); \
    } \
    break; \
  }
#define DO_VECTOR_SCALAR_OP(size, type, func)\
  case size: { \
    auto *Dst_d = reinterpret_cast<type*>(Tmp); \
    auto *Src1_d = reinterpret_cast<type*>(Src1); \
    auto *Src2_d = reinterpret_cast<type*>(Src2); \
    for (uint8_t i = 0; i < Elements; ++i) { \
      Dst_d[i] = func(Src1_d[i], *Src2_d); \
    } \
    break; \
  }
#define DO_VECTOR_0SRC_OP(size, type, func) \
  case size: { \
    auto *Dst_d = reinterpret_cast<type*>(Tmp); \
    for (uint8_t i = 0; i < Elements; ++i) { \
      Dst_d[i] = func(); \
    } \
    break; \
  }
#define DO_VECTOR_1SRC_OP(size, type, func) \
  case size: { \
    auto *Dst_d = reinterpret_cast<type*>(Tmp); \
    auto *Src_d = reinterpret_cast<type*>(Src); \
    for (uint8_t i = 0; i < Elements; ++i) { \
      Dst_d[i] = func(Src_d[i]); \
    } \
    break; \
  }
#define DO_VECTOR_REDUCE_1SRC_OP(size, type, func, start_val) \
  case size: { \
    auto *Dst_d = reinterpret_cast<type*>(Tmp); \
    auto *Src_d = reinterpret_cast<type*>(Src); \
    type begin = start_val; \
    for (uint8_t i = 0; i < Elements; ++i) { \
      begin = func(begin, Src_d[i]); \
    } \
    Dst_d[0] = begin; \
    break; \
  }
#define DO_VECTOR_SAT_OP(size, type, func, min, max) \
  case size: { \
    auto *Dst_d = reinterpret_cast<type*>(Tmp); \
    auto *Src1_d = reinterpret_cast<type*>(Src1); \
    auto *Src2_d = reinterpret_cast<type*>(Src2); \
    for (uint8_t i = 0; i < Elements; ++i) { \
      Dst_d[i] = func(Src1_d[i], Src2_d[i], min, max); \
    } \
    break; \
  }

#define DO_VECTOR_1SRC_2TYPE_OP(size, type, type2, func, min, max) \
  case size: { \
    auto *Dst_d = reinterpret_cast<type*>(Tmp); \
    auto *Src_d = reinterpret_cast<type2*>(Src); \
    for (uint8_t i = 0; i < Elements; ++i) { \
      Dst_d[i] = (type)func(Src_d[i], min, max); \
    } \
    break; \
  }

#define DO_VECTOR_1SRC_2TYPE_OP_NOSIZE(type, type2, func, min, max) \
  auto *Dst_d = reinterpret_cast<type*>(Tmp); \
  auto *Src_d = reinterpret_cast<type2*>(Src); \
  for (uint8_t i = 0; i < Elements; ++i) { \
    Dst_d[i] = (type)func(Src_d[i], min, max); \
  }
#define DO_VECTOR_1SRC_2TYPE_OP_TOP(size, type, type2, func, min, max) \
  case size: { \
    auto *Dst_d = reinterpret_cast<type*>(Tmp); \
    auto *Src_d = reinterpret_cast<type2*>(Src2); \
    memcpy(Dst_d, Src1, Elements * sizeof(type2));\
    for (uint8_t i = 0; i < Elements; ++i) { \
      Dst_d[i+Elements] = (type)func(Src_d[i], min, max); \
    } \
    break; \
  }

#define DO_VECTOR_1SRC_2TYPE_OP_TOP_SRC(size, type, type2, func, min, max) \
  case size: { \
    auto *Dst_d = reinterpret_cast<type*>(Tmp); \
    auto *Src_d = reinterpret_cast<type2*>(Src); \
    for (uint8_t i = 0; i < Elements; ++i) { \
      Dst_d[i] = (type)func(Src_d[i+Elements], min, max); \
    } \
    break; \
  }
#define DO_VECTOR_2SRC_2TYPE_OP(size, type, type2, func) \
  case size: { \
    auto *Dst_d = reinterpret_cast<type*>(Tmp); \
    auto *Src1_d = reinterpret_cast<type2*>(Src1); \
    auto *Src2_d = reinterpret_cast<type2*>(Src2); \
    for (uint8_t i = 0; i < Elements; ++i) { \
      Dst_d[i] = (type)func((type)Src1_d[i], (type)Src2_d[i]); \
    } \
    break; \
  }
#define DO_VECTOR_2SRC_2TYPE_OP_TOP_SRC(size, type, type2, func) \
  case size: { \
    auto *Dst_d = reinterpret_cast<type*>(Tmp); \
    auto *Src1_d = reinterpret_cast<type2*>(Src1); \
    auto *Src2_d = reinterpret_cast<type2*>(Src2); \
    for (uint8_t i = 0; i < Elements; ++i) { \
      Dst_d[i] = (type)func((type)Src1_d[i+Elements], (type)Src2_d[i+Elements]); \
    } \
    break; \
  }

template<typename Res>
Res GetDest(void* SSAData, FEXCore::IR::OrderedNodeWrapper Op) {
  auto DstPtr = &reinterpret_cast<__uint128_t*>(SSAData)[Op.ID()];
  return reinterpret_cast<Res>(DstPtr);
}

template<typename Res>
Res GetDest(void* SSAData, uint32_t Op) {
  auto DstPtr = &reinterpret_cast<__uint128_t*>(SSAData)[Op];
  return reinterpret_cast<Res>(DstPtr);
}

template<typename Res>
Res GetSrc(void* SSAData, FEXCore::IR::OrderedNodeWrapper Src) {
  auto DstPtr = &reinterpret_cast<__uint128_t*>(SSAData)[Src.ID()];
  return reinterpret_cast<Res>(DstPtr);
}
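
GetDest and GetSrc resolve an SSA ID to a flat array of 16-byte slots, so every IR value lives at SSAData + 16 * ID. A hedged standalone model of that storage scheme (not code from this commit):

#include <cstdint>
#include <cstdio>

// Model of the interpreter's SSA storage: one __uint128_t slot per SSA node,
// written and read back at any width via reinterpret_cast, as GetDest/GetSrc
// do above.
int main() {
  __uint128_t SSAData[8]{};                                 // Slots for 8 SSA values
  uint32_t ID = 3;                                          // Some node's SSA ID
  *reinterpret_cast<uint64_t*>(&SSAData[ID]) = 42;          // Op_Constant-style write
  uint64_t V = *reinterpret_cast<uint64_t*>(&SSAData[ID]);  // A later op reads it back
  printf("%llu\n", (unsigned long long)V);                  // 42
}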
File diff suppressed because it is too large
@ -1,6 +1,9 @
#pragma once
#include <stdint.h>

#include <FEXCore/IR/IR.h>
#include <FEXCore/IR/IntrusiveIRList.h>

namespace FEXCore::Core {
struct InternalThreadState;
}
@ -42,5 +45,365 @ namespace FEXCore::CPU {
public:
  static void InterpretIR(FEXCore::Core::InternalThreadState *Thread, uint64_t Entry, FEXCore::IR::IRListView *CurrentIR, FEXCore::Core::DebugData *DebugData);
  static bool GetFallbackHandler(IR::IROp_Header *IROp, FallbackInfo *Info);

  static void RegisterALUHandlers();
  static void RegisterAtomicHandlers();
  static void RegisterBranchHandlers();
  static void RegisterConversionHandlers();
  static void RegisterFlagHandlers();
  static void RegisterMemoryHandlers();
  static void RegisterMiscHandlers();
  static void RegisterMoveHandlers();
  static void RegisterVectorHandlers();
  static void RegisterEncryptionHandlers();
  static void RegisterF80Handlers();

  struct IROpData {
    FEXCore::Core::InternalThreadState *State{};
    uint64_t CurrentEntry{};
    FEXCore::IR::IRListView *CurrentIR{};
    volatile void *StackEntry{};
    void *SSAData{};
    struct {
      bool Quit;
      bool Redo;
    } BlockResults{};

    IR::NodeIterator BlockIterator{0, 0};
  };

  using OpHandler = std::function<void(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)>;
  static std::array<OpHandler, FEXCore::IR::IROps::OP_LAST + 1> OpHandlers;

#define DEF_OP(x) static void Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)

  ///< Unhandled handler
  DEF_OP(Unhandled);

  ///< No-op Handler
  DEF_OP(NoOp);

  ///< ALU Ops
  DEF_OP(TruncElementPair);
  DEF_OP(Constant);
  DEF_OP(EntrypointOffset);
  DEF_OP(InlineConstant);
  DEF_OP(InlineEntrypointOffset);
  DEF_OP(CycleCounter);
  DEF_OP(Add);
  DEF_OP(Sub);
  DEF_OP(Neg);
  DEF_OP(Mul);
  DEF_OP(UMul);
  DEF_OP(Div);
  DEF_OP(UDiv);
  DEF_OP(Rem);
  DEF_OP(URem);
  DEF_OP(MulH);
  DEF_OP(UMulH);
  DEF_OP(Or);
  DEF_OP(And);
  DEF_OP(Xor);
  DEF_OP(Lshl);
  DEF_OP(Lshr);
  DEF_OP(Ashr);
  DEF_OP(Rol);
  DEF_OP(Ror);
  DEF_OP(Extr);
  DEF_OP(LDiv);
  DEF_OP(LUDiv);
  DEF_OP(LRem);
  DEF_OP(LURem);
  DEF_OP(Zext);
  DEF_OP(Not);
  DEF_OP(Popcount);
  DEF_OP(FindLSB);
  DEF_OP(FindMSB);
  DEF_OP(FindTrailingZeros);
  DEF_OP(CountLeadingZeroes);
  DEF_OP(Rev);
  DEF_OP(Bfi);
  DEF_OP(Bfe);
  DEF_OP(Sbfe);
  DEF_OP(Select);
  DEF_OP(VExtractToGPR);
  DEF_OP(Float_ToGPR_ZU);
  DEF_OP(Float_ToGPR_ZS);
  DEF_OP(Float_ToGPR_S);
  DEF_OP(FCmp);

  ///< Atomic ops
  DEF_OP(CASPair);
  DEF_OP(CAS);
  DEF_OP(AtomicAdd);
  DEF_OP(AtomicSub);
  DEF_OP(AtomicAnd);
  DEF_OP(AtomicOr);
  DEF_OP(AtomicXor);
  DEF_OP(AtomicSwap);
  DEF_OP(AtomicFetchAdd);
  DEF_OP(AtomicFetchSub);
  DEF_OP(AtomicFetchAnd);
  DEF_OP(AtomicFetchOr);
  DEF_OP(AtomicFetchXor);
  DEF_OP(AtomicFetchNeg);

  ///< Branch ops
  DEF_OP(GuestCallDirect);
  DEF_OP(GuestCallIndirect);
  DEF_OP(GuestReturn);
  DEF_OP(SignalReturn);
  DEF_OP(CallbackReturn);
  DEF_OP(ExitFunction);
  DEF_OP(Jump);
  DEF_OP(CondJump);
  DEF_OP(Syscall);
  DEF_OP(Thunk);
  DEF_OP(ValidateCode);
  DEF_OP(RemoveCodeEntry);
  DEF_OP(CPUID);

  ///< Conversion ops
  DEF_OP(VInsGPR);
  DEF_OP(VCastFromGPR);
  DEF_OP(Float_FromGPR_S);
  DEF_OP(Float_FToF);
  DEF_OP(Vector_SToF);
  DEF_OP(Vector_FToZS);
  DEF_OP(Vector_FToS);
  DEF_OP(Vector_FToF);
  DEF_OP(Vector_FToI);

  ///< Flag ops
  DEF_OP(GetHostFlag);

  ///< Memory ops
  DEF_OP(LoadContext);
  DEF_OP(StoreContext);
  DEF_OP(LoadRegister);
  DEF_OP(StoreRegister);
  DEF_OP(LoadContextIndexed);
  DEF_OP(StoreContextIndexed);
  DEF_OP(SpillRegister);
  DEF_OP(FillRegister);
  DEF_OP(LoadFlag);
  DEF_OP(StoreFlag);
  DEF_OP(LoadMem);
  DEF_OP(StoreMem);
  DEF_OP(VLoadMemElement);
  DEF_OP(VStoreMemElement);
  DEF_OP(CacheLineClear);

  ///< Misc ops
  DEF_OP(EndBlock);
  DEF_OP(Fence);
  DEF_OP(Break);
  DEF_OP(Phi);
  DEF_OP(PhiValue);
  DEF_OP(Print);
  DEF_OP(GetRoundingMode);
  DEF_OP(SetRoundingMode);

  ///< Move ops
  DEF_OP(ExtractElementPair);
  DEF_OP(CreateElementPair);
  DEF_OP(Mov);

  ///< Vector ops
  DEF_OP(VectorZero);
  DEF_OP(VectorImm);
  DEF_OP(CreateVector2);
  DEF_OP(CreateVector4);
  DEF_OP(SplatVector);
  DEF_OP(VMov);
  DEF_OP(VAnd);
  DEF_OP(VBic);
  DEF_OP(VOr);
  DEF_OP(VXor);
  DEF_OP(VAdd);
  DEF_OP(VSub);
  DEF_OP(VUQAdd);
  DEF_OP(VUQSub);
  DEF_OP(VSQAdd);
  DEF_OP(VSQSub);
  DEF_OP(VAddP);
  DEF_OP(VAddV);
  DEF_OP(VUMinV);
  DEF_OP(VURAvg);
  DEF_OP(VAbs);
  DEF_OP(VPopcount);
  DEF_OP(VFAdd);
  DEF_OP(VFAddP);
  DEF_OP(VFSub);
  DEF_OP(VFMul);
  DEF_OP(VFDiv);
  DEF_OP(VFMin);
  DEF_OP(VFMax);
  DEF_OP(VFRecp);
  DEF_OP(VFSqrt);
  DEF_OP(VFRSqrt);
  DEF_OP(VNeg);
  DEF_OP(VFNeg);
  DEF_OP(VNot);
  DEF_OP(VUMin);
  DEF_OP(VSMin);
  DEF_OP(VUMax);
  DEF_OP(VSMax);
  DEF_OP(VZip);
  DEF_OP(VUnZip);
  DEF_OP(VBSL);
  DEF_OP(VCMPEQ);
  DEF_OP(VCMPEQZ);
  DEF_OP(VCMPGT);
  DEF_OP(VCMPGTZ);
  DEF_OP(VCMPLTZ);
  DEF_OP(VFCMPEQ);
  DEF_OP(VFCMPNEQ);
  DEF_OP(VFCMPLT);
  DEF_OP(VFCMPGT);
  DEF_OP(VFCMPLE);
  DEF_OP(VFCMPORD);
  DEF_OP(VFCMPUNO);
  DEF_OP(VUShl);
  DEF_OP(VUShr);
  DEF_OP(VSShr);
  DEF_OP(VUShlS);
  DEF_OP(VUShrS);
  DEF_OP(VSShrS);
  DEF_OP(VInsElement);
  DEF_OP(VInsScalarElement);
  DEF_OP(VExtractElement);
  DEF_OP(VDupElement);
  DEF_OP(VExtr);
  DEF_OP(VSLI);
  DEF_OP(VSRI);
  DEF_OP(VUShrI);
  DEF_OP(VSShrI);
  DEF_OP(VShlI);
  DEF_OP(VUShrNI);
  DEF_OP(VUShrNI2);
  DEF_OP(VBitcast);
  DEF_OP(VSXTL);
  DEF_OP(VSXTL2);
  DEF_OP(VUXTL);
  DEF_OP(VUXTL2);
  DEF_OP(VSQXTN);
  DEF_OP(VSQXTN2);
  DEF_OP(VSQXTUN);
  DEF_OP(VSQXTUN2);
  DEF_OP(VUMul);
  DEF_OP(VUMull);
  DEF_OP(VSMul);
  DEF_OP(VSMull);
  DEF_OP(VUMull2);
  DEF_OP(VSMull2);
  DEF_OP(VUABDL);
  DEF_OP(VTBL1);

  ///< Encryption ops
  DEF_OP(AESImc);
  DEF_OP(AESEnc);
  DEF_OP(AESEncLast);
  DEF_OP(AESDec);
  DEF_OP(AESDecLast);
  DEF_OP(AESKeyGenAssist);

  ///< F80 ops
  DEF_OP(F80LOADFCW);
  DEF_OP(F80ADD);
  DEF_OP(F80SUB);
  DEF_OP(F80MUL);
  DEF_OP(F80DIV);
  DEF_OP(F80FYL2X);
  DEF_OP(F80ATAN);
  DEF_OP(F80FPREM1);
  DEF_OP(F80FPREM);
  DEF_OP(F80SCALE);
  DEF_OP(F80CVT);
  DEF_OP(F80CVTINT);
  DEF_OP(F80CVTTO);
  DEF_OP(F80CVTTOINT);
  DEF_OP(F80ROUND);
  DEF_OP(F80F2XM1);
  DEF_OP(F80TAN);
  DEF_OP(F80SQRT);
  DEF_OP(F80SIN);
  DEF_OP(F80COS);
  DEF_OP(F80XTRACT_EXP);
  DEF_OP(F80XTRACT_SIG);
  DEF_OP(F80CMP);
  DEF_OP(F80BCDLOAD);
  DEF_OP(F80BCDSTORE);
#undef DEF_OP
  template<typename unsigned_type, typename signed_type, typename float_type>
  static bool IsConditionTrue(uint8_t Cond, uint64_t Src1, uint64_t Src2) {
    bool CompResult = false;
    switch (Cond) {
      case FEXCore::IR::COND_EQ:
        CompResult = static_cast<unsigned_type>(Src1) == static_cast<unsigned_type>(Src2);
        break;
      case FEXCore::IR::COND_NEQ:
        CompResult = static_cast<unsigned_type>(Src1) != static_cast<unsigned_type>(Src2);
        break;
      case FEXCore::IR::COND_SGE:
        CompResult = static_cast<signed_type>(Src1) >= static_cast<signed_type>(Src2);
        break;
      case FEXCore::IR::COND_SLT:
        CompResult = static_cast<signed_type>(Src1) < static_cast<signed_type>(Src2);
        break;
      case FEXCore::IR::COND_SGT:
        CompResult = static_cast<signed_type>(Src1) > static_cast<signed_type>(Src2);
        break;
      case FEXCore::IR::COND_SLE:
        CompResult = static_cast<signed_type>(Src1) <= static_cast<signed_type>(Src2);
        break;
      case FEXCore::IR::COND_UGE:
        CompResult = static_cast<unsigned_type>(Src1) >= static_cast<unsigned_type>(Src2);
        break;
      case FEXCore::IR::COND_ULT:
        CompResult = static_cast<unsigned_type>(Src1) < static_cast<unsigned_type>(Src2);
        break;
      case FEXCore::IR::COND_UGT:
        CompResult = static_cast<unsigned_type>(Src1) > static_cast<unsigned_type>(Src2);
        break;
      case FEXCore::IR::COND_ULE:
        CompResult = static_cast<unsigned_type>(Src1) <= static_cast<unsigned_type>(Src2);
        break;

      case FEXCore::IR::COND_FLU:
        CompResult = reinterpret_cast<float_type&>(Src1) < reinterpret_cast<float_type&>(Src2) || (std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
        break;
      case FEXCore::IR::COND_FGE:
        CompResult = reinterpret_cast<float_type&>(Src1) >= reinterpret_cast<float_type&>(Src2) && !(std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
        break;
      case FEXCore::IR::COND_FLEU:
        CompResult = reinterpret_cast<float_type&>(Src1) <= reinterpret_cast<float_type&>(Src2) || (std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
        break;
      case FEXCore::IR::COND_FGT:
        CompResult = reinterpret_cast<float_type&>(Src1) > reinterpret_cast<float_type&>(Src2) && !(std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
        break;
      case FEXCore::IR::COND_FU:
        CompResult = (std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
        break;
      case FEXCore::IR::COND_FNU:
        CompResult = !(std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
        break;
      case FEXCore::IR::COND_MI:
      case FEXCore::IR::COND_PL:
      case FEXCore::IR::COND_VS:
      case FEXCore::IR::COND_VC:
      default:
        LOGMAN_MSG_A_FMT("Unsupported compare type");
        break;
    }

    return CompResult;
  }
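
Note that the float conditions above compare the operands' raw register bits reinterpreted as floats, with explicit NaN handling for the ordered/unordered variants. A hedged standalone illustration of COND_FLU (less-than or unordered), not code from this commit:

#include <cstdint>
#include <cstring>
#include <cmath>
#include <cstdio>

// Reinterpret the low 32 bits of an integer register as a float, the way
// IsConditionTrue<uint32_t, int32_t, float> reads its operands.
static float AsFloat(uint64_t Src) {
  float F;
  std::memcpy(&F, &Src, sizeof(F)); // Well-defined alternative to the reference cast
  return F;
}

int main() {
  uint64_t A = 0x3f800000; // Bit pattern of 1.0f
  uint64_t B = 0x7fc00000; // Bit pattern of a quiet NaN
  bool FLU = AsFloat(A) < AsFloat(B) || std::isnan(AsFloat(A)) || std::isnan(AsFloat(B));
  printf("%d\n", FLU);     // 1: a NaN operand makes the pair unordered
}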

  static uint8_t GetOpSize(FEXCore::IR::IRListView *CurrentIR, IR::OrderedNodeWrapper Node) {
    auto IROp = CurrentIR->GetOp<FEXCore::IR::IROp_Header>(Node);
    return IROp->Size;
  }
};
};
289 External/FEXCore/Source/Interface/Core/Interpreter/MemoryOps.cpp vendored Normal file
@ -0,0 +1,289 @
|
||||
/*
|
||||
$info$
|
||||
tags: backend|interpreter
|
||||
$end_info$
|
||||
*/
|
||||
|
||||
#include "Interface/Core/Interpreter/InterpreterClass.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterOps.h"
|
||||
#include "Interface/Core/Interpreter/InterpreterDefines.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace FEXCore::CPU {
|
||||
static inline void CacheLineFlush(char *Addr) {
|
||||
#ifdef _M_X86_64
|
||||
__asm volatile (
|
||||
"clflush (%[Addr]);"
|
||||
:: [Addr] "r" (Addr)
|
||||
: "memory");
|
||||
#else
|
||||
__builtin___clear_cache(Addr, Addr+64);
|
||||
#endif
|
||||
}
|
||||
|
||||
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
|
||||
DEF_OP(LoadContext) {
|
||||
auto Op = IROp->C<IR::IROp_LoadContext>();
|
||||
uint8_t OpSize = IROp->Size;
|
||||
|
||||
uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
|
||||
ContextPtr += Op->Offset;
|
||||
#define LOAD_CTX(x, y) \
|
||||
case x: { \
|
||||
y const *MemData = reinterpret_cast<y const*>(ContextPtr); \
|
||||
GD = *MemData; \
|
||||
break; \
|
||||
}
|
||||
switch (OpSize) {
|
||||
LOAD_CTX(1, uint8_t)
|
||||
LOAD_CTX(2, uint16_t)
|
||||
LOAD_CTX(4, uint32_t)
|
||||
LOAD_CTX(8, uint64_t)
|
||||
case 16: {
|
||||
void const *MemData = reinterpret_cast<void const*>(ContextPtr);
|
||||
memcpy(GDP, MemData, OpSize);
|
||||
break;
|
||||
}
|
||||
default: LOGMAN_MSG_A_FMT("Unhandled LoadContext size: {}", OpSize);
|
||||
}
|
||||
#undef LOAD_CTX
|
||||
}
|
||||
|
||||
DEF_OP(StoreContext) {
|
||||
auto Op = IROp->C<IR::IROp_StoreContext>();
|
||||
uint8_t OpSize = IROp->Size;
|
||||
|
||||
uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
|
||||
ContextPtr += Op->Offset;
|
||||
|
||||
void *MemData = reinterpret_cast<void*>(ContextPtr);
|
||||
void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
|
||||
memcpy(MemData, Src, OpSize);
|
||||
}
|
||||

DEF_OP(LoadRegister) {
  LOGMAN_MSG_A_FMT("Unimplemented");
}

DEF_OP(StoreRegister) {
  LOGMAN_MSG_A_FMT("Unimplemented");
}

DEF_OP(LoadContextIndexed) {
  auto Op = IROp->C<IR::IROp_LoadContextIndexed>();
  uint64_t Index = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]);

  uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);

  ContextPtr += Op->BaseOffset;
  ContextPtr += Index * Op->Stride;

#define LOAD_CTX(x, y) \
  case x: { \
    y const *MemData = reinterpret_cast<y const*>(ContextPtr); \
    GD = *MemData; \
    break; \
  }
  switch (Op->Size) {
    LOAD_CTX(1, uint8_t)
    LOAD_CTX(2, uint16_t)
    LOAD_CTX(4, uint32_t)
    LOAD_CTX(8, uint64_t)
    case 16: {
      void const *MemData = reinterpret_cast<void const*>(ContextPtr);
      memcpy(GDP, MemData, Op->Size);
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed size: {}", Op->Size);
  }
#undef LOAD_CTX
}
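// Address math above: CurrentFrame + BaseOffset + Index * Stride. As a
// hypothetical example, Stride = 8 with Index = 3 reads the fourth 8-byte
// slot of the indexed array inside the context.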

DEF_OP(StoreContextIndexed) {
  auto Op = IROp->C<IR::IROp_StoreContextIndexed>();
  uint64_t Index = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);

  uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
  ContextPtr += Op->BaseOffset;
  ContextPtr += Index * Op->Stride;

  void *MemData = reinterpret_cast<void*>(ContextPtr);
  void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
  memcpy(MemData, Src, Op->Size);
}

DEF_OP(SpillRegister) {
  LOGMAN_MSG_A_FMT("Unimplemented");
}

DEF_OP(FillRegister) {
  LOGMAN_MSG_A_FMT("Unimplemented");
}

DEF_OP(LoadFlag) {
  auto Op = IROp->C<IR::IROp_LoadFlag>();

  uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
  ContextPtr += offsetof(FEXCore::Core::CPUState, flags[0]);
  ContextPtr += Op->Flag;
  uint8_t const *MemData = reinterpret_cast<uint8_t const*>(ContextPtr);
  GD = *MemData;
}
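// Flags are stored unpacked, one byte per flag, in CPUState::flags, so
// Op->Flag indexes the array directly and a flag access is a single byte
// load or store.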

DEF_OP(StoreFlag) {
  auto Op = IROp->C<IR::IROp_StoreFlag>();
  uint8_t Arg = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]);

  uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
  ContextPtr += offsetof(FEXCore::Core::CPUState, flags[0]);
  ContextPtr += Op->Flag;
  uint8_t *MemData = reinterpret_cast<uint8_t*>(ContextPtr);
  *MemData = Arg;
}

DEF_OP(LoadMem) {
  auto Op = IROp->C<IR::IROp_LoadMem>();
  uint8_t OpSize = IROp->Size;

  uint8_t const *MemData = *GetSrc<uint8_t const**>(Data->SSAData, Op->Addr);

  if (!Op->Offset.IsInvalid()) {
    auto Offset = *GetSrc<uintptr_t const*>(Data->SSAData, Op->Offset) * Op->OffsetScale;

    switch(Op->OffsetType.Val) {
      case IR::MEM_OFFSET_SXTX.Val: MemData += Offset; break;
      case IR::MEM_OFFSET_UXTW.Val: MemData += (uint32_t)Offset; break;
      case IR::MEM_OFFSET_SXTW.Val: MemData += (int32_t)Offset; break;
    }
  }
  memset(GDP, 0, 16);
  switch (OpSize) {
    case 1: {
      const uint8_t *D = (const uint8_t*)MemData;
      GD = *D;
      break;
    }
    case 2: {
      const uint16_t *D = (const uint16_t*)MemData;
      GD = *D;
      break;
    }
    case 4: {
      const uint32_t *D = (const uint32_t*)MemData;
      GD = *D;
      break;
    }
    case 8: {
      const uint64_t *D = (const uint64_t*)MemData;
      GD = *D;
      break;
    }

    default:
      memcpy(GDP, MemData, Op->Size);
      break;
  }
}
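// The effective address above is Addr + Extend(Offset * OffsetScale), where
// the extend mode follows the IR's MEM_OFFSET_* encoding: SXTX takes the
// 64-bit offset as-is, UXTW zero-extends its low 32 bits, and SXTW
// sign-extends them. E.g. a hypothetical indexed 4-byte load with
// OffsetScale = 4, Index = -1, and SXTW resolves to MemData - 4.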

DEF_OP(StoreMem) {
  auto Op = IROp->C<IR::IROp_StoreMem>();
  uint8_t OpSize = IROp->Size;

  uint8_t *MemData = *GetSrc<uint8_t **>(Data->SSAData, Op->Addr);

  if (!Op->Offset.IsInvalid()) {
    auto Offset = *GetSrc<uintptr_t const*>(Data->SSAData, Op->Offset) * Op->OffsetScale;

    switch(Op->OffsetType.Val) {
      case IR::MEM_OFFSET_SXTX.Val: MemData += Offset; break;
      case IR::MEM_OFFSET_UXTW.Val: MemData += (uint32_t)Offset; break;
      case IR::MEM_OFFSET_SXTW.Val: MemData += (int32_t)Offset; break;
    }
  }
  switch (OpSize) {
    case 1: {
      *reinterpret_cast<uint8_t*>(MemData) = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
      break;
    }
    case 2: {
      *reinterpret_cast<uint16_t*>(MemData) = *GetSrc<uint16_t*>(Data->SSAData, Op->Value);
      break;
    }
    case 4: {
      *reinterpret_cast<uint32_t*>(MemData) = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
      break;
    }
    case 8: {
      *reinterpret_cast<uint64_t*>(MemData) = *GetSrc<uint64_t*>(Data->SSAData, Op->Value);
      break;
    }

    default:
      memcpy(MemData, GetSrc<void*>(Data->SSAData, Op->Value), Op->Size);
      break;
  }
}

DEF_OP(VLoadMemElement) {
  auto Op = IROp->C<IR::IROp_VLoadMemElement>();
  void const *MemData = *GetSrc<void const**>(Data->SSAData, Op->Header.Args[0]);

  memcpy(GDP, GetSrc<void*>(Data->SSAData, Op->Header.Args[1]), 16);
  memcpy(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(GDP) + (Op->Header.ElementSize * Op->Index)),
         MemData, Op->Header.ElementSize);
}
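// VLoadMemElement is an insert-load: the full 16-byte vector from Args[1] is
// copied to the destination first, then element Op->Index is overwritten
// with ElementSize bytes loaded from memory.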

DEF_OP(VStoreMemElement) {
#define STORE_DATA(x, y) \
  case x: { \
    y *MemData = *GetSrc<y**>(Data->SSAData, Op->Header.Args[0]); \
    memcpy(MemData, &GetSrc<y*>(Data->SSAData, Op->Header.Args[1])[Op->Index], sizeof(y)); \
    break; \
  }

  auto Op = IROp->C<IR::IROp_VStoreMemElement>();
  uint8_t OpSize = IROp->Size;

  switch (OpSize) {
    STORE_DATA(1, uint8_t)
    STORE_DATA(2, uint16_t)
    STORE_DATA(4, uint32_t)
    STORE_DATA(8, uint64_t)
    default: LOGMAN_MSG_A_FMT("Unhandled VStoreMemElement size"); break;
  }
#undef STORE_DATA
}

DEF_OP(CacheLineClear) {
  auto Op = IROp->C<IR::IROp_CacheLineClear>();

  char *MemData = *GetSrc<char **>(Data->SSAData, Op->Addr);

  // 64-byte cache line clear
  CacheLineFlush(MemData);
}

#undef DEF_OP
void InterpreterOps::RegisterMemoryHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
  REGISTER_OP(LOADCONTEXT, LoadContext);
  REGISTER_OP(STORECONTEXT, StoreContext);
  REGISTER_OP(LOADREGISTER, LoadRegister);
  REGISTER_OP(STOREREGISTER, StoreRegister);
  REGISTER_OP(LOADCONTEXTINDEXED, LoadContextIndexed);
  REGISTER_OP(STORECONTEXTINDEXED, StoreContextIndexed);
  REGISTER_OP(SPILLREGISTER, SpillRegister);
  REGISTER_OP(FILLREGISTER, FillRegister);
  REGISTER_OP(LOADFLAG, LoadFlag);
  REGISTER_OP(STOREFLAG, StoreFlag);
  REGISTER_OP(LOADMEM, LoadMem);
  REGISTER_OP(STOREMEM, StoreMem);
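  // The TSO (total store order) variants are wired to the same handlers as
  // the plain loads and stores; the interpreter adds no extra ordering here.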
  REGISTER_OP(LOADMEMTSO, LoadMem);
  REGISTER_OP(STOREMEMTSO, StoreMem);
  REGISTER_OP(VLOADMEMELEMENT, VLoadMemElement);
  REGISTER_OP(VSTOREMEMELEMENT, VStoreMemElement);
  REGISTER_OP(CACHELINECLEAR, CacheLineClear);
#undef REGISTER_OP
}
}
158
External/FEXCore/Source/Interface/Core/Interpreter/MiscOps.cpp
vendored
Normal file
158
External/FEXCore/Source/Interface/Core/Interpreter/MiscOps.cpp
vendored
Normal file
@ -0,0 +1,158 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/

#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"

#include <cstdint>
#ifdef _M_X86_64
#include <xmmintrin.h>
#endif

namespace FEXCore::CPU {
[[noreturn]]
static void StopThread(FEXCore::Core::InternalThreadState *Thread) {
  Thread->CTX->StopThread(Thread);

  LOGMAN_MSG_A_FMT("unreachable");
  FEX_UNREACHABLE;
}

#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
DEF_OP(Fence) {
  auto Op = IROp->C<IR::IROp_Fence>();
  switch (Op->Fence) {
    case IR::Fence_Load.Val:
      std::atomic_thread_fence(std::memory_order_acquire);
      break;
    case IR::Fence_LoadStore.Val:
      std::atomic_thread_fence(std::memory_order_seq_cst);
      break;
    case IR::Fence_Store.Val:
      std::atomic_thread_fence(std::memory_order_release);
      break;
    default: LOGMAN_MSG_A_FMT("Unknown Fence: {}", Op->Fence); break;
  }
}
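// These fence kinds presumably correspond to the guest's LFENCE (load),
// SFENCE (store), and MFENCE (load/store) instructions; the interpreter
// approximates them with C++ acquire, release, and seq_cst thread fences.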

DEF_OP(Break) {
  auto Op = IROp->C<IR::IROp_Break>();
  switch (Op->Reason) {
    case 4: // HLT
      StopThread(Data->State);
      break;
    default: LOGMAN_MSG_A_FMT("Unknown Break Reason: {}", Op->Reason); break;
  }
}

DEF_OP(GetRoundingMode) {
  uint32_t GuestRounding{};
#ifdef _M_ARM_64
  uint64_t Tmp{};
  __asm(R"(
    mrs %[Tmp], FPCR;
  )"
  : [Tmp] "=r" (Tmp));
  // Extract the rounding
  // On ARM the ordering is different than on x86
  GuestRounding |= ((Tmp >> 24) & 1) ? IR::ROUND_MODE_FLUSH_TO_ZERO : 0;
  uint8_t RoundingMode = (Tmp >> 22) & 0b11;
  if (RoundingMode == 0)
    GuestRounding |= IR::ROUND_MODE_NEAREST;
  else if (RoundingMode == 1)
    GuestRounding |= IR::ROUND_MODE_POSITIVE_INFINITY;
  else if (RoundingMode == 2)
    GuestRounding |= IR::ROUND_MODE_NEGATIVE_INFINITY;
  else if (RoundingMode == 3)
    GuestRounding |= IR::ROUND_MODE_TOWARDS_ZERO;
#else
  GuestRounding = _mm_getcsr();

  // Extract the rounding
  GuestRounding = (GuestRounding >> 13) & 0b111;
#endif
  memcpy(GDP, &GuestRounding, sizeof(GuestRounding));
}
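// For reference, the two hosts encode rounding differently: FPCR.RMode
// (bits 23:22) on ARM versus the MXCSR-style encoding the IR uses
// (MXCSR.RC, bits 14:13) on x86:
//
//   round-to-nearest   ARM 0b00   x86 0b00
//   toward -infinity   ARM 0b10   x86 0b01
//   toward +infinity   ARM 0b01   x86 0b10
//   toward zero        ARM 0b11   x86 0b11
//
// Flush-to-zero travels separately: FPCR.FZ (bit 24) on ARM, and the bit
// above the rounding field in the x86 path's 3-bit extract.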

DEF_OP(SetRoundingMode) {
  auto Op = IROp->C<IR::IROp_SetRoundingMode>();
  uint8_t GuestRounding = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]);
#ifdef _M_ARM_64
  uint64_t HostRounding{};
  __asm volatile(R"(
    mrs %[Tmp], FPCR;
  )"
  : [Tmp] "=r" (HostRounding));
  // Mask out the rounding
  HostRounding &= ~(0b111 << 22);

  HostRounding |= (GuestRounding & IR::ROUND_MODE_FLUSH_TO_ZERO) ? (1U << 24) : 0;

  uint8_t RoundingMode = GuestRounding & 0b11;
  if (RoundingMode == IR::ROUND_MODE_NEAREST)
    HostRounding |= (0b00U << 22);
  else if (RoundingMode == IR::ROUND_MODE_POSITIVE_INFINITY)
    HostRounding |= (0b01U << 22);
  else if (RoundingMode == IR::ROUND_MODE_NEGATIVE_INFINITY)
    HostRounding |= (0b10U << 22);
  else if (RoundingMode == IR::ROUND_MODE_TOWARDS_ZERO)
    HostRounding |= (0b11U << 22);

  __asm volatile(R"(
    msr FPCR, %[Tmp];
  )"
  :: [Tmp] "r" (HostRounding));
#else
  uint32_t HostRounding = _mm_getcsr();

  // Cut out the host rounding mode
  HostRounding &= ~(0b111 << 13);

  // Insert our new rounding mode
  HostRounding |= GuestRounding << 13;
  _mm_setcsr(HostRounding);
#endif
}

DEF_OP(Print) {
  auto Op = IROp->C<IR::IROp_Print>();
  uint8_t OpSize = IROp->Size;

  if (OpSize <= 8) {
    uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]);
    LogMan::Msg::IFmt(">>>> Value in Arg: 0x{:x}, {}", Src, Src);
  }
  else if (OpSize == 16) {
    __uint128_t Src = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
    uint64_t Src0 = Src;
    uint64_t Src1 = Src >> 64;
    LogMan::Msg::IFmt(">>>> Value[0] in Arg: 0x{:x}, {}", Src0, Src0);
    LogMan::Msg::IFmt("     Value[1] in Arg: 0x{:x}, {}", Src1, Src1);
  }
  else
    LOGMAN_MSG_A_FMT("Unknown value size: {}", OpSize);
}

#undef DEF_OP
void InterpreterOps::RegisterMiscHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
  REGISTER_OP(DUMMY, NoOp);
  REGISTER_OP(IRHEADER, NoOp);
  REGISTER_OP(CODEBLOCK, NoOp);
  REGISTER_OP(BEGINBLOCK, NoOp);
  REGISTER_OP(ENDBLOCK, NoOp);
  REGISTER_OP(FENCE, Fence);
  REGISTER_OP(BREAK, Break);
  REGISTER_OP(PHI, NoOp);
  REGISTER_OP(PHIVALUE, NoOp);
  REGISTER_OP(PRINT, Print);
  REGISTER_OP(GETROUNDINGMODE, GetRoundingMode);
  REGISTER_OP(SETROUNDINGMODE, SetRoundingMode);
  REGISTER_OP(INVALIDATEFLAGS, NoOp);
#undef REGISTER_OP
}
}
50
External/FEXCore/Source/Interface/Core/Interpreter/MoveOps.cpp
vendored
Normal file
50
External/FEXCore/Source/Interface/Core/Interpreter/MoveOps.cpp
vendored
Normal file
@ -0,0 +1,50 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/

#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"

#include <cstdint>

namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
DEF_OP(ExtractElementPair) {
  auto Op = IROp->C<IR::IROp_ExtractElementPair>();
  uintptr_t Src = GetSrc<uintptr_t>(Data->SSAData, Op->Header.Args[0]);
  memcpy(GDP,
         reinterpret_cast<void*>(Src + Op->Header.Size * Op->Element), Op->Header.Size);
}
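// For a hypothetical 64-bit pair (Op->Header.Size = 8), Element = 0 copies
// bytes [0, 8) of the source pair and Element = 1 copies bytes [8, 16); the
// result is a single Size-byte value.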

DEF_OP(CreateElementPair) {
  auto Op = IROp->C<IR::IROp_CreateElementPair>();
  void *Src_Lower = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
  void *Src_Upper = GetSrc<void*>(Data->SSAData, Op->Header.Args[1]);

  uint8_t *Dst = GetDest<uint8_t*>(Data->SSAData, Node);

  memcpy(Dst, Src_Lower, Op->Header.Size);
  memcpy(Dst + Op->Header.Size, Src_Upper, Op->Header.Size);
}
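// The inverse of ExtractElementPair: the destination receives Args[0] in its
// lower Op->Header.Size bytes and Args[1] in the next Op->Header.Size bytes,
// so the packed result is twice the element size.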

DEF_OP(Mov) {
  auto Op = IROp->C<IR::IROp_Mov>();
  uint8_t OpSize = IROp->Size;

  memcpy(GDP, GetSrc<void*>(Data->SSAData, Op->Header.Args[0]), OpSize);
}

#undef DEF_OP
void InterpreterOps::RegisterMoveHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
  REGISTER_OP(EXTRACTELEMENTPAIR, ExtractElementPair);
  REGISTER_OP(CREATEELEMENTPAIR, CreateElementPair);
  REGISTER_OP(MOV, Mov);
#undef REGISTER_OP
}
}
2031
External/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp
vendored
Normal file
2031
External/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff