Merge pull request #1307 from Sonicadvance1/InterpreterDispatcher

Interpreter: Splits ops into separate files
This commit is contained in:
Ryan Houdek 2021-10-21 16:22:45 -07:00 committed by GitHub
commit e9937d9a85
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 6542 additions and 5405 deletions

View File

@ -97,6 +97,17 @@ set (SRCS
Interface/Core/Dispatcher/Arm64Dispatcher.cpp
Interface/Core/Interpreter/InterpreterCore.cpp
Interface/Core/Interpreter/InterpreterOps.cpp
Interface/Core/Interpreter/ALUOps.cpp
Interface/Core/Interpreter/AtomicOps.cpp
Interface/Core/Interpreter/BranchOps.cpp
Interface/Core/Interpreter/ConversionOps.cpp
Interface/Core/Interpreter/EncryptionOps.cpp
Interface/Core/Interpreter/F80Ops.cpp
Interface/Core/Interpreter/FlagOps.cpp
Interface/Core/Interpreter/MemoryOps.cpp
Interface/Core/Interpreter/MiscOps.cpp
Interface/Core/Interpreter/MoveOps.cpp
Interface/Core/Interpreter/VectorOps.cpp
Interface/Core/X86Tables/BaseTables.cpp
Interface/Core/X86Tables/DDDTables.cpp
Interface/Core/X86Tables/EVEXTables.cpp

View File

@ -249,6 +249,8 @@ namespace FEXCore::Context {
LocalLoader = Loader;
using namespace FEXCore::Core;
FEXCore::CPU::InitializeInterpreterOpHandlers();
FEXCore::Core::CPUState NewThreadState = CreateDefaultCPUState();
FEXCore::Core::InternalThreadState *Thread = CreateThread(&NewThreadState, 0);

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,793 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <FEXCore/Utils/BitUtils.h>
#include <cstdint>
namespace FEXCore::CPU {
#ifdef _M_X86_64
// Host-native (x86-64) implementations of the atomic helpers used by the
// interpreter's atomic IR ops below.
//
// AtomicFetchNeg: atomically negates *Addr and returns the value that was
// previously in memory. x86 has no fetch-neg primitive, so this is a CAS loop.
// Deduplicated: the four per-width overloads were byte-identical copies, so a
// single template serves uint8_t/uint16_t/uint32_t/uint64_t callers.
template<typename Type>
static Type AtomicFetchNeg(Type *Addr) {
  std::atomic<Type> *MemData = reinterpret_cast<std::atomic<Type>*>(Addr);
  Type Expected = MemData->load();
  Type Desired = -Expected;
  // compare_exchange_strong writes the observed value back into Expected on
  // failure, so Desired must be recomputed from it on every iteration.
  do {
    Desired = -Expected;
  } while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst));
  return Expected;
}

// AtomicCompareAndSwap: single-shot CAS.
// Returns `expected` when the swap succeeded, otherwise the value that was
// actually observed in memory (mirrors the AArch64 asm variants' contract).
template<typename T>
static T AtomicCompareAndSwap(T expected, T desired, T *addr)
{
  std::atomic<T> *MemData = reinterpret_cast<std::atomic<T>*>(addr);
  T Expected = expected;
  bool Result = MemData->compare_exchange_strong(Expected, desired);
  return Result ? expected : Expected;
}
#else
// Needs to match what the AArch64 JIT and unaligned signal handler expects
// AArch64 host implementations of AtomicFetchNeg: atomically negate *Addr and
// return the value that was previously in memory.
// Hand-written LDAXR/STLXR exclusive-access loops: the store-release reports
// failure (TmpStatus != 0) when the exclusive monitor was lost, and the loop
// retries from label 1. Left as asm so the instruction sequence stays exactly
// what the unaligned-access signal handler expects to see.
// 8-bit variant: ldaxrb/stlxrb operate on the byte lane of the W registers.
uint8_t AtomicFetchNeg(uint8_t *Addr) {
using Type = uint8_t;
Type Result{};
Type Tmp{};
Type TmpStatus{};
__asm__ volatile(
R"(
1:
ldaxrb %w[Result], [%[Memory]];
neg %w[Tmp], %w[Result];
stlxrb %w[TmpStatus], %w[Tmp], [%[Memory]];
cbnz %w[TmpStatus], 1b;
)"
: [Result] "=r" (Result)
, [Tmp] "=r" (Tmp)
, [TmpStatus] "=r" (TmpStatus)
, [Memory] "+r" (Addr)
:: "memory"
);
return Result;
}
// 16-bit variant: ldaxrh/stlxrh (halfword exclusives).
uint16_t AtomicFetchNeg(uint16_t *Addr) {
using Type = uint16_t;
Type Result{};
Type Tmp{};
Type TmpStatus{};
__asm__ volatile(
R"(
1:
ldaxrh %w[Result], [%[Memory]];
neg %w[Tmp], %w[Result];
stlxrh %w[TmpStatus], %w[Tmp], [%[Memory]];
cbnz %w[TmpStatus], 1b;
)"
: [Result] "=r" (Result)
, [Tmp] "=r" (Tmp)
, [TmpStatus] "=r" (TmpStatus)
, [Memory] "+r" (Addr)
:: "memory"
);
return Result;
}
// 32-bit variant: ldaxr/stlxr on W registers.
uint32_t AtomicFetchNeg(uint32_t *Addr) {
using Type = uint32_t;
Type Result{};
Type Tmp{};
Type TmpStatus{};
__asm__ volatile(
R"(
1:
ldaxr %w[Result], [%[Memory]];
neg %w[Tmp], %w[Result];
stlxr %w[TmpStatus], %w[Tmp], [%[Memory]];
cbnz %w[TmpStatus], 1b;
)"
: [Result] "=r" (Result)
, [Tmp] "=r" (Tmp)
, [TmpStatus] "=r" (TmpStatus)
, [Memory] "+r" (Addr)
:: "memory"
);
return Result;
}
// 64-bit variant: ldaxr/stlxr on X registers (no %w modifier on data operands).
uint64_t AtomicFetchNeg(uint64_t *Addr) {
using Type = uint64_t;
Type Result{};
Type Tmp{};
Type TmpStatus{};
__asm__ volatile(
R"(
1:
ldaxr %[Result], [%[Memory]];
neg %[Tmp], %[Result];
stlxr %w[TmpStatus], %[Tmp], [%[Memory]];
cbnz %w[TmpStatus], 1b;
)"
: [Result] "=r" (Result)
, [Tmp] "=r" (Tmp)
, [TmpStatus] "=r" (TmpStatus)
, [Memory] "+r" (Addr)
:: "memory"
);
return Result;
}
// AArch64 hand-written compare-and-swap.
// Contract (must match the x86 template path): returns `expected` when the
// swap succeeded, otherwise the value actually observed in memory.
// On mismatch the exclusive reservation is explicitly dropped with clrex.
template<typename T>
static T AtomicCompareAndSwap(T expected, T desired, T *addr);
// 8-bit CAS: cmp uses uxtb so only the low byte of Expected is compared.
template<>
uint8_t AtomicCompareAndSwap(uint8_t expected, uint8_t desired, uint8_t *addr) {
using Type = uint8_t;
//force Result to r9 (scratch register) or clang spills to stack
register Type Result asm("r9"){};
Type Tmp{};
Type Tmp2{};
__asm__ volatile(
R"(
1:
ldaxrb %w[Tmp], [%[Memory]];
cmp %w[Tmp], %w[Expected], uxtb;
b.ne 2f;
stlxrb %w[Tmp2], %w[Desired], [%[Memory]];
cbnz %w[Tmp2], 1b;
mov %w[Result], %w[Expected];
b 3f;
2:
mov %w[Result], %w[Tmp];
clrex;
3:
)"
: [Tmp] "=r" (Tmp)
, [Tmp2] "=r" (Tmp2)
, [Desired] "+r" (desired)
, [Expected] "+r" (expected)
, [Result] "=r" (Result)
, [Memory] "+r" (addr)
:: "memory"
);
return Result;
}
// 16-bit CAS: halfword exclusives, cmp with uxth.
template<>
uint16_t AtomicCompareAndSwap(uint16_t expected, uint16_t desired, uint16_t *addr) {
using Type = uint16_t;
//force Result to r9 (scratch register) or clang spills to stack
register Type Result asm("r9"){};
Type Tmp{};
Type Tmp2{};
__asm__ volatile(
R"(
1:
ldaxrh %w[Tmp], [%[Memory]];
cmp %w[Tmp], %w[Expected], uxth;
b.ne 2f;
stlxrh %w[Tmp2], %w[Desired], [%[Memory]];
cbnz %w[Tmp2], 1b;
mov %w[Result], %w[Expected];
b 3f;
2:
mov %w[Result], %w[Tmp];
clrex;
3:
)"
: [Tmp] "=r" (Tmp)
, [Tmp2] "=r" (Tmp2)
, [Desired] "+r" (desired)
, [Expected] "+r" (expected)
, [Result] "=r" (Result)
, [Memory] "+r" (addr)
:: "memory"
);
return Result;
}
// 32-bit CAS: full W-register compare, no extension needed.
template<>
uint32_t AtomicCompareAndSwap(uint32_t expected, uint32_t desired, uint32_t *addr) {
using Type = uint32_t;
//force Result to r9 (scratch register) or clang spills to stack
register Type Result asm("r9"){};
Type Tmp{};
Type Tmp2{};
__asm__ volatile(
R"(
1:
ldaxr %w[Tmp], [%[Memory]];
cmp %w[Tmp], %w[Expected];
b.ne 2f;
stlxr %w[Tmp2], %w[Desired], [%[Memory]];
cbnz %w[Tmp2], 1b;
mov %w[Result], %w[Expected];
b 3f;
2:
mov %w[Result], %w[Tmp];
clrex;
3:
)"
: [Tmp] "=r" (Tmp)
, [Tmp2] "=r" (Tmp2)
, [Desired] "+r" (desired)
, [Expected] "+r" (expected)
, [Result] "=r" (Result)
, [Memory] "+r" (addr)
:: "memory"
);
return Result;
}
// 64-bit CAS: X-register data operands; status flag is still a W register.
template<>
uint64_t AtomicCompareAndSwap(uint64_t expected, uint64_t desired, uint64_t *addr) {
using Type = uint64_t;
//force Result to r9 (scratch register) or clang spills to stack
register Type Result asm("r9"){};
Type Tmp{};
Type Tmp2{};
__asm__ volatile(
R"(
1:
ldaxr %[Tmp], [%[Memory]];
cmp %[Tmp], %[Expected];
b.ne 2f;
stlxr %w[Tmp2], %[Desired], [%[Memory]];
cbnz %w[Tmp2], 1b;
mov %[Result], %[Expected];
b 3f;
2:
mov %[Result], %[Tmp];
clrex;
3:
)"
: [Tmp] "=r" (Tmp)
, [Tmp2] "=r" (Tmp2)
, [Desired] "+r" (desired)
, [Expected] "+r" (expected)
, [Result] "=r" (Result)
, [Memory] "+r" (addr)
:: "memory"
);
return Result;
}
#endif
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
// CASPair: compare-and-swap a pair of elements as a single atomic unit.
DEF_OP(CASPair) {
auto Op = IROp->C<IR::IROp_CASPair>();
uint8_t OpSize = IROp->Size;
// Size is the size of each pair element, so the full atomic access is 2*OpSize.
switch (OpSize) {
case 4: {
// Two 32-bit elements -> one 64-bit CAS.
GD = AtomicCompareAndSwap(
*GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]),
*GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]),
*GetSrc<uint64_t**>(Data->SSAData, Op->Header.Args[2])
);
break;
}
case 8: {
// Two 64-bit elements -> 128-bit CAS via std::atomic<__uint128_t>.
std::atomic<__uint128_t> *MemData = *GetSrc<std::atomic<__uint128_t> **>(Data->SSAData, Op->Header.Args[2]);
__uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
__uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);
__uint128_t Expected = Src1;
bool Result = MemData->compare_exchange_strong(Expected, Src2);
// Same convention as AtomicCompareAndSwap: the compared-against value on
// success, the value observed in memory on failure.
memcpy(GDP, Result ? &Src1 : &Expected, 16);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown CAS size: {}", OpSize); break;
}
}
// CAS: scalar compare-and-swap, dispatched on access width.
// Result (GD) follows AtomicCompareAndSwap: the expected value when the swap
// happened, otherwise the value actually found in memory.
DEF_OP(CAS) {
auto Op = IROp->C<IR::IROp_CAS>();
uint8_t OpSize = IROp->Size;
switch (OpSize) {
case 1: {
GD = AtomicCompareAndSwap(
*GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]),
*GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]),
*GetSrc<uint8_t**>(Data->SSAData, Op->Header.Args[2])
);
break;
}
case 2: {
GD = AtomicCompareAndSwap(
*GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[0]),
*GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]),
*GetSrc<uint16_t**>(Data->SSAData, Op->Header.Args[2])
);
break;
}
case 4: {
GD = AtomicCompareAndSwap(
*GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[0]),
*GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]),
*GetSrc<uint32_t**>(Data->SSAData, Op->Header.Args[2])
);
break;
}
case 8: {
GD = AtomicCompareAndSwap(
*GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]),
*GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]),
*GetSrc<uint64_t**>(Data->SSAData, Op->Header.Args[2])
);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown CAS size: {}", OpSize); break;
}
}
// AtomicAdd: atomically adds the source operand into guest memory.
// Memory-only effect; no destination value is produced.
DEF_OP(AtomicAdd) {
  auto Op = IROp->C<IR::IROp_AtomicAdd>();
  switch (Op->Size) {
    case 1: {
      auto *Memory = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint8_t Value = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
      *Memory += Value;
      break;
    }
    case 2: {
      auto *Memory = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint16_t Value = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
      *Memory += Value;
      break;
    }
    case 4: {
      auto *Memory = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint32_t Value = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
      *Memory += Value;
      break;
    }
    case 8: {
      auto *Memory = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint64_t Value = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
      *Memory += Value;
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
  }
}
// AtomicSub: atomically subtracts the source operand from guest memory.
// Memory-only effect; no destination value is produced.
DEF_OP(AtomicSub) {
auto Op = IROp->C<IR::IROp_AtomicSub>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData -= Src;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData -= Src;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData -= Src;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData -= Src;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
// AtomicAnd/AtomicOr/AtomicXor: atomic read-modify-write bitwise ops on guest
// memory. Memory-only effect; no destination value is produced.
DEF_OP(AtomicAnd) {
auto Op = IROp->C<IR::IROp_AtomicAnd>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData &= Src;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData &= Src;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData &= Src;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData &= Src;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
DEF_OP(AtomicOr) {
auto Op = IROp->C<IR::IROp_AtomicOr>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData |= Src;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData |= Src;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData |= Src;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData |= Src;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
DEF_OP(AtomicXor) {
auto Op = IROp->C<IR::IROp_AtomicXor>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData ^= Src;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData ^= Src;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData ^= Src;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData ^= Src;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
// AtomicSwap: atomically stores the source operand into guest memory and
// returns the value that was there before the exchange.
DEF_OP(AtomicSwap) {
auto Op = IROp->C<IR::IROp_AtomicSwap>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
uint8_t Previous = MemData->exchange(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
uint16_t Previous = MemData->exchange(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
uint32_t Previous = MemData->exchange(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
uint64_t Previous = MemData->exchange(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
// AtomicFetchAdd/AtomicFetchSub: atomic read-modify-write that also returns
// the pre-modification value from memory.
DEF_OP(AtomicFetchAdd) {
auto Op = IROp->C<IR::IROp_AtomicFetchAdd>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
uint8_t Previous = MemData->fetch_add(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
uint16_t Previous = MemData->fetch_add(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
uint32_t Previous = MemData->fetch_add(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
uint64_t Previous = MemData->fetch_add(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
DEF_OP(AtomicFetchSub) {
auto Op = IROp->C<IR::IROp_AtomicFetchSub>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
uint8_t Previous = MemData->fetch_sub(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
uint16_t Previous = MemData->fetch_sub(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
uint32_t Previous = MemData->fetch_sub(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
uint64_t Previous = MemData->fetch_sub(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
// AtomicFetchAnd/AtomicFetchOr: atomic bitwise read-modify-write returning
// the pre-modification value from memory.
DEF_OP(AtomicFetchAnd) {
auto Op = IROp->C<IR::IROp_AtomicFetchAnd>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
uint8_t Previous = MemData->fetch_and(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
uint16_t Previous = MemData->fetch_and(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
uint32_t Previous = MemData->fetch_and(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
uint64_t Previous = MemData->fetch_and(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
DEF_OP(AtomicFetchOr) {
auto Op = IROp->C<IR::IROp_AtomicFetchOr>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
uint8_t Previous = MemData->fetch_or(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
uint16_t Previous = MemData->fetch_or(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
uint32_t Previous = MemData->fetch_or(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
uint64_t Previous = MemData->fetch_or(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
// AtomicFetchXor: atomic XOR into guest memory, returning the value that was
// in memory before the operation.
DEF_OP(AtomicFetchXor) {
auto Op = IROp->C<IR::IROp_AtomicFetchXor>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
uint8_t Previous = MemData->fetch_xor(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
uint16_t Previous = MemData->fetch_xor(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
uint32_t Previous = MemData->fetch_xor(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
uint64_t Previous = MemData->fetch_xor(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
// AtomicFetchNeg: atomically negates the value in guest memory and returns
// the value that was there before the negation (delegates to the per-width
// AtomicFetchNeg helpers defined earlier in this file).
DEF_OP(AtomicFetchNeg) {
  auto Op = IROp->C<IR::IROp_AtomicFetchNeg>();
  switch (Op->Size) {
    case 1:
      GD = AtomicFetchNeg(*GetSrc<uint8_t**>(Data->SSAData, Op->Header.Args[0]));
      break;
    case 2:
      GD = AtomicFetchNeg(*GetSrc<uint16_t**>(Data->SSAData, Op->Header.Args[0]));
      break;
    case 4:
      GD = AtomicFetchNeg(*GetSrc<uint32_t**>(Data->SSAData, Op->Header.Args[0]));
      break;
    case 8:
      GD = AtomicFetchNeg(*GetSrc<uint64_t**>(Data->SSAData, Op->Header.Args[0]));
      break;
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
  }
}
#undef DEF_OP
// Registers every atomic-op handler in this file into the interpreter's
// dispatch table, keyed by IR opcode.
void InterpreterOps::RegisterAtomicHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
REGISTER_OP(CASPAIR, CASPair);
REGISTER_OP(CAS, CAS);
REGISTER_OP(ATOMICADD, AtomicAdd);
REGISTER_OP(ATOMICSUB, AtomicSub);
REGISTER_OP(ATOMICAND, AtomicAnd);
REGISTER_OP(ATOMICOR, AtomicOr);
REGISTER_OP(ATOMICXOR, AtomicXor);
REGISTER_OP(ATOMICSWAP, AtomicSwap);
REGISTER_OP(ATOMICFETCHADD, AtomicFetchAdd);
REGISTER_OP(ATOMICFETCHSUB, AtomicFetchSub);
REGISTER_OP(ATOMICFETCHAND, AtomicFetchAnd);
REGISTER_OP(ATOMICFETCHOR, AtomicFetchOr);
REGISTER_OP(ATOMICFETCHXOR, AtomicFetchXor);
REGISTER_OP(ATOMICFETCHNEG, AtomicFetchNeg);
#undef REGISTER_OP
}
}

View File

@ -0,0 +1,157 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include "Interface/HLE/Thunks/Thunks.h"
#include <FEXCore/Utils/BitUtils.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <cstdint>
namespace FEXCore::CPU {
// Raises a signal-return event on the given thread; SignalThread is expected
// not to return here, and the trailing assert/unreachable enforce that.
[[noreturn]]
static void SignalReturn(FEXCore::Core::InternalThreadState *Thread) {
Thread->CTX->SignalThread(Thread, FEXCore::Core::SignalEvent::Return);
LOGMAN_MSG_A_FMT("unreachable");
FEX_UNREACHABLE;
}
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
// Guest call/return ops are not implemented in the interpreter backend;
// they only emit a debug log when hit.
DEF_OP(GuestCallDirect) {
LogMan::Msg::DFmt("Unimplemented");
}
DEF_OP(GuestCallIndirect) {
LogMan::Msg::DFmt("Unimplemented");
}
DEF_OP(GuestReturn) {
LogMan::Msg::DFmt("Unimplemented");
}
// Hands the thread to the noreturn SignalReturn helper in this file.
DEF_OP(SignalReturn) {
SignalReturn(Data->State);
}
// Returns from an interpreter callback invocation using the saved stack entry.
DEF_OP(CallbackReturn) {
Data->State->CTX->InterpreterCallbackReturn(Data->State, Data->StackEntry);
}
// ExitFunction: copies the exit value over the start of the thread's current
// frame and tells the dispatch loop to stop executing this block.
DEF_OP(ExitFunction) {
  auto Op = IROp->C<IR::IROp_ExitFunction>();
  const uint8_t CopySize = IROp->Size;
  void *FrameData = reinterpret_cast<void*>(Data->State->CurrentFrame);
  void *ExitValue = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
  memcpy(FrameData, ExitValue, CopySize);
  Data->BlockResults.Quit = true;
}
// Jump: unconditional branch to another block of the current IR. Repoints the
// interpreter's block iterator and asks the outer loop to re-dispatch.
DEF_OP(Jump) {
auto Op = IROp->C<IR::IROp_Jump>();
uintptr_t ListBegin = Data->CurrentIR->GetListData();
uintptr_t DataBegin = Data->CurrentIR->GetData();
Data->BlockIterator = IR::NodeIterator(ListBegin, DataBegin, Op->Header.Args[0]);
Data->BlockResults.Redo = true;
}
// CondJump: evaluates Op->Cond over Cmp1/Cmp2 and branches to TrueBlock or
// FalseBlock.
DEF_OP(CondJump) {
auto Op = IROp->C<IR::IROp_CondJump>();
uintptr_t ListBegin = Data->CurrentIR->GetListData();
uintptr_t DataBegin = Data->CurrentIR->GetData();
bool CompResult;
uint64_t Src1 = *GetSrc<uint64_t*>(Data->SSAData, Op->Cmp1);
uint64_t Src2 = *GetSrc<uint64_t*>(Data->SSAData, Op->Cmp2);
// 32-bit compares use 32-bit integer (and float) semantics; everything else
// is treated as 64-bit/double.
if (Op->CompareSize == 4)
CompResult = IsConditionTrue<uint32_t, int32_t, float>(Op->Cond.Val, Src1, Src2);
else
CompResult = IsConditionTrue<uint64_t, int64_t, double>(Op->Cond.Val, Src1, Src2);
if (CompResult) {
Data->BlockIterator = IR::NodeIterator(ListBegin, DataBegin, Op->TrueBlock);
}
else {
Data->BlockIterator = IR::NodeIterator(ListBegin, DataBegin, Op->FalseBlock);
}
Data->BlockResults.Redo = true;
}
// Syscall: gathers up to MAX_ARGS argument values (stopping at the first
// invalid arg slot) and forwards them to the registered syscall handler.
DEF_OP(Syscall) {
auto Op = IROp->C<IR::IROp_Syscall>();
FEXCore::HLE::SyscallArguments Args;
for (size_t j = 0; j < FEXCore::HLE::SyscallArguments::MAX_ARGS; ++j) {
if (Op->Header.Args[j].IsInvalid()) break;
Args.Argument[j] = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[j]);
}
uint64_t Res = FEXCore::Context::HandleSyscall(Data->State->CTX->SyscallHandler, Data->State->CurrentFrame, &Args);
GD = Res;
}
// Thunk: looks up a host thunk by name hash and invokes it with the guest
// argument pointer.
DEF_OP(Thunk) {
auto Op = IROp->C<IR::IROp_Thunk>();
auto thunkFn = Data->State->CTX->ThunkHandler->LookupThunk(Op->ThunkNameHash);
thunkFn(*GetSrc<void**>(Data->SSAData, Op->Header.Args[0]));
}
// ValidateCode: compares the guest code bytes at CurrentEntry+Offset against
// the snapshot captured at compile time. Result is 1 when the guest code has
// been modified since compilation, 0 otherwise.
DEF_OP(ValidateCode) {
  auto Op = IROp->C<IR::IROp_ValidateCode>();
  const auto CodePtr = Data->CurrentEntry + Op->Offset;
  const bool Modified = memcmp(reinterpret_cast<const void*>(CodePtr), &Op->CodeOriginalLow, Op->CodeLength) != 0;
  GD = Modified ? 1 : 0;
}
// RemoveCodeEntry: invalidates the compiled-code entry for the current guest
// entry point.
DEF_OP(RemoveCodeEntry) {
Data->State->CTX->RemoveCodeEntry(Data->State, Data->CurrentEntry);
}
// CPUID: runs the emulated CPUID function for (Arg, Leaf) and stores the four
// 32-bit result values (16 bytes total) into the destination.
DEF_OP(CPUID) {
auto Op = IROp->C<IR::IROp_CPUID>();
uint64_t *DstPtr = GetDest<uint64_t*>(Data->SSAData, Node);
uint64_t Arg = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]);
uint64_t Leaf = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
auto Results = Data->State->CTX->CPUID.RunFunction(Arg, Leaf);
memcpy(DstPtr, &Results, sizeof(uint32_t) * 4);
}
#undef DEF_OP
// Registers every branch/flow-control handler in this file into the
// interpreter's dispatch table, keyed by IR opcode.
void InterpreterOps::RegisterBranchHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
REGISTER_OP(GUESTCALLDIRECT, GuestCallDirect);
REGISTER_OP(GUESTCALLINDIRECT, GuestCallIndirect);
REGISTER_OP(GUESTRETURN, GuestReturn);
REGISTER_OP(SIGNALRETURN, SignalReturn);
REGISTER_OP(CALLBACKRETURN, CallbackReturn);
REGISTER_OP(EXITFUNCTION, ExitFunction);
REGISTER_OP(JUMP, Jump);
REGISTER_OP(CONDJUMP, CondJump);
REGISTER_OP(SYSCALL, Syscall);
REGISTER_OP(THUNK, Thunk);
REGISTER_OP(VALIDATECODE, ValidateCode);
REGISTER_OP(REMOVECODEENTRY, RemoveCodeEntry);
REGISTER_OP(CPUID, CPUID);
#undef REGISTER_OP
}
}

View File

@ -0,0 +1,237 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
// VInsGPR: inserts a GPR value (Src2) into element Op->Index of vector Src1.
DEF_OP(VInsGPR) {
auto Op = IROp->C<IR::IROp_VInsGPR>();
uint8_t OpSize = IROp->Size;
__uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
__uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);
// Bit offset of the destination element within the vector.
uint64_t Offset = Op->Index * Op->Header.ElementSize * 8;
__uint128_t Mask = (1ULL << (Op->Header.ElementSize * 8)) - 1;
// ElementSize == 8 would shift a 64-bit literal by 64 (undefined behaviour),
// so use an explicit all-ones mask for that case.
if (Op->Header.ElementSize == 8) {
Mask = ~0ULL;
}
Src2 = Src2 & Mask;
Mask <<= Offset;
Mask = ~Mask;
// Clear the target element in Src1, then OR in the masked new value.
__uint128_t Dst = Src1 & Mask;
Dst |= Src2 << Offset;
memcpy(GDP, &Dst, OpSize);
}
// VCastFromGPR: moves ElementSize bytes from a GPR source into the vector
// destination without conversion.
DEF_OP(VCastFromGPR) {
auto Op = IROp->C<IR::IROp_VCastFromGPR>();
memcpy(GDP, GetSrc<void*>(Data->SSAData, Op->Header.Args[0]), Op->Header.ElementSize);
}
// Float_FromGPR_S: converts a signed integer GPR into a scalar float/double.
// Conv packs (destination element size << 8) | source element size.
DEF_OP(Float_FromGPR_S) {
  auto Op = IROp->C<IR::IROp_Float_FromGPR_S>();
  uint16_t Conv = (Op->Header.ElementSize << 8) | Op->SrcElementSize;
  switch (Conv) {
    case 0x0404: { // Float <- int32_t
      float Dst = (float)*GetSrc<int32_t*>(Data->SSAData, Op->Header.Args[0]);
      memcpy(GDP, &Dst, Op->Header.ElementSize);
      break;
    }
    case 0x0408: { // Float <- int64_t
      float Dst = (float)*GetSrc<int64_t*>(Data->SSAData, Op->Header.Args[0]);
      memcpy(GDP, &Dst, Op->Header.ElementSize);
      break;
    }
    case 0x0804: { // Double <- int32_t
      double Dst = (double)*GetSrc<int32_t*>(Data->SSAData, Op->Header.Args[0]);
      memcpy(GDP, &Dst, Op->Header.ElementSize);
      break;
    }
    case 0x0808: { // Double <- int64_t
      double Dst = (double)*GetSrc<int64_t*>(Data->SSAData, Op->Header.Args[0]);
      memcpy(GDP, &Dst, Op->Header.ElementSize);
      break;
    }
    // Previously an unknown conversion fell through silently, leaving the
    // destination untouched; report it like Float_FToF does.
    default: LOGMAN_MSG_A_FMT("Unknown Float_FromGPR_S sizes: 0x{:04x}", Conv); break;
  }
}
// Float_FToF: scalar float<->double conversion.
// Conv packs (destination element size << 8) | source element size.
DEF_OP(Float_FToF) {
auto Op = IROp->C<IR::IROp_Float_FToF>();
uint16_t Conv = (Op->Header.ElementSize << 8) | Op->SrcElementSize;
switch (Conv) {
case 0x0804: { // Double <- Float
double Dst = (double)*GetSrc<float*>(Data->SSAData, Op->Header.Args[0]);
memcpy(GDP, &Dst, 8);
break;
}
case 0x0408: { // Float <- Double
float Dst = (float)*GetSrc<double*>(Data->SSAData, Op->Header.Args[0]);
memcpy(GDP, &Dst, 4);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown FCVT sizes: 0x{:x}", Conv);
}
}
// Vector_SToF: per-element signed-integer -> float conversion.
// The min/max lambda parameters are required by the DO_VECTOR_1SRC_2TYPE_OP
// macro's signature but unused here (no clamping).
DEF_OP(Vector_SToF) {
auto Op = IROp->C<IR::IROp_Vector_SToF>();
uint8_t OpSize = IROp->Size;
void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
// Results are built in a scratch buffer so GDP is only written once at the end.
uint8_t Tmp[16]{};
uint8_t Elements = OpSize / Op->Header.ElementSize;
auto Func = [](auto a, auto min, auto max) { return a; };
switch (Op->Header.ElementSize) {
DO_VECTOR_1SRC_2TYPE_OP(4, float, int32_t, Func, 0, 0)
DO_VECTOR_1SRC_2TYPE_OP(8, double, int64_t, Func, 0, 0)
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
}
memcpy(GDP, Tmp, OpSize);
}
// Vector_FToZS: per-element float -> signed integer, rounding toward zero
// (std::trunc).
DEF_OP(Vector_FToZS) {
auto Op = IROp->C<IR::IROp_Vector_FToZS>();
uint8_t OpSize = IROp->Size;
void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
uint8_t Tmp[16]{};
uint8_t Elements = OpSize / Op->Header.ElementSize;
auto Func = [](auto a, auto min, auto max) { return std::trunc(a); };
switch (Op->Header.ElementSize) {
DO_VECTOR_1SRC_2TYPE_OP(4, int32_t, float, Func, 0, 0)
DO_VECTOR_1SRC_2TYPE_OP(8, int64_t, double, Func, 0, 0)
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
}
memcpy(GDP, Tmp, OpSize);
}
// Vector_FToS: per-element float -> signed integer using the current rounding
// mode (std::nearbyint).
DEF_OP(Vector_FToS) {
auto Op = IROp->C<IR::IROp_Vector_FToS>();
uint8_t OpSize = IROp->Size;
void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
uint8_t Tmp[16]{};
uint8_t Elements = OpSize / Op->Header.ElementSize;
auto Func = [](auto a, auto min, auto max) { return std::nearbyint(a); };
switch (Op->Header.ElementSize) {
DO_VECTOR_1SRC_2TYPE_OP(4, int32_t, float, Func, 0, 0)
DO_VECTOR_1SRC_2TYPE_OP(8, int64_t, double, Func, 0, 0)
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
}
memcpy(GDP, Tmp, OpSize);
}
// Vector_FToF: per-element float<->double conversion on vectors.
// Conv packs (destination element size << 8) | source element size.
DEF_OP(Vector_FToF) {
auto Op = IROp->C<IR::IROp_Vector_FToF>();
uint8_t OpSize = IROp->Size;
void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
uint8_t Tmp[16]{};
uint16_t Conv = (Op->Header.ElementSize << 8) | Op->SrcElementSize;
auto Func = [](auto a, auto min, auto max) { return a; };
switch (Conv) {
case 0x0804: { // Double <- float
// Only the lower elements from the source
// This uses half the source elements
uint8_t Elements = OpSize / 8;
DO_VECTOR_1SRC_2TYPE_OP_NOSIZE(double, float, Func, 0, 0)
break;
}
case 0x0408: { // Float <- Double
// Little bit tricky here
// Sometimes is used to convert from a 128bit vector register
// in to a 64bit vector register with different sized elements
// eg: %ssa5 i32v2 = Vector_FToF %ssa4 i128, #0x8
uint8_t Elements = (OpSize << 1) / Op->SrcElementSize;
DO_VECTOR_1SRC_2TYPE_OP_NOSIZE(float, double, Func, 0, 0)
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Conversion Type : 0x{:04x}", Conv); break;
}
memcpy(GDP, Tmp, OpSize);
}
DEF_OP(Vector_FToI) {
  // Vector float rounding to integral-valued float (x86 ROUNDPS/ROUNDPD
  // style): the elements stay floating point, only the value is rounded.
  auto Op = IROp->C<IR::IROp_Vector_FToI>();
  uint8_t OpSize = IROp->Size;

  void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
  uint8_t Tmp[16]{};

  uint8_t Elements = OpSize / Op->Header.ElementSize;

  // Round_Nearest and Round_Host both map to std::rint, which follows the
  // current host rounding mode; for Round_Nearest the mode is presumably
  // round-to-nearest-even at this point -- TODO confirm the FCW handling.
  auto Func_Nearest = [](auto a) { return std::rint(a); };
  auto Func_Neg = [](auto a) { return std::floor(a); };
  auto Func_Pos = [](auto a) { return std::ceil(a); };
  auto Func_Trunc = [](auto a) { return std::trunc(a); };
  auto Func_Host = [](auto a) { return std::rint(a); };

  // NOTE(review): unlike the sibling conversion ops, the inner element-size
  // switches have no default diagnostic; an unexpected size silently leaves
  // Tmp zeroed -- confirm element sizes are validated upstream.
  switch (Op->Round) {
    case FEXCore::IR::Round_Nearest.Val:
      switch (Op->Header.ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Nearest)
        DO_VECTOR_1SRC_OP(8, double, Func_Nearest)
      }
      break;
    case FEXCore::IR::Round_Negative_Infinity.Val:
      switch (Op->Header.ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Neg)
        DO_VECTOR_1SRC_OP(8, double, Func_Neg)
      }
      break;
    case FEXCore::IR::Round_Positive_Infinity.Val:
      switch (Op->Header.ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Pos)
        DO_VECTOR_1SRC_OP(8, double, Func_Pos)
      }
      break;
    case FEXCore::IR::Round_Towards_Zero.Val:
      switch (Op->Header.ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Trunc)
        DO_VECTOR_1SRC_OP(8, double, Func_Trunc)
      }
      break;
    case FEXCore::IR::Round_Host.Val:
      switch (Op->Header.ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Host)
        DO_VECTOR_1SRC_OP(8, double, Func_Host)
      }
      break;
  }
  memcpy(GDP, Tmp, OpSize);
}
#undef DEF_OP
// Installs this translation unit's conversion op implementations into the
// shared interpreter dispatch table (OpHandlers), keyed by IR opcode.
void InterpreterOps::RegisterConversionHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
  REGISTER_OP(VINSGPR,         VInsGPR);
  REGISTER_OP(VCASTFROMGPR,    VCastFromGPR);
  REGISTER_OP(FLOAT_FROMGPR_S, Float_FromGPR_S);
  REGISTER_OP(FLOAT_FTOF,      Float_FToF);
  REGISTER_OP(VECTOR_STOF,     Vector_SToF);
  REGISTER_OP(VECTOR_FTOZS,    Vector_FToZS);
  REGISTER_OP(VECTOR_FTOS,     Vector_FToS);
  REGISTER_OP(VECTOR_FTOF,     Vector_FToF);
  REGISTER_OP(VECTOR_FTOI,     Vector_FToI);
#undef REGISTER_OP
}
}

View File

@ -0,0 +1,443 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
namespace AES {
// Software AES helpers operating on a column-major 16-byte state, matching
// the layout the x86 AES-NI instructions use inside an XMM register.
//
// The GF(2^8) multiplications use the AES reduction polynomial
// x^8 + x^4 + x^3 + x + 1 (0x11b), computed via repeated xtime instead of
// lookup tables.

// Multiply by {02} in GF(2^8) ("xtime"): shift left, conditionally reduce.
static uint8_t FFMul02(uint8_t in) {
  return static_cast<uint8_t>((in << 1) ^ ((in & 0x80) ? 0x1b : 0x00));
}

// Multiply by {03}: 3x = 2x + x.
static uint8_t FFMul03(uint8_t in) {
  return static_cast<uint8_t>(FFMul02(in) ^ in);
}

// Multiply by {09}: 9x = 8x + x.
static uint8_t FFMul09(uint8_t in) {
  return static_cast<uint8_t>(FFMul02(FFMul02(FFMul02(in))) ^ in);
}

// Multiply by {0b}: 11x = 8x + 2x + x.
static uint8_t FFMul0B(uint8_t in) {
  const uint8_t x2 = FFMul02(in);
  const uint8_t x8 = FFMul02(FFMul02(x2));
  return static_cast<uint8_t>(x8 ^ x2 ^ in);
}

// Multiply by {0d}: 13x = 8x + 4x + x.
static uint8_t FFMul0D(uint8_t in) {
  const uint8_t x4 = FFMul02(FFMul02(in));
  const uint8_t x8 = FFMul02(x4);
  return static_cast<uint8_t>(x8 ^ x4 ^ in);
}

// Multiply by {0e}: 14x = 8x + 4x + 2x.
static uint8_t FFMul0E(uint8_t in) {
  const uint8_t x2 = FFMul02(in);
  const uint8_t x4 = FFMul02(x2);
  const uint8_t x8 = FFMul02(x4);
  return static_cast<uint8_t>(x8 ^ x4 ^ x2);
}

// AES ShiftRows as a byte permutation of the column-major state.
static __uint128_t ShiftRows(uint8_t *State) {
  static constexpr uint8_t Map[16] = {
    0,  5,  10, 15,
    4,  9,  14, 3,
    8,  13, 2,  7,
    12, 1,  6,  11,
  };

  uint8_t Shifted[16];
  for (size_t i = 0; i < 16; ++i) {
    Shifted[i] = State[Map[i]];
  }

  __uint128_t Res{};
  memcpy(&Res, Shifted, sizeof(Shifted));
  return Res;
}

// Inverse of ShiftRows; Map here is the inverse permutation of the one above.
static __uint128_t InvShiftRows(uint8_t *State) {
  static constexpr uint8_t Map[16] = {
    0,  13, 10, 7,
    4,  1,  14, 11,
    8,  5,  2,  15,
    12, 9,  6,  3,
  };

  uint8_t Shifted[16];
  for (size_t i = 0; i < 16; ++i) {
    Shifted[i] = State[Map[i]];
  }

  __uint128_t Res{};
  memcpy(&Res, Shifted, sizeof(Shifted));
  return Res;
}

// AES SubBytes over the first `Bytes` bytes of the state (clamped to 16).
// Partial widths are used by AESKeyGenAssist's 32-bit SubWord.
static __uint128_t SubBytes(uint8_t *State, size_t Bytes) {
  // 16x16 matrix table
  static const uint8_t SubstitutionTable[256] = {
    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
  };

  // Clamp so we never index or copy past the 16-byte state.
  const size_t Count = std::min(Bytes, (size_t)16);
  uint8_t Substituted[16]{};
  for (size_t i = 0; i < Count; ++i) {
    Substituted[i] = SubstitutionTable[State[i]];
  }

  __uint128_t Res{};
  memcpy(&Res, Substituted, Count);
  return Res;
}

// Inverse of SubBytes over the full 16-byte state.
static __uint128_t InvSubBytes(uint8_t *State) {
  // 16x16 matrix table
  static const uint8_t InvSubstitutionTable[256] = {
    0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
    0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
    0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
    0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
    0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
    0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
    0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
    0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
    0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
    0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
    0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
    0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
    0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
    0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
    0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
    0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
  };

  uint8_t Substituted[16];
  for (size_t i = 0; i < 16; ++i) {
    Substituted[i] = InvSubstitutionTable[State[i]];
  }

  __uint128_t Res{};
  memcpy(&Res, Substituted, sizeof(Substituted));
  return Res;
}

// AES MixColumns: each column is multiplied by the fixed matrix
// [02 03 01 01; 01 02 03 01; 01 01 02 03; 03 01 01 02] over GF(2^8).
static __uint128_t MixColumns(uint8_t *State) {
  uint8_t Out[16];
  for (size_t Column = 0; Column < 4; ++Column) {
    const uint8_t *In = &State[Column * 4];
    Out[Column * 4 + 0] = FFMul02(In[0]) ^ FFMul03(In[1]) ^ In[2] ^ In[3];
    Out[Column * 4 + 1] = In[0] ^ FFMul02(In[1]) ^ FFMul03(In[2]) ^ In[3];
    Out[Column * 4 + 2] = In[0] ^ In[1] ^ FFMul02(In[2]) ^ FFMul03(In[3]);
    Out[Column * 4 + 3] = FFMul03(In[0]) ^ In[1] ^ In[2] ^ FFMul02(In[3]);
  }

  __uint128_t Res{};
  memcpy(&Res, Out, sizeof(Out));
  return Res;
}

// Inverse MixColumns: fixed matrix [0e 0b 0d 09; 09 0e 0b 0d; 0d 09 0e 0b;
// 0b 0d 09 0e] over GF(2^8).
static __uint128_t InvMixColumns(uint8_t *State) {
  uint8_t Out[16];
  for (size_t Column = 0; Column < 4; ++Column) {
    const uint8_t *In = &State[Column * 4];
    Out[Column * 4 + 0] = FFMul0E(In[0]) ^ FFMul0B(In[1]) ^ FFMul0D(In[2]) ^ FFMul09(In[3]);
    Out[Column * 4 + 1] = FFMul09(In[0]) ^ FFMul0E(In[1]) ^ FFMul0B(In[2]) ^ FFMul0D(In[3]);
    Out[Column * 4 + 2] = FFMul0D(In[0]) ^ FFMul09(In[1]) ^ FFMul0E(In[2]) ^ FFMul0B(In[3]);
    Out[Column * 4 + 3] = FFMul0B(In[0]) ^ FFMul0D(In[1]) ^ FFMul09(In[2]) ^ FFMul0E(In[3]);
  }

  __uint128_t Res{};
  memcpy(&Res, Out, sizeof(Out));
  return Res;
}
}
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)

// AESIMC: InvMixColumns on a single 128-bit value (converts an encryption
// round key into a decryption round key).
DEF_OP(AESImc) {
  auto Op = IROp->C<IR::IROp_VAESImc>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);

  // Pseudo-code
  // Dst = InvMixColumns(STATE)
  __uint128_t Tmp{};
  Tmp = AES::InvMixColumns(reinterpret_cast<uint8_t*>(&Src1));
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

// AESENC: one full AES encryption round on Src1 with round key Src2.
DEF_OP(AESEnc) {
  auto Op = IROp->C<IR::IROp_VAESEnc>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
  __uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);

  // Pseudo-code
  // STATE = Src1
  // RoundKey = Src2
  // STATE = ShiftRows(STATE)
  // STATE = SubBytes(STATE)
  // STATE = MixColumns(STATE)
  // Dst = STATE XOR RoundKey
  __uint128_t Tmp{};
  Tmp = AES::ShiftRows(reinterpret_cast<uint8_t*>(&Src1));
  Tmp = AES::SubBytes(reinterpret_cast<uint8_t*>(&Tmp), 16);
  Tmp = AES::MixColumns(reinterpret_cast<uint8_t*>(&Tmp));
  Tmp = Tmp ^ Src2;
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

// AESENCLAST: final encryption round -- same as AESENC but without
// MixColumns.
DEF_OP(AESEncLast) {
  auto Op = IROp->C<IR::IROp_VAESEncLast>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
  __uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);

  // Pseudo-code
  // STATE = Src1
  // RoundKey = Src2
  // STATE = ShiftRows(STATE)
  // STATE = SubBytes(STATE)
  // Dst = STATE XOR RoundKey
  __uint128_t Tmp{};
  Tmp = AES::ShiftRows(reinterpret_cast<uint8_t*>(&Src1));
  Tmp = AES::SubBytes(reinterpret_cast<uint8_t*>(&Tmp), 16);
  Tmp = Tmp ^ Src2;
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

// AESDEC: one full AES decryption round on Src1 with round key Src2.
DEF_OP(AESDec) {
  auto Op = IROp->C<IR::IROp_VAESDec>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
  __uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);

  // Pseudo-code
  // STATE = Src1
  // RoundKey = Src2
  // STATE = InvShiftRows(STATE)
  // STATE = InvSubBytes(STATE)
  // STATE = InvMixColumns(STATE)
  // Dst = STATE XOR RoundKey
  __uint128_t Tmp{};
  Tmp = AES::InvShiftRows(reinterpret_cast<uint8_t*>(&Src1));
  Tmp = AES::InvSubBytes(reinterpret_cast<uint8_t*>(&Tmp));
  Tmp = AES::InvMixColumns(reinterpret_cast<uint8_t*>(&Tmp));
  Tmp = Tmp ^ Src2;
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

// AESDECLAST: final decryption round -- same as AESDEC but without
// InvMixColumns.
DEF_OP(AESDecLast) {
  auto Op = IROp->C<IR::IROp_VAESDecLast>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
  __uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);

  // Pseudo-code
  // STATE = Src1
  // RoundKey = Src2
  // STATE = InvShiftRows(STATE)
  // STATE = InvSubBytes(STATE)
  // Dst = STATE XOR RoundKey
  __uint128_t Tmp{};
  Tmp = AES::InvShiftRows(reinterpret_cast<uint8_t*>(&Src1));
  Tmp = AES::InvSubBytes(reinterpret_cast<uint8_t*>(&Tmp));
  Tmp = Tmp ^ Src2;
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

// AESKEYGENASSIST: key-expansion helper producing SubWord/RotWord results
// for dwords X1 and X3 of the source, XORed with the round constant.
DEF_OP(AESKeyGenAssist) {
  auto Op = IROp->C<IR::IROp_VAESKeyGenAssist>();
  uint8_t *Src1 = GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]);

  // Pseudo-code
  // X3 = Src1[127:96]
  // X2 = Src1[95:64]
  // X1 = Src1[63:32]
  // X0 = Src1[31:0]
  // RCON = (Zext)rcon
  // Dest[31:0] = SubWord(X1)
  // Dest[63:32] = RotWord(SubWord(X1)) XOR RCON
  // Dest[95:64] = SubWord(X3)
  // Dest[127:96] = RotWord(SubWord(X3)) XOR RCON
  __uint128_t Tmp{};
  uint32_t X1{};
  uint32_t X3{};
  memcpy(&X1, &Src1[4], 4);
  memcpy(&X3, &Src1[12], 4);

  // SubBytes over 4 bytes implements SubWord; the narrowing assignment keeps
  // the low 32 bits of the returned __uint128_t.
  uint32_t SubWord_X1 = AES::SubBytes(reinterpret_cast<uint8_t*>(&X1), 4);
  uint32_t SubWord_X3 = AES::SubBytes(reinterpret_cast<uint8_t*>(&X3), 4);

  // Rotate-right helper; RotWord is a rotate of the 32-bit word by 8.
  auto Ror = [] (auto In, auto R) {
    auto RotateMask = sizeof(In) * 8 - 1;
    R &= RotateMask;
    return (In >> R) | (In << (sizeof(In) * 8 - R));
  };

  uint32_t Rot_X1 = Ror(SubWord_X1, 8);
  uint32_t Rot_X3 = Ror(SubWord_X3, 8);

  // Assemble the 128-bit result from the high dword down to the low dword.
  Tmp = Rot_X3 ^ Op->RCON;
  Tmp <<= 32;
  Tmp |= SubWord_X3;
  Tmp <<= 32;
  Tmp |= Rot_X1 ^ Op->RCON;
  Tmp <<= 32;
  Tmp |= SubWord_X1;
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

#undef DEF_OP

// Installs the AES op implementations into the interpreter dispatch table.
void InterpreterOps::RegisterEncryptionHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
  REGISTER_OP(VAESIMC,           AESImc);
  REGISTER_OP(VAESENC,           AESEnc);
  REGISTER_OP(VAESENCLAST,       AESEncLast);
  REGISTER_OP(VAESDEC,           AESDec);
  REGISTER_OP(VAESDECLAST,       AESDecLast);
  REGISTER_OP(VAESKEYGENASSIST,  AESKeyGenAssist);
#undef REGISTER_OP
}
}

View File

@ -0,0 +1,389 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include "F80Ops.h"
#include <cstdint>
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
DEF_OP(F80LOADFCW) {
FEXCore::CPU::OpHandlers<IR::OP_F80LOADFCW>::handle(*GetSrc<uint16_t*>(Data->SSAData, IROp->Args[0]));
}
DEF_OP(F80ADD) {
auto Op = IROp->C<IR::IROp_F80Add>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FADD(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80SUB) {
auto Op = IROp->C<IR::IROp_F80Sub>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FSUB(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80MUL) {
auto Op = IROp->C<IR::IROp_F80Mul>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FMUL(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80DIV) {
auto Op = IROp->C<IR::IROp_F80Div>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FDIV(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80FYL2X) {
auto Op = IROp->C<IR::IROp_F80FYL2X>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FYL2X(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80ATAN) {
auto Op = IROp->C<IR::IROp_F80ATAN>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FATAN(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80FPREM1) {
auto Op = IROp->C<IR::IROp_F80FPREM1>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FREM1(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80FPREM) {
auto Op = IROp->C<IR::IROp_F80FPREM>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FREM(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80SCALE) {
auto Op = IROp->C<IR::IROp_F80SCALE>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FSCALE(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80CVT) {
auto Op = IROp->C<IR::IROp_F80CVT>();
uint8_t OpSize = IROp->Size;
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
switch (OpSize) {
case 4: {
float Tmp = Src;
memcpy(GDP, &Tmp, OpSize);
break;
}
case 8: {
double Tmp = Src;
memcpy(GDP, &Tmp, OpSize);
break;
}
default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize);
}
}
DEF_OP(F80CVTINT) {
auto Op = IROp->C<IR::IROp_F80CVTInt>();
uint8_t OpSize = IROp->Size;
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
switch (OpSize) {
case 2: {
int16_t Tmp = (Op->Truncate? FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle2t : FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle2)(Src);
memcpy(GDP, &Tmp, sizeof(Tmp));
break;
}
case 4: {
int32_t Tmp = (Op->Truncate? FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle4t : FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle4)(Src);
memcpy(GDP, &Tmp, sizeof(Tmp));
break;
}
case 8: {
int64_t Tmp = (Op->Truncate? FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle8t : FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle8)(Src);
memcpy(GDP, &Tmp, sizeof(Tmp));
break;
}
default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize);
}
}
DEF_OP(F80CVTTO) {
auto Op = IROp->C<IR::IROp_F80CVTTo>();
switch (Op->Size) {
case 4: {
float Src = *GetSrc<float *>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp = Src;
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
break;
}
case 8: {
double Src = *GetSrc<double *>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp = Src;
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
break;
}
default: LogMan::Msg::DFmt("Unhandled size: {}", Op->Size);
}
}
DEF_OP(F80CVTTOINT) {
auto Op = IROp->C<IR::IROp_F80CVTToInt>();
switch (Op->Size) {
case 2: {
int16_t Src = *GetSrc<int16_t*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp = Src;
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
break;
}
case 4: {
int32_t Src = *GetSrc<int32_t*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp = Src;
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
break;
}
default: LogMan::Msg::DFmt("Unhandled size: {}", Op->Size);
}
}
DEF_OP(F80ROUND) {
auto Op = IROp->C<IR::IROp_F80Round>();
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FRNDINT(Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80F2XM1) {
auto Op = IROp->C<IR::IROp_F80F2XM1>();
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::F2XM1(Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80TAN) {
auto Op = IROp->C<IR::IROp_F80TAN>();
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FTAN(Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80SQRT) {
auto Op = IROp->C<IR::IROp_F80SQRT>();
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FSQRT(Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80SIN) {
auto Op = IROp->C<IR::IROp_F80SIN>();
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FSIN(Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80COS) {
auto Op = IROp->C<IR::IROp_F80COS>();
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FCOS(Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80XTRACT_EXP) {
auto Op = IROp->C<IR::IROp_F80XTRACT_EXP>();
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FXTRACT_EXP(Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80XTRACT_SIG) {
auto Op = IROp->C<IR::IROp_F80XTRACT_SIG>();
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FXTRACT_SIG(Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80CMP) {
auto Op = IROp->C<IR::IROp_F80Cmp>();
uint32_t ResultFlags{};
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
bool eq, lt, nan;
X80SoftFloat::FCMP(Src1, Src2, &eq, &lt, &nan);
if (Op->Flags & (1 << IR::FCMP_FLAG_LT) &&
lt) {
ResultFlags |= (1 << IR::FCMP_FLAG_LT);
}
if (Op->Flags & (1 << IR::FCMP_FLAG_UNORDERED) &&
nan) {
ResultFlags |= (1 << IR::FCMP_FLAG_UNORDERED);
}
if (Op->Flags & (1 << IR::FCMP_FLAG_EQ) &&
eq) {
ResultFlags |= (1 << IR::FCMP_FLAG_EQ);
}
GD = ResultFlags;
}
DEF_OP(F80BCDLOAD) {
auto Op = IROp->C<IR::IROp_F80BCDLoad>();
uint8_t *Src1 = GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]);
uint64_t BCD{};
// We walk through each uint8_t and pull out the BCD encoding
// Each 4bit split is a digit
// Only 0-9 is supported, A-F results in undefined data
// | 4 bit | 4 bit |
// | 10s place | 1s place |
// EG 0x48 = 48
// EG 0x4847 = 4847
// This gives us an 18digit value encoded in BCD
// The last byte lets us know if it negative or not
for (size_t i = 0; i < 9; ++i) {
uint8_t Digit = Src1[8 - i];
// First shift our last value over
BCD *= 100;
// Add the tens place digit
BCD += (Digit >> 4) * 10;
// Add the ones place digit
BCD += Digit & 0xF;
}
// Set negative flag once converted to x87
bool Negative = Src1[9] & 0x80;
X80SoftFloat Tmp;
Tmp = BCD;
Tmp.Sign = Negative;
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80BCDSTORE) {
  // Convert an extended float to the 80-bit packed-BCD form (x87 FBSTP).
  auto const *Op = IROp->C<IR::IROp_F80BCDStore>();
  X80SoftFloat Value = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);

  // Remember the sign, then convert the magnitude to an integer.
  const bool Negative = Value.Sign;
  Value.Sign = 0;
  uint64_t Remaining = Value;

  // Emit two decimal digits per byte, least significant byte first, stopping
  // early once the value is exhausted (remaining bytes stay zero).
  uint8_t Packed[10]{};
  for (size_t Idx = 0; Idx < 9 && Remaining != 0; ++Idx) {
    const uint8_t TwoDigits = Remaining % 100;
    Remaining /= 100;
    Packed[Idx] = ((TwoDigits / 10) << 4) | (TwoDigits % 10);
  }

  // Sign lives in bit 7 of the final byte.
  Packed[9] = Negative ? 0x80 : 0;
  memcpy(GDP, Packed, 10);
}
#undef DEF_OP
// Installs the F80 (x87 extended-precision) op handlers into the shared
// InterpreterOps::OpHandlers dispatch table. Called once at startup from
// InitializeInterpreterOpHandlers.
void InterpreterOps::RegisterF80Handlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
REGISTER_OP(F80LOADFCW, F80LOADFCW);
REGISTER_OP(F80ADD, F80ADD);
REGISTER_OP(F80SUB, F80SUB);
REGISTER_OP(F80MUL, F80MUL);
REGISTER_OP(F80DIV, F80DIV);
REGISTER_OP(F80FYL2X, F80FYL2X);
REGISTER_OP(F80ATAN, F80ATAN);
REGISTER_OP(F80FPREM1, F80FPREM1);
REGISTER_OP(F80FPREM, F80FPREM);
REGISTER_OP(F80SCALE, F80SCALE);
REGISTER_OP(F80CVT, F80CVT);
REGISTER_OP(F80CVTINT, F80CVTINT);
REGISTER_OP(F80CVTTO, F80CVTTO);
REGISTER_OP(F80CVTTOINT, F80CVTTOINT);
REGISTER_OP(F80ROUND, F80ROUND);
REGISTER_OP(F80F2XM1, F80F2XM1);
REGISTER_OP(F80TAN, F80TAN);
REGISTER_OP(F80SQRT, F80SQRT);
REGISTER_OP(F80SIN, F80SIN);
REGISTER_OP(F80COS, F80COS);
REGISTER_OP(F80XTRACT_EXP, F80XTRACT_EXP);
REGISTER_OP(F80XTRACT_SIG, F80XTRACT_SIG);
REGISTER_OP(F80CMP, F80CMP);
REGISTER_OP(F80BCDLOAD, F80BCDLOAD);
REGISTER_OP(F80BCDSTORE, F80BCDSTORE);
#undef REGISTER_OP
}
}

View File

@ -0,0 +1,330 @@
#pragma once
#include "Common/SoftFloat.h"
#include "Common/SoftFloat-3e/softfloat.h"
#include <FEXCore/IR/IR.h>
namespace FEXCore::CPU {
// Fallback handler table: the primary template is intentionally empty and
// each supported IR op provides an explicit specialization below. These
// helpers implement x87 80-bit ops that backends fall back to.
template<IR::IROps Op>
struct OpHandlers {
};
// F80CVTTO: widen a 32-bit or 64-bit float to 80-bit extended precision
// via X80SoftFloat's converting constructor.
template<>
struct OpHandlers<IR::OP_F80CVTTO> {
static X80SoftFloat handle4(float src) {
return src;
}
static X80SoftFloat handle8(double src) {
return src;
}
};
// F80CMP: compare two 80-bit values and pack the FCMP_FLAG_* bits selected
// by the compile-time Flags mask.
template<>
struct OpHandlers<IR::OP_F80CMP> {
  template<uint32_t Flags>
  static uint64_t handle(X80SoftFloat Src1, X80SoftFloat Src2) {
    bool Equal{};
    bool LessThan{};
    bool Unordered{};
    X80SoftFloat::FCMP(Src1, Src2, &Equal, &LessThan, &Unordered);

    uint64_t Result = 0;
    if ((Flags & (1U << IR::FCMP_FLAG_LT)) && LessThan) {
      Result |= 1U << IR::FCMP_FLAG_LT;
    }
    if ((Flags & (1U << IR::FCMP_FLAG_UNORDERED)) && Unordered) {
      Result |= 1U << IR::FCMP_FLAG_UNORDERED;
    }
    if ((Flags & (1U << IR::FCMP_FLAG_EQ)) && Equal) {
      Result |= 1U << IR::FCMP_FLAG_EQ;
    }
    return Result;
  }
};
// F80CVT: narrow 80-bit extended precision to a 32-bit or 64-bit float via
// X80SoftFloat's conversion operators.
template<>
struct OpHandlers<IR::OP_F80CVT> {
static float handle4(X80SoftFloat src) {
return src;
}
static double handle8(X80SoftFloat src) {
return src;
}
};
// F80CVTINT: convert 80-bit extended precision to a signed integer.
// handleN go through X80SoftFloat's conversion operator; the handleNt
// variants explicitly truncate toward zero (softfloat_round_minMag).
template<>
struct OpHandlers<IR::OP_F80CVTINT> {
static int16_t handle2(X80SoftFloat src) {
return src;
}
static int32_t handle4(X80SoftFloat src) {
return src;
}
static int64_t handle8(X80SoftFloat src) {
return src;
}
static int16_t handle2t(X80SoftFloat src) {
// softfloat has no direct to-int16 conversion; truncate to int32 and
// clamp the result to the int16 range by hand.
auto rv = extF80_to_i32(src, softfloat_round_minMag, false);
if (rv > INT16_MAX) {
return INT16_MAX;
} else if (rv < INT16_MIN) {
return INT16_MIN;
} else {
return rv;
}
}
static int32_t handle4t(X80SoftFloat src) {
return extF80_to_i32(src, softfloat_round_minMag, false);
}
static int64_t handle8t(X80SoftFloat src) {
return extF80_to_i64(src, softfloat_round_minMag, false);
}
};
// F80CVTTOINT: widen a 16-bit or 32-bit signed integer to 80-bit extended.
template<>
struct OpHandlers<IR::OP_F80CVTTOINT> {
static X80SoftFloat handle2(int16_t src) {
return src;
}
static X80SoftFloat handle4(int32_t src) {
return src;
}
};
// Unary x87 helpers: each forwards to the matching X80SoftFloat operation.
template<>
struct OpHandlers<IR::OP_F80ROUND> {
static X80SoftFloat handle(X80SoftFloat Src1) {
return X80SoftFloat::FRNDINT(Src1);
}
};
template<>
struct OpHandlers<IR::OP_F80F2XM1> {
static X80SoftFloat handle(X80SoftFloat Src1) {
return X80SoftFloat::F2XM1(Src1);
}
};
template<>
struct OpHandlers<IR::OP_F80TAN> {
static X80SoftFloat handle(X80SoftFloat Src1) {
return X80SoftFloat::FTAN(Src1);
}
};
template<>
struct OpHandlers<IR::OP_F80SQRT> {
static X80SoftFloat handle(X80SoftFloat Src1) {
return X80SoftFloat::FSQRT(Src1);
}
};
template<>
struct OpHandlers<IR::OP_F80SIN> {
static X80SoftFloat handle(X80SoftFloat Src1) {
return X80SoftFloat::FSIN(Src1);
}
};
template<>
struct OpHandlers<IR::OP_F80COS> {
static X80SoftFloat handle(X80SoftFloat Src1) {
return X80SoftFloat::FCOS(Src1);
}
};
template<>
struct OpHandlers<IR::OP_F80XTRACT_EXP> {
static X80SoftFloat handle(X80SoftFloat Src1) {
return X80SoftFloat::FXTRACT_EXP(Src1);
}
};
template<>
struct OpHandlers<IR::OP_F80XTRACT_SIG> {
static X80SoftFloat handle(X80SoftFloat Src1) {
return X80SoftFloat::FXTRACT_SIG(Src1);
}
};
// Binary x87 helpers: forward both operands to the X80SoftFloat operation.
template<>
struct OpHandlers<IR::OP_F80ADD> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FADD(Src1, Src2);
}
};
template<>
struct OpHandlers<IR::OP_F80SUB> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FSUB(Src1, Src2);
}
};
template<>
struct OpHandlers<IR::OP_F80MUL> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FMUL(Src1, Src2);
}
};
template<>
struct OpHandlers<IR::OP_F80DIV> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FDIV(Src1, Src2);
}
};
template<>
struct OpHandlers<IR::OP_F80FYL2X> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FYL2X(Src1, Src2);
}
};
template<>
struct OpHandlers<IR::OP_F80ATAN> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FATAN(Src1, Src2);
}
};
template<>
struct OpHandlers<IR::OP_F80FPREM1> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FREM1(Src1, Src2);
}
};
template<>
struct OpHandlers<IR::OP_F80FPREM> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FREM(Src1, Src2);
}
};
template<>
struct OpHandlers<IR::OP_F80SCALE> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FSCALE(Src1, Src2);
}
};
// F80BCDSTORE: convert an extended float to 80-bit packed BCD (x87 FBSTP).
// NOTE: the ten result bytes are returned through an X80SoftFloat's raw
// storage — the return value is a BCD image, not a numeric float.
template<>
struct OpHandlers<IR::OP_F80BCDSTORE> {
static X80SoftFloat handle(X80SoftFloat Src1) {
bool Negative = Src1.Sign;
// Clear the Sign bit
Src1.Sign = 0;
// Integer magnitude via X80SoftFloat's conversion operator.
uint64_t Tmp = Src1;
X80SoftFloat Rv;
uint8_t *BCD = reinterpret_cast<uint8_t*>(&Rv);
memset(BCD, 0, 10);
// Two decimal digits per byte, least significant byte first.
for (size_t i = 0; i < 9; ++i) {
if (Tmp == 0) {
// Nothing left? Just leave
break;
}
// Extract the lower 100 values
uint8_t Digit = Tmp % 100;
// Now divide it for the next iteration
Tmp /= 100;
uint8_t UpperNibble = Digit / 10;
uint8_t LowerNibble = Digit % 10;
// Now store the BCD
BCD[i] = (UpperNibble << 4) | LowerNibble;
}
// Set negative flag once converted to x87
BCD[9] = Negative ? 0x80 : 0;
return Rv;
}
};
// F80BCDLOAD: convert 80-bit packed BCD (x87 FBLD operand) to extended
// float. The source arrives as raw BCD bytes in an X80SoftFloat's storage.
template<>
struct OpHandlers<IR::OP_F80BCDLOAD> {
static X80SoftFloat handle(X80SoftFloat Src) {
uint8_t *Src1 = reinterpret_cast<uint8_t *>(&Src);
uint64_t BCD{};
// We walk through each uint8_t and pull out the BCD encoding
// Each 4bit split is a digit
// Only 0-9 is supported, A-F results in undefined data
// | 4 bit | 4 bit |
// | 10s place | 1s place |
// EG 0x48 = 48
// EG 0x4847 = 4847
// This gives us an 18digit value encoded in BCD
// The last byte lets us know if it negative or not
for (size_t i = 0; i < 9; ++i) {
uint8_t Digit = Src1[8 - i];
// First shift our last value over
BCD *= 100;
// Add the tens place digit
BCD += (Digit >> 4) * 10;
// Add the ones place digit
BCD += Digit & 0xF;
}
// Set negative flag once converted to x87
bool Negative = Src1[9] & 0x80;
X80SoftFloat Tmp;
Tmp = BCD;
Tmp.Sign = Negative;
return Tmp;
}
};
// F80LOADFCW: apply an x87 control word (FCW) to the global softfloat state.
// Bits 8-9 are the precision-control field, bits 10-11 the rounding-control
// field; other FCW bits (exception masks) are ignored here.
template<>
struct OpHandlers<IR::OP_F80LOADFCW> {
  static void handle(uint16_t NewFCW) {
    // Precision control: 00 = 24-bit mantissa (float), 10 = 53-bit (double),
    // 11 = 64-bit (extended). 01 is reserved.
    auto PC = (NewFCW >> 8) & 3;
    switch (PC) {
      case 0: extF80_roundingPrecision = 32; break;
      case 2: extF80_roundingPrecision = 64; break;
      case 3: extF80_roundingPrecision = 80; break;
      case 1:
        LOGMAN_MSG_A_FMT("Invalid x87 precision mode, {}", PC);
        // Fix: terminate the case explicitly instead of falling off the end
        // of the switch; keeps the case safe if cases are ever reordered.
        break;
    }

    // Rounding control: nearest-even, toward -inf, toward +inf, toward zero.
    auto RC = (NewFCW >> 10) & 3;
    switch (RC) {
      case 0:
        softfloat_roundingMode = softfloat_round_near_even;
        break;
      case 1:
        softfloat_roundingMode = softfloat_round_min;
        break;
      case 2:
        softfloat_roundingMode = softfloat_round_max;
        break;
      case 3:
        softfloat_roundingMode = softfloat_round_minMag;
        break;
    }
  }
};
}

View File

@ -0,0 +1,27 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
DEF_OP(GetHostFlag) {
  // Extract one flag bit: shift the packed flag source right by the flag
  // index and mask down to a single bit.
  auto const *Op = IROp->C<IR::IROp_GetHostFlag>();
  const uint64_t Packed = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]);
  GD = (Packed >> Op->Flag) & 1;
}
#undef DEF_OP
// Installs the flag op handlers into the shared dispatch table.
void InterpreterOps::RegisterFlagHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
REGISTER_OP(GETHOSTFLAG, GetHostFlag);
#undef REGISTER_OP
}
}

View File

@ -32,19 +32,16 @@ public:
bool HandleSIGBUS(int Signal, void *info, void *ucontext);
static void InitializeInterpreterOpHandlers();
private:
FEXCore::Context::Context *CTX;
FEXCore::Core::InternalThreadState *State;
uint32_t AllocateTmpSpace(size_t Size);
template<typename Res>
Res GetDest(void* SSAData, IR::OrderedNodeWrapper Op);
template<typename Res>
Res GetSrc(void* SSAData, IR::OrderedNodeWrapper Src);
std::unique_ptr<Dispatcher> Dispatcher{};
};
}

View File

@ -95,10 +95,27 @@ bool InterpreterCore::HandleSIGBUS(int Signal, void *info, void *ucontext) {
return false;
}
void InitializeInterpreterOpHandlers() {
  // Default every dispatch-table entry to the Unhandled trap, then let each
  // op category overwrite its own entries.
  InterpreterOps::OpHandlers.fill(&InterpreterOps::Op_Unhandled);

  InterpreterOps::RegisterALUHandlers();
  InterpreterOps::RegisterAtomicHandlers();
  InterpreterOps::RegisterBranchHandlers();
  InterpreterOps::RegisterConversionHandlers();
  InterpreterOps::RegisterFlagHandlers();
  InterpreterOps::RegisterMemoryHandlers();
  InterpreterOps::RegisterMiscHandlers();
  InterpreterOps::RegisterMoveHandlers();
  InterpreterOps::RegisterVectorHandlers();
  InterpreterOps::RegisterEncryptionHandlers();
  InterpreterOps::RegisterF80Handlers();
}
InterpreterCore::InterpreterCore(FEXCore::Context::Context *ctx, FEXCore::Core::InternalThreadState *Thread, bool CompileThread)
: CTX {ctx}
, State {Thread} {
// Grab our space for temporary data
if (!CompileThread &&
CTX->Config.Core == FEXCore::Config::CONFIG_INTERPRETER) {

View File

@ -13,6 +13,7 @@ namespace FEXCore::Core {
namespace FEXCore::CPU {
class CPUBackend;
void InitializeInterpreterOpHandlers();
std::unique_ptr<CPUBackend> CreateInterpreterCore(FEXCore::Context::Context *ctx, FEXCore::Core::InternalThreadState *Thread, bool CompileThread);
}

View File

@ -0,0 +1,179 @@
#pragma once
#include <FEXCore/IR/IR.h>
// Destination accessors for the current op: GD is the 64-bit destination
// lvalue, GDP the raw pointer to the op's full 128-bit SSA slot.
#define GD *GetDest<uint64_t*>(Data->SSAData, Node)
#define GDP GetDest<void*>(Data->SSAData, Node)
// The DO_* helpers below expand inside op handlers as `case` bodies and
// expect locals named Src/Src1/Src2/Tmp/Elements at the expansion site.
// Scalar binary op: *Dst = func(*Src1, *Src2) at the given element size.
#define DO_OP(size, type, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(GDP); \
auto *Src1_d = reinterpret_cast<type*>(Src1); \
auto *Src2_d = reinterpret_cast<type*>(Src2); \
*Dst_d = func(*Src1_d, *Src2_d); \
break; \
}
// Scalar compare: result (possibly a wider type2) written to Tmp lane 0.
#define DO_SCALAR_COMPARE_OP(size, type, type2, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type2*>(Tmp); \
auto *Src1_d = reinterpret_cast<type*>(Src1); \
auto *Src2_d = reinterpret_cast<type*>(Src2); \
Dst_d[0] = func(Src1_d[0], Src2_d[0]); \
break; \
}
// Element-wise compare over `Elements` lanes, result lanes of type2.
#define DO_VECTOR_COMPARE_OP(size, type, type2, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type2*>(Tmp); \
auto *Src1_d = reinterpret_cast<type*>(Src1); \
auto *Src2_d = reinterpret_cast<type*>(Src2); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = func(Src1_d[i], Src2_d[i]); \
} \
break; \
}
// Element-wise binary op over `Elements` lanes.
#define DO_VECTOR_OP(size, type, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src1_d = reinterpret_cast<type*>(Src1); \
auto *Src2_d = reinterpret_cast<type*>(Src2); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = func(Src1_d[i], Src2_d[i]); \
} \
break; \
}
// Pairwise op: adjacent pairs of Src1 fill the low result lanes, adjacent
// pairs of Src2 fill the high result lanes.
#define DO_VECTOR_PAIR_OP(size, type, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src1_d = reinterpret_cast<type*>(Src1); \
auto *Src2_d = reinterpret_cast<type*>(Src2); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = func(Src1_d[i*2], Src1_d[i*2 + 1]); \
Dst_d[i+Elements] = func(Src2_d[i*2], Src2_d[i*2 + 1]); \
} \
break; \
}
// Vector-by-scalar op: the second operand is always the scalar *Src2.
#define DO_VECTOR_SCALAR_OP(size, type, func)\
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src1_d = reinterpret_cast<type*>(Src1); \
auto *Src2_d = reinterpret_cast<type*>(Src2); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = func(Src1_d[i], *Src2_d); \
} \
break; \
}
// Generator op with no sources: every lane receives func().
#define DO_VECTOR_0SRC_OP(size, type, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = func(); \
} \
break; \
}
// Element-wise unary op.
#define DO_VECTOR_1SRC_OP(size, type, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src_d = reinterpret_cast<type*>(Src); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = func(Src_d[i]); \
} \
break; \
}
// Horizontal reduction: fold all lanes with func, seeded by start_val,
// result stored in lane 0.
#define DO_VECTOR_REDUCE_1SRC_OP(size, type, func, start_val) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src_d = reinterpret_cast<type*>(Src); \
type begin = start_val; \
for (uint8_t i = 0; i < Elements; ++i) { \
begin = func(begin, Src_d[i]); \
} \
Dst_d[0] = begin; \
break; \
}
// Saturating element-wise binary op; func receives the [min, max] clamp.
#define DO_VECTOR_SAT_OP(size, type, func, min, max) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src1_d = reinterpret_cast<type*>(Src1); \
auto *Src2_d = reinterpret_cast<type*>(Src2); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = func(Src1_d[i], Src2_d[i], min, max); \
} \
break; \
}
// Converting unary op: source lanes are type2, results cast to type.
#define DO_VECTOR_1SRC_2TYPE_OP(size, type, type2, func, min, max) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src_d = reinterpret_cast<type2*>(Src); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = (type)func(Src_d[i], min, max); \
} \
break; \
}
// As DO_VECTOR_1SRC_2TYPE_OP, but without a case label so it can be used
// outside a switch.
#define DO_VECTOR_1SRC_2TYPE_OP_NOSIZE(type, type2, func, min, max) \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src_d = reinterpret_cast<type2*>(Src); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = (type)func(Src_d[i], min, max); \
}
// Converting op into the top half: low half copied from Src1, converted
// Src2 lanes written to the upper Elements lanes.
#define DO_VECTOR_1SRC_2TYPE_OP_TOP(size, type, type2, func, min, max) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src_d = reinterpret_cast<type2*>(Src2); \
memcpy(Dst_d, Src1, Elements * sizeof(type2));\
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i+Elements] = (type)func(Src_d[i], min, max); \
} \
break; \
}
// Converting op reading the upper source lanes into the low result lanes.
#define DO_VECTOR_1SRC_2TYPE_OP_TOP_SRC(size, type, type2, func, min, max) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src_d = reinterpret_cast<type2*>(Src); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = (type)func(Src_d[i+Elements], min, max); \
} \
break; \
}
// Widening binary op: type2 sources promoted to type before func.
#define DO_VECTOR_2SRC_2TYPE_OP(size, type, type2, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src1_d = reinterpret_cast<type2*>(Src1); \
auto *Src2_d = reinterpret_cast<type2*>(Src2); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = (type)func((type)Src1_d[i], (type)Src2_d[i]); \
} \
break; \
}
// Widening binary op over the upper halves of both sources.
#define DO_VECTOR_2SRC_2TYPE_OP_TOP_SRC(size, type, type2, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src1_d = reinterpret_cast<type2*>(Src1); \
auto *Src2_d = reinterpret_cast<type2*>(Src2); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = (type)func((type)Src1_d[i+Elements], (type)Src2_d[i+Elements]); \
} \
break; \
}
template<typename Res>
Res GetDest(void* SSAData, FEXCore::IR::OrderedNodeWrapper Op) {
  // Each SSA node owns one 128-bit slot; hand back its address reinterpreted
  // as the caller's pointer type.
  auto *Slots = reinterpret_cast<__uint128_t*>(SSAData);
  return reinterpret_cast<Res>(&Slots[Op.ID()]);
}
template<typename Res>
Res GetDest(void* SSAData, uint32_t Op) {
  // Index the 128-bit-per-node SSA slot array directly by node id.
  auto *Slots = reinterpret_cast<__uint128_t*>(SSAData);
  return reinterpret_cast<Res>(&Slots[Op]);
}
template<typename Res>
Res GetSrc(void* SSAData, FEXCore::IR::OrderedNodeWrapper Src) {
  // Sources live in the same 128-bit-per-node SSA slot array as destinations.
  auto *Slots = reinterpret_cast<__uint128_t*>(SSAData);
  return reinterpret_cast<Res>(&Slots[Src.ID()]);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,9 @@
#pragma once
#include <stdint.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/IR/IntrusiveIRList.h>
namespace FEXCore::Core {
struct InternalThreadState;
}
@ -42,5 +45,365 @@ namespace FEXCore::CPU {
public:
static void InterpretIR(FEXCore::Core::InternalThreadState *Thread, uint64_t Entry, FEXCore::IR::IRListView *CurrentIR, FEXCore::Core::DebugData *DebugData);
static bool GetFallbackHandler(IR::IROp_Header *IROp, FallbackInfo *Info);
static void RegisterALUHandlers();
static void RegisterAtomicHandlers();
static void RegisterBranchHandlers();
static void RegisterConversionHandlers();
static void RegisterFlagHandlers();
static void RegisterMemoryHandlers();
static void RegisterMiscHandlers();
static void RegisterMoveHandlers();
static void RegisterVectorHandlers();
static void RegisterEncryptionHandlers();
static void RegisterF80Handlers();
// Per-invocation interpreter state threaded through every op handler.
struct IROpData {
// Thread executing this block.
FEXCore::Core::InternalThreadState *State{};
// Guest entry address of the block being interpreted.
uint64_t CurrentEntry{};
// The IR list currently being walked.
FEXCore::IR::IRListView *CurrentIR{};
// Stack marker captured at dispatch; presumably used for signal/callback
// return unwinding — confirm against InterpretIR.
volatile void *StackEntry{};
// Backing storage for SSA values (one 128-bit slot per node id).
void *SSAData{};
// Block-level control-flow results set by branch/end ops.
struct {
bool Quit;
bool Redo;
} BlockResults{};
// Iterator over the current block's ops.
IR::NodeIterator BlockIterator{0, 0};
};
using OpHandler = std::function<void(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)>;
static std::array<OpHandler, FEXCore::IR::IROps::OP_LAST + 1> OpHandlers;
#define DEF_OP(x) static void Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
///< Unhandled handler
DEF_OP(Unhandled);
///< No-op Handler
DEF_OP(NoOp);
///< ALU Ops
DEF_OP(TruncElementPair);
DEF_OP(Constant);
DEF_OP(EntrypointOffset);
DEF_OP(InlineConstant);
DEF_OP(InlineEntrypointOffset);
DEF_OP(CycleCounter);
DEF_OP(Add);
DEF_OP(Sub);
DEF_OP(Neg);
DEF_OP(Mul);
DEF_OP(UMul);
DEF_OP(Div);
DEF_OP(UDiv);
DEF_OP(Rem);
DEF_OP(URem);
DEF_OP(MulH);
DEF_OP(UMulH);
DEF_OP(Or);
DEF_OP(And);
DEF_OP(Xor);
DEF_OP(Lshl);
DEF_OP(Lshr);
DEF_OP(Ashr);
DEF_OP(Rol);
DEF_OP(Ror);
DEF_OP(Extr);
DEF_OP(LDiv);
DEF_OP(LUDiv);
DEF_OP(LRem);
DEF_OP(LURem);
DEF_OP(Zext);
DEF_OP(Not);
DEF_OP(Popcount);
DEF_OP(FindLSB);
DEF_OP(FindMSB);
DEF_OP(FindTrailingZeros);
DEF_OP(CountLeadingZeroes);
DEF_OP(Rev);
DEF_OP(Bfi);
DEF_OP(Bfe);
DEF_OP(Sbfe);
DEF_OP(Select);
DEF_OP(VExtractToGPR);
DEF_OP(Float_ToGPR_ZU);
DEF_OP(Float_ToGPR_ZS);
DEF_OP(Float_ToGPR_S);
DEF_OP(FCmp);
///< Atomic ops
DEF_OP(CASPair);
DEF_OP(CAS);
DEF_OP(AtomicAdd);
DEF_OP(AtomicSub);
DEF_OP(AtomicAnd);
DEF_OP(AtomicOr);
DEF_OP(AtomicXor);
DEF_OP(AtomicSwap);
DEF_OP(AtomicFetchAdd);
DEF_OP(AtomicFetchSub);
DEF_OP(AtomicFetchAnd);
DEF_OP(AtomicFetchOr);
DEF_OP(AtomicFetchXor);
DEF_OP(AtomicFetchNeg);
///< Branch ops
DEF_OP(GuestCallDirect);
DEF_OP(GuestCallIndirect);
DEF_OP(GuestReturn);
DEF_OP(SignalReturn);
DEF_OP(CallbackReturn);
DEF_OP(ExitFunction);
DEF_OP(Jump);
DEF_OP(CondJump);
DEF_OP(Syscall);
DEF_OP(Thunk);
DEF_OP(ValidateCode);
DEF_OP(RemoveCodeEntry);
DEF_OP(CPUID);
///< Conversion ops
DEF_OP(VInsGPR);
DEF_OP(VCastFromGPR);
DEF_OP(Float_FromGPR_S);
DEF_OP(Float_FToF);
DEF_OP(Vector_SToF);
DEF_OP(Vector_FToZS);
DEF_OP(Vector_FToS);
DEF_OP(Vector_FToF);
DEF_OP(Vector_FToI);
///< Flag ops
DEF_OP(GetHostFlag);
///< Memory ops
DEF_OP(LoadContext);
DEF_OP(StoreContext);
DEF_OP(LoadRegister);
DEF_OP(StoreRegister);
DEF_OP(LoadContextIndexed);
DEF_OP(StoreContextIndexed);
DEF_OP(SpillRegister);
DEF_OP(FillRegister);
DEF_OP(LoadFlag);
DEF_OP(StoreFlag);
DEF_OP(LoadMem);
DEF_OP(StoreMem);
DEF_OP(VLoadMemElement);
DEF_OP(VStoreMemElement);
DEF_OP(CacheLineClear);
///< Misc ops
DEF_OP(EndBlock);
DEF_OP(Fence);
DEF_OP(Break);
DEF_OP(Phi);
DEF_OP(PhiValue);
DEF_OP(Print);
DEF_OP(GetRoundingMode);
DEF_OP(SetRoundingMode);
///< Move ops
DEF_OP(ExtractElementPair);
DEF_OP(CreateElementPair);
DEF_OP(Mov);
///< Vector ops
DEF_OP(VectorZero);
DEF_OP(VectorImm);
DEF_OP(CreateVector2);
DEF_OP(CreateVector4);
DEF_OP(SplatVector);
DEF_OP(VMov);
DEF_OP(VAnd);
DEF_OP(VBic);
DEF_OP(VOr);
DEF_OP(VXor);
DEF_OP(VAdd);
DEF_OP(VSub);
DEF_OP(VUQAdd);
DEF_OP(VUQSub);
DEF_OP(VSQAdd);
DEF_OP(VSQSub);
DEF_OP(VAddP);
DEF_OP(VAddV);
DEF_OP(VUMinV);
DEF_OP(VURAvg);
DEF_OP(VAbs);
DEF_OP(VPopcount);
DEF_OP(VFAdd);
DEF_OP(VFAddP);
DEF_OP(VFSub);
DEF_OP(VFMul);
DEF_OP(VFDiv);
DEF_OP(VFMin);
DEF_OP(VFMax);
DEF_OP(VFRecp);
DEF_OP(VFSqrt);
DEF_OP(VFRSqrt);
DEF_OP(VNeg);
DEF_OP(VFNeg);
DEF_OP(VNot);
DEF_OP(VUMin);
DEF_OP(VSMin);
DEF_OP(VUMax);
DEF_OP(VSMax);
DEF_OP(VZip);
DEF_OP(VUnZip);
DEF_OP(VBSL);
DEF_OP(VCMPEQ);
DEF_OP(VCMPEQZ);
DEF_OP(VCMPGT);
DEF_OP(VCMPGTZ);
DEF_OP(VCMPLTZ);
DEF_OP(VFCMPEQ);
DEF_OP(VFCMPNEQ);
DEF_OP(VFCMPLT);
DEF_OP(VFCMPGT);
DEF_OP(VFCMPLE);
DEF_OP(VFCMPORD);
DEF_OP(VFCMPUNO);
DEF_OP(VUShl);
DEF_OP(VUShr);
DEF_OP(VSShr);
DEF_OP(VUShlS);
DEF_OP(VUShrS);
DEF_OP(VSShrS);
DEF_OP(VInsElement);
DEF_OP(VInsScalarElement);
DEF_OP(VExtractElement);
DEF_OP(VDupElement);
DEF_OP(VExtr);
DEF_OP(VSLI);
DEF_OP(VSRI);
DEF_OP(VUShrI);
DEF_OP(VSShrI);
DEF_OP(VShlI);
DEF_OP(VUShrNI);
DEF_OP(VUShrNI2);
DEF_OP(VBitcast);
DEF_OP(VSXTL);
DEF_OP(VSXTL2);
DEF_OP(VUXTL);
DEF_OP(VUXTL2);
DEF_OP(VSQXTN);
DEF_OP(VSQXTN2);
DEF_OP(VSQXTUN);
DEF_OP(VSQXTUN2);
DEF_OP(VUMul);
DEF_OP(VUMull);
DEF_OP(VSMul);
DEF_OP(VSMull);
DEF_OP(VUMull2);
DEF_OP(VSMull2);
DEF_OP(VUABDL);
DEF_OP(VTBL1);
///< Encryption ops
DEF_OP(AESImc);
DEF_OP(AESEnc);
DEF_OP(AESEncLast);
DEF_OP(AESDec);
DEF_OP(AESDecLast);
DEF_OP(AESKeyGenAssist);
///< F80 ops
DEF_OP(F80LOADFCW);
DEF_OP(F80ADD);
DEF_OP(F80SUB);
DEF_OP(F80MUL);
DEF_OP(F80DIV);
DEF_OP(F80FYL2X);
DEF_OP(F80ATAN);
DEF_OP(F80FPREM1);
DEF_OP(F80FPREM);
DEF_OP(F80SCALE);
DEF_OP(F80CVT);
DEF_OP(F80CVTINT);
DEF_OP(F80CVTTO);
DEF_OP(F80CVTTOINT);
DEF_OP(F80ROUND);
DEF_OP(F80F2XM1);
DEF_OP(F80TAN);
DEF_OP(F80SQRT);
DEF_OP(F80SIN);
DEF_OP(F80COS);
DEF_OP(F80XTRACT_EXP);
DEF_OP(F80XTRACT_SIG);
DEF_OP(F80CMP);
DEF_OP(F80BCDLOAD);
DEF_OP(F80BCDSTORE);
#undef DEF_OP
// Evaluates an IR comparison condition against two raw 64-bit source values.
// unsigned_type/signed_type select the integer width and signedness used for
// the integer conditions; float_type is used by the COND_F* conditions,
// which reinterpret the raw bits of Src1/Src2 as floating-point values
// (bit-level punning via reinterpret_cast — assumes the sources were stored
// at the same width as float_type).
// Unordered variants (FLU/FLEU/FU) are true when either operand is NaN;
// ordered variants (FGE/FGT/FNU) are false when either operand is NaN.
template<typename unsigned_type, typename signed_type, typename float_type>
static bool IsConditionTrue(uint8_t Cond, uint64_t Src1, uint64_t Src2) {
bool CompResult = false;
switch (Cond) {
case FEXCore::IR::COND_EQ:
CompResult = static_cast<unsigned_type>(Src1) == static_cast<unsigned_type>(Src2);
break;
case FEXCore::IR::COND_NEQ:
CompResult = static_cast<unsigned_type>(Src1) != static_cast<unsigned_type>(Src2);
break;
case FEXCore::IR::COND_SGE:
CompResult = static_cast<signed_type>(Src1) >= static_cast<signed_type>(Src2);
break;
case FEXCore::IR::COND_SLT:
CompResult = static_cast<signed_type>(Src1) < static_cast<signed_type>(Src2);
break;
case FEXCore::IR::COND_SGT:
CompResult = static_cast<signed_type>(Src1) > static_cast<signed_type>(Src2);
break;
case FEXCore::IR::COND_SLE:
CompResult = static_cast<signed_type>(Src1) <= static_cast<signed_type>(Src2);
break;
case FEXCore::IR::COND_UGE:
CompResult = static_cast<unsigned_type>(Src1) >= static_cast<unsigned_type>(Src2);
break;
case FEXCore::IR::COND_ULT:
CompResult = static_cast<unsigned_type>(Src1) < static_cast<unsigned_type>(Src2);
break;
case FEXCore::IR::COND_UGT:
CompResult = static_cast<unsigned_type>(Src1) > static_cast<unsigned_type>(Src2);
break;
case FEXCore::IR::COND_ULE:
CompResult = static_cast<unsigned_type>(Src1) <= static_cast<unsigned_type>(Src2);
break;
case FEXCore::IR::COND_FLU:
CompResult = reinterpret_cast<float_type&>(Src1) < reinterpret_cast<float_type&>(Src2) || (std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
break;
case FEXCore::IR::COND_FGE:
CompResult = reinterpret_cast<float_type&>(Src1) >= reinterpret_cast<float_type&>(Src2) && !(std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
break;
case FEXCore::IR::COND_FLEU:
CompResult = reinterpret_cast<float_type&>(Src1) <= reinterpret_cast<float_type&>(Src2) || (std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
break;
case FEXCore::IR::COND_FGT:
CompResult = reinterpret_cast<float_type&>(Src1) > reinterpret_cast<float_type&>(Src2) && !(std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
break;
case FEXCore::IR::COND_FU:
CompResult = (std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
break;
case FEXCore::IR::COND_FNU:
CompResult = !(std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
break;
// Host-flag conditions are not evaluated by the interpreter.
case FEXCore::IR::COND_MI:
case FEXCore::IR::COND_PL:
case FEXCore::IR::COND_VS:
case FEXCore::IR::COND_VC:
default:
LOGMAN_MSG_A_FMT("Unsupported compare type");
break;
}
return CompResult;
}
static uint8_t GetOpSize(FEXCore::IR::IRListView *CurrentIR, IR::OrderedNodeWrapper Node) {
  // Look up the node's IR header and report its result size in bytes.
  return CurrentIR->GetOp<FEXCore::IR::IROp_Header>(Node)->Size;
}
};
};

View File

@ -0,0 +1,289 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
namespace FEXCore::CPU {
// Clears the cache line containing Addr.
// x86 host: CLFLUSH the line; other hosts: __builtin___clear_cache over a
// 64-byte span (assumes 64-byte cache lines — confirm per host).
static inline void CacheLineFlush(char *Addr) {
#ifdef _M_X86_64
__asm volatile (
"clflush (%[Addr]);"
:: [Addr] "r" (Addr)
: "memory");
#else
__builtin___clear_cache(Addr, Addr+64);
#endif
}
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
// Loads a field of the thread's CPU state (CurrentFrame + Op->Offset) into
// the destination SSA slot.
DEF_OP(LoadContext) {
auto Op = IROp->C<IR::IROp_LoadContext>();
uint8_t OpSize = IROp->Size;
uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
ContextPtr += Op->Offset;
// Scalar sizes go through GD; 16-byte values are copied wholesale.
#define LOAD_CTX(x, y) \
case x: { \
y const *MemData = reinterpret_cast<y const*>(ContextPtr); \
GD = *MemData; \
break; \
}
switch (OpSize) {
LOAD_CTX(1, uint8_t)
LOAD_CTX(2, uint16_t)
LOAD_CTX(4, uint32_t)
LOAD_CTX(8, uint64_t)
case 16: {
void const *MemData = reinterpret_cast<void const*>(ContextPtr);
memcpy(GDP, MemData, OpSize);
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled LoadContext size: {}", OpSize);
}
#undef LOAD_CTX
}
DEF_OP(StoreContext) {
  // Copy OpSize bytes from the SSA source into the thread's CPU state at
  // the given context offset.
  auto const *Op = IROp->C<IR::IROp_StoreContext>();
  const uint8_t OpSize = IROp->Size;

  auto *Frame = reinterpret_cast<uint8_t*>(Data->State->CurrentFrame);
  const void *Value = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
  memcpy(Frame + Op->Offset, Value, OpSize);
}
// Register-mapped load/store ops are not supported by the interpreter
// backend (presumably only produced alongside JIT register allocation —
// confirm); hitting them here is a hard error.
DEF_OP(LoadRegister) {
LOGMAN_MSG_A_FMT("Unimplemented");
}
DEF_OP(StoreRegister) {
LOGMAN_MSG_A_FMT("Unimplemented");
}
// Loads one element from an indexed array inside the CPU context:
// address = CurrentFrame + BaseOffset + Index * Stride.
DEF_OP(LoadContextIndexed) {
auto Op = IROp->C<IR::IROp_LoadContextIndexed>();
uint64_t Index = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]);
uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
ContextPtr += Op->BaseOffset;
ContextPtr += Index * Op->Stride;
// Scalar sizes go through GD; 16-byte values are copied wholesale.
#define LOAD_CTX(x, y) \
case x: { \
y const *MemData = reinterpret_cast<y const*>(ContextPtr); \
GD = *MemData; \
break; \
}
switch (Op->Size) {
LOAD_CTX(1, uint8_t)
LOAD_CTX(2, uint16_t)
LOAD_CTX(4, uint32_t)
LOAD_CTX(8, uint64_t)
case 16: {
void const *MemData = reinterpret_cast<void const*>(ContextPtr);
memcpy(GDP, MemData, Op->Size);
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed size: {}", Op->Size);
}
#undef LOAD_CTX
}
// Stores one element into an indexed array inside the CPU context.
// Note the argument order: Args[0] is the value, Args[1] is the index.
DEF_OP(StoreContextIndexed) {
auto Op = IROp->C<IR::IROp_StoreContextIndexed>();
uint64_t Index = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
ContextPtr += Op->BaseOffset;
ContextPtr += Index * Op->Stride;
void *MemData = reinterpret_cast<void*>(ContextPtr);
void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
memcpy(MemData, Src, Op->Size);
}
// Spill/fill ops are register-allocator constructs; the interpreter never
// produces them, so reaching these handlers is a hard error.
DEF_OP(SpillRegister) {
LOGMAN_MSG_A_FMT("Unimplemented");
}
DEF_OP(FillRegister) {
LOGMAN_MSG_A_FMT("Unimplemented");
}
DEF_OP(LoadFlag) {
  // Read a single x86 flag byte out of the CPU state's flags array.
  auto const *Op = IROp->C<IR::IROp_LoadFlag>();
  auto const *Frame = reinterpret_cast<uint8_t const*>(Data->State->CurrentFrame);
  GD = Frame[offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag];
}
DEF_OP(StoreFlag) {
  // Write a single x86 flag byte into the CPU state's flags array.
  auto const *Op = IROp->C<IR::IROp_StoreFlag>();
  const uint8_t Value = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]);
  auto *Frame = reinterpret_cast<uint8_t*>(Data->State->CurrentFrame);
  Frame[offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag] = Value;
}
DEF_OP(LoadMem) {
  // Load OpSize bytes from guest memory into the destination SSA slot.
  auto const *Op = IROp->C<IR::IROp_LoadMem>();
  const uint8_t OpSize = IROp->Size;

  uint8_t const *MemData = *GetSrc<uint8_t const**>(Data->SSAData, Op->Addr);

  // Optional scaled register offset, applied with the addressing mode's
  // extension (64-bit, zero-extended 32-bit, or sign-extended 32-bit).
  if (!Op->Offset.IsInvalid()) {
    auto Offset = *GetSrc<uintptr_t const*>(Data->SSAData, Op->Offset) * Op->OffsetScale;
    switch (Op->OffsetType.Val) {
      case IR::MEM_OFFSET_SXTX.Val: MemData += Offset; break;
      case IR::MEM_OFFSET_UXTW.Val: MemData += (uint32_t)Offset; break;
      case IR::MEM_OFFSET_SXTW.Val: MemData += (int32_t)Offset; break;
    }
  }

  // Clear the full 128-bit result slot first so narrow loads are
  // zero-extended in the SSA slot.
  memset(GDP, 0, 16);
  switch (OpSize) {
    case 1: GD = *reinterpret_cast<const uint8_t*>(MemData); break;
    case 2: GD = *reinterpret_cast<const uint16_t*>(MemData); break;
    case 4: GD = *reinterpret_cast<const uint32_t*>(MemData); break;
    case 8: GD = *reinterpret_cast<const uint64_t*>(MemData); break;
    default:
      // Vector-sized loads copy the op's full size. Use OpSize consistently
      // here (the original mixed IROp->Size and Op->Size for the same value).
      memcpy(GDP, MemData, OpSize);
      break;
  }
}
DEF_OP(StoreMem) {
  // Store OpSize bytes from the SSA source value into guest memory.
  auto const *Op = IROp->C<IR::IROp_StoreMem>();
  const uint8_t OpSize = IROp->Size;

  uint8_t *MemData = *GetSrc<uint8_t **>(Data->SSAData, Op->Addr);

  // Optional scaled register offset, applied with the addressing mode's
  // extension (64-bit, zero-extended 32-bit, or sign-extended 32-bit).
  if (!Op->Offset.IsInvalid()) {
    auto Offset = *GetSrc<uintptr_t const*>(Data->SSAData, Op->Offset) * Op->OffsetScale;
    switch (Op->OffsetType.Val) {
      case IR::MEM_OFFSET_SXTX.Val: MemData += Offset; break;
      case IR::MEM_OFFSET_UXTW.Val: MemData += (uint32_t)Offset; break;
      case IR::MEM_OFFSET_SXTW.Val: MemData += (int32_t)Offset; break;
    }
  }

  switch (OpSize) {
    case 1: *reinterpret_cast<uint8_t*>(MemData) = *GetSrc<uint8_t*>(Data->SSAData, Op->Value); break;
    case 2: *reinterpret_cast<uint16_t*>(MemData) = *GetSrc<uint16_t*>(Data->SSAData, Op->Value); break;
    case 4: *reinterpret_cast<uint32_t*>(MemData) = *GetSrc<uint32_t*>(Data->SSAData, Op->Value); break;
    case 8: *reinterpret_cast<uint64_t*>(MemData) = *GetSrc<uint64_t*>(Data->SSAData, Op->Value); break;
    default:
      // Vector-sized stores copy the op's full size. Use OpSize consistently
      // here (the original mixed IROp->Size and Op->Size for the same value).
      memcpy(MemData, GetSrc<void*>(Data->SSAData, Op->Value), OpSize);
      break;
  }
}
// Loads a single element from memory into lane `Index` of the destination
// vector; the remaining lanes are copied from the pass-through vector in
// Args[1]. Args[0] is the memory address.
DEF_OP(VLoadMemElement) {
auto Op = IROp->C<IR::IROp_VLoadMemElement>();
void const *MemData = *GetSrc<void const**>(Data->SSAData, Op->Header.Args[0]);
memcpy(GDP, GetSrc<void*>(Data->SSAData, Op->Header.Args[1]), 16);
memcpy(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(GDP) + (Op->Header.ElementSize * Op->Index)),
MemData, Op->Header.ElementSize);
}
// Stores lane `Index` of the source vector (Args[1]) to the memory address
// in Args[0], sized by the op's element size.
DEF_OP(VStoreMemElement) {
#define STORE_DATA(x, y) \
case x: { \
y *MemData = *GetSrc<y**>(Data->SSAData, Op->Header.Args[0]); \
memcpy(MemData, &GetSrc<y*>(Data->SSAData, Op->Header.Args[1])[Op->Index], sizeof(y)); \
break; \
}
auto Op = IROp->C<IR::IROp_VStoreMemElement>();
uint8_t OpSize = IROp->Size;
switch (OpSize) {
STORE_DATA(1, uint8_t)
STORE_DATA(2, uint16_t)
STORE_DATA(4, uint32_t)
STORE_DATA(8, uint64_t)
default: LOGMAN_MSG_A_FMT("Unhandled StoreMem size"); break;
}
#undef STORE_DATA
}
// Clears the cache line containing the guest address in Op->Addr.
DEF_OP(CacheLineClear) {
auto Op = IROp->C<IR::IROp_CacheLineClear>();
char *MemData = *GetSrc<char **>(Data->SSAData, Op->Addr);
// 64-byte cache line clear
CacheLineFlush(MemData);
}
#undef DEF_OP
// Installs the memory op handlers into the shared dispatch table.
// Note: the TSO variants (LOADMEMTSO/STOREMEMTSO) are mapped onto the plain
// load/store handlers — no additional memory ordering is applied here.
void InterpreterOps::RegisterMemoryHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
REGISTER_OP(LOADCONTEXT, LoadContext);
REGISTER_OP(STORECONTEXT, StoreContext);
REGISTER_OP(LOADREGISTER, LoadRegister);
REGISTER_OP(STOREREGISTER, StoreRegister);
REGISTER_OP(LOADCONTEXTINDEXED, LoadContextIndexed);
REGISTER_OP(STORECONTEXTINDEXED, StoreContextIndexed);
REGISTER_OP(SPILLREGISTER, SpillRegister);
REGISTER_OP(FILLREGISTER, FillRegister);
REGISTER_OP(LOADFLAG, LoadFlag);
REGISTER_OP(STOREFLAG, StoreFlag);
REGISTER_OP(LOADMEM, LoadMem);
REGISTER_OP(STOREMEM, StoreMem);
REGISTER_OP(LOADMEMTSO, LoadMem);
REGISTER_OP(STOREMEMTSO, StoreMem);
REGISTER_OP(VLOADMEMELEMENT, VLoadMemElement);
REGISTER_OP(VSTOREMEMELEMENT, VStoreMemElement);
REGISTER_OP(CACHELINECLEAR, CacheLineClear);
#undef REGISTER_OP
}
}

View File

@ -0,0 +1,158 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
#ifdef _M_X86_64
#include <xmmintrin.h>
#endif
namespace FEXCore::CPU {
[[noreturn]]
static void StopThread(FEXCore::Core::InternalThreadState *Thread) {
Thread->CTX->StopThread(Thread);
LOGMAN_MSG_A_FMT("unreachable");
FEX_UNREACHABLE;
}
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
DEF_OP(Fence) {
  auto Op = IROp->C<IR::IROp_Fence>();

  // Translate the IR fence kind to the equivalent C++ memory fence.
  switch (Op->Fence) {
    case IR::Fence_Store.Val:
      std::atomic_thread_fence(std::memory_order_release);
      break;
    case IR::Fence_Load.Val:
      std::atomic_thread_fence(std::memory_order_acquire);
      break;
    case IR::Fence_LoadStore.Val:
      // Combined load+store ordering uses a full barrier.
      std::atomic_thread_fence(std::memory_order_seq_cst);
      break;
    default:
      LOGMAN_MSG_A_FMT("Unknown Fence: {}", Op->Fence);
      break;
  }
}
DEF_OP(Break) {
  auto Op = IROp->C<IR::IROp_Break>();

  // HLT (reason 4) is the only break reason the interpreter handles.
  if (Op->Reason == 4) {
    StopThread(Data->State);
  }
  else {
    LOGMAN_MSG_A_FMT("Unknown Break Reason: {}", Op->Reason);
  }
}
DEF_OP(GetRoundingMode) {
  // Reads the host FP rounding state and translates it to the IR encoding.
  uint32_t GuestRounding{};
#ifdef _M_ARM_64
  uint64_t FPCRValue{};
  __asm(R"(
    mrs %[Tmp], FPCR;
  )"
  : [Tmp] "=r" (FPCRValue));

  // FPCR bit 24 is flush-to-zero.
  if ((FPCRValue >> 24) & 1) {
    GuestRounding |= IR::ROUND_MODE_FLUSH_TO_ZERO;
  }

  // ARM's RMode encoding (bits [23:22]) is ordered differently than x86's;
  // translate each value explicitly.
  switch ((FPCRValue >> 22) & 0b11) {
    case 0: GuestRounding |= IR::ROUND_MODE_NEAREST; break;
    case 1: GuestRounding |= IR::ROUND_MODE_POSITIVE_INFINITY; break;
    case 2: GuestRounding |= IR::ROUND_MODE_NEGATIVE_INFINITY; break;
    case 3: GuestRounding |= IR::ROUND_MODE_TOWARDS_ZERO; break;
  }
#else
  // MXCSR bits [15:13] (FTZ + rounding control) already match the IR layout.
  GuestRounding = (_mm_getcsr() >> 13) & 0b111;
#endif
  memcpy(GDP, &GuestRounding, sizeof(GuestRounding));
}
DEF_OP(SetRoundingMode) {
  auto Op = IROp->C<IR::IROp_SetRoundingMode>();
  // Guest rounding state in the IR encoding: bits [1:0] rounding mode,
  // bit 2 flush-to-zero.
  const uint8_t GuestRounding = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]);
#ifdef _M_ARM_64
  uint64_t FPCRValue{};
  __asm volatile(R"(
    mrs %[Tmp], FPCR;
  )"
  : [Tmp] "=r" (FPCRValue));

  // Clear the rounding-related bits before inserting the guest's state.
  FPCRValue &= ~(0b111 << 22);
  if (GuestRounding & IR::ROUND_MODE_FLUSH_TO_ZERO) {
    FPCRValue |= (1U << 24);
  }

  // ARM's RMode encoding (bits [23:22]) is ordered differently than x86's.
  switch (GuestRounding & 0b11) {
    case IR::ROUND_MODE_NEAREST:           FPCRValue |= (0b00U << 22); break;
    case IR::ROUND_MODE_POSITIVE_INFINITY: FPCRValue |= (0b01U << 22); break;
    case IR::ROUND_MODE_NEGATIVE_INFINITY: FPCRValue |= (0b10U << 22); break;
    case IR::ROUND_MODE_TOWARDS_ZERO:      FPCRValue |= (0b11U << 22); break;
  }

  __asm volatile(R"(
    msr FPCR, %[Tmp];
  )"
  :: [Tmp] "r" (FPCRValue));
#else
  // Replace MXCSR bits [15:13] with the guest's FTZ + rounding control.
  uint32_t MXCSRValue = _mm_getcsr();
  MXCSRValue &= ~(0b111 << 13);
  MXCSRValue |= GuestRounding << 13;
  _mm_setcsr(MXCSRValue);
#endif
}
DEF_OP(Print) {
  auto Op = IROp->C<IR::IROp_Print>();
  const uint8_t OpSize = IROp->Size;

  // Debug helper: log the source value in hex and decimal.
  if (OpSize == 16) {
    // 128-bit values are logged as two 64-bit halves.
    __uint128_t Src = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
    const uint64_t Lower = static_cast<uint64_t>(Src);
    const uint64_t Upper = static_cast<uint64_t>(Src >> 64);
    LogMan::Msg::IFmt(">>>> Value[0] in Arg: 0x{:x}, {}", Lower, Lower);
    LogMan::Msg::IFmt(" Value[1] in Arg: 0x{:x}, {}", Upper, Upper);
  }
  else if (OpSize <= 8) {
    const uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]);
    LogMan::Msg::IFmt(">>>> Value in Arg: 0x{:x}, {}", Src, Src);
  }
  else {
    LOGMAN_MSG_A_FMT("Unknown value size: {}", OpSize);
  }
}
#undef DEF_OP
// Installs the miscellaneous op handlers into the interpreter's dispatch
// table. Several structural/marker ops (headers, blocks, phi nodes) have no
// runtime effect in the interpreter and are mapped to the shared NoOp handler.
void InterpreterOps::RegisterMiscHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
REGISTER_OP(DUMMY, NoOp);
REGISTER_OP(IRHEADER, NoOp);
REGISTER_OP(CODEBLOCK, NoOp);
REGISTER_OP(BEGINBLOCK, NoOp);
REGISTER_OP(ENDBLOCK, NoOp);
REGISTER_OP(FENCE, Fence);
REGISTER_OP(BREAK, Break);
REGISTER_OP(PHI, NoOp);
REGISTER_OP(PHIVALUE, NoOp);
REGISTER_OP(PRINT, Print);
REGISTER_OP(GETROUNDINGMODE, GetRoundingMode);
REGISTER_OP(SETROUNDINGMODE, SetRoundingMode);
REGISTER_OP(INVALIDATEFLAGS, NoOp);
#undef REGISTER_OP
}
}

View File

@ -0,0 +1,50 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
DEF_OP(ExtractElementPair) {
  auto Op = IROp->C<IR::IROp_ExtractElementPair>();

  // The pair's elements are stored contiguously; copy the requested one
  // (Op->Element selects the lower or upper half) into the destination.
  uint8_t *Pair = GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]);
  memcpy(GDP, Pair + Op->Header.Size * Op->Element, Op->Header.Size);
}
DEF_OP(CreateElementPair) {
  auto Op = IROp->C<IR::IROp_CreateElementPair>();
  const size_t ElementSize = Op->Header.Size;

  // Pack the two source values back-to-back: Args[0] in the lower half,
  // Args[1] in the upper half of the destination.
  uint8_t *Dst = GetDest<uint8_t*>(Data->SSAData, Node);
  memcpy(Dst, GetSrc<void*>(Data->SSAData, Op->Header.Args[0]), ElementSize);
  memcpy(Dst + ElementSize, GetSrc<void*>(Data->SSAData, Op->Header.Args[1]), ElementSize);
}
DEF_OP(Mov) {
  auto Op = IROp->C<IR::IROp_Mov>();

  // Copy the source SSA value to the destination unchanged.
  memcpy(GDP, GetSrc<void*>(Data->SSAData, Op->Header.Args[0]), IROp->Size);
}
#undef DEF_OP
// Installs the move/pair op handlers into the interpreter's dispatch table,
// mapping each IR opcode to the corresponding Op_* implementation above.
void InterpreterOps::RegisterMoveHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
REGISTER_OP(EXTRACTELEMENTPAIR, ExtractElementPair);
REGISTER_OP(CREATEELEMENTPAIR, CreateElementPair);
REGISTER_OP(MOV, Mov);
#undef REGISTER_OP
}
}

File diff suppressed because it is too large Load Diff