Merge pull request #1307 from Sonicadvance1/InterpreterDispatcher

Interpreter: Splits ops into separate files
This commit is contained in:
Ryan Houdek 2021-10-21 16:22:45 -07:00 committed by GitHub
commit e9937d9a85
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 6542 additions and 5405 deletions

View File

@ -97,6 +97,17 @@ set (SRCS
Interface/Core/Dispatcher/Arm64Dispatcher.cpp
Interface/Core/Interpreter/InterpreterCore.cpp
Interface/Core/Interpreter/InterpreterOps.cpp
Interface/Core/Interpreter/ALUOps.cpp
Interface/Core/Interpreter/AtomicOps.cpp
Interface/Core/Interpreter/BranchOps.cpp
Interface/Core/Interpreter/ConversionOps.cpp
Interface/Core/Interpreter/EncryptionOps.cpp
Interface/Core/Interpreter/F80Ops.cpp
Interface/Core/Interpreter/FlagOps.cpp
Interface/Core/Interpreter/MemoryOps.cpp
Interface/Core/Interpreter/MiscOps.cpp
Interface/Core/Interpreter/MoveOps.cpp
Interface/Core/Interpreter/VectorOps.cpp
Interface/Core/X86Tables/BaseTables.cpp
Interface/Core/X86Tables/DDDTables.cpp
Interface/Core/X86Tables/EVEXTables.cpp

View File

@ -249,6 +249,8 @@ namespace FEXCore::Context {
LocalLoader = Loader;
using namespace FEXCore::Core;
FEXCore::CPU::InitializeInterpreterOpHandlers();
FEXCore::Core::CPUState NewThreadState = CreateDefaultCPUState();
FEXCore::Core::InternalThreadState *Thread = CreateThread(&NewThreadState, 0);

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,793 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <FEXCore/Utils/BitUtils.h>
#include <cstdint>
namespace FEXCore::CPU {
#ifdef _M_X86_64
// Host-native (x86-64) implementations of the atomic helpers used by the
// interpreter's atomic IR ops below.
//
// AtomicFetchNeg: atomically negates *Addr and returns the value that was
// previously in memory. x86 has no fetch-neg primitive, so this is a CAS loop.
// Deduplicated: the four per-width overloads were byte-identical copies, so a
// single template serves uint8_t/uint16_t/uint32_t/uint64_t callers.
template<typename Type>
static Type AtomicFetchNeg(Type *Addr) {
  std::atomic<Type> *MemData = reinterpret_cast<std::atomic<Type>*>(Addr);
  Type Expected = MemData->load();
  Type Desired = -Expected;
  // compare_exchange_strong writes the observed value back into Expected on
  // failure, so Desired must be recomputed from it on every iteration.
  do {
    Desired = -Expected;
  } while (!MemData->compare_exchange_strong(Expected, Desired, std::memory_order_seq_cst));
  return Expected;
}

// AtomicCompareAndSwap: single-shot CAS.
// Returns `expected` when the swap succeeded, otherwise the value that was
// actually observed in memory (mirrors the AArch64 asm variants' contract).
template<typename T>
static T AtomicCompareAndSwap(T expected, T desired, T *addr)
{
  std::atomic<T> *MemData = reinterpret_cast<std::atomic<T>*>(addr);
  T Expected = expected;
  bool Result = MemData->compare_exchange_strong(Expected, desired);
  return Result ? expected : Expected;
}
#else
// Needs to match what the AArch64 JIT and unaligned signal handler expects
// AArch64 host implementations of AtomicFetchNeg: atomically negate *Addr and
// return the value that was previously in memory.
// Hand-written LDAXR/STLXR exclusive-access loops: the store-release reports
// failure (TmpStatus != 0) when the exclusive monitor was lost, and the loop
// retries from label 1. Left as asm so the instruction sequence stays exactly
// what the unaligned-access signal handler expects to see.
// 8-bit variant: ldaxrb/stlxrb operate on the byte lane of the W registers.
uint8_t AtomicFetchNeg(uint8_t *Addr) {
using Type = uint8_t;
Type Result{};
Type Tmp{};
Type TmpStatus{};
__asm__ volatile(
R"(
1:
ldaxrb %w[Result], [%[Memory]];
neg %w[Tmp], %w[Result];
stlxrb %w[TmpStatus], %w[Tmp], [%[Memory]];
cbnz %w[TmpStatus], 1b;
)"
: [Result] "=r" (Result)
, [Tmp] "=r" (Tmp)
, [TmpStatus] "=r" (TmpStatus)
, [Memory] "+r" (Addr)
:: "memory"
);
return Result;
}
// 16-bit variant: ldaxrh/stlxrh (halfword exclusives).
uint16_t AtomicFetchNeg(uint16_t *Addr) {
using Type = uint16_t;
Type Result{};
Type Tmp{};
Type TmpStatus{};
__asm__ volatile(
R"(
1:
ldaxrh %w[Result], [%[Memory]];
neg %w[Tmp], %w[Result];
stlxrh %w[TmpStatus], %w[Tmp], [%[Memory]];
cbnz %w[TmpStatus], 1b;
)"
: [Result] "=r" (Result)
, [Tmp] "=r" (Tmp)
, [TmpStatus] "=r" (TmpStatus)
, [Memory] "+r" (Addr)
:: "memory"
);
return Result;
}
// 32-bit variant: ldaxr/stlxr on W registers.
uint32_t AtomicFetchNeg(uint32_t *Addr) {
using Type = uint32_t;
Type Result{};
Type Tmp{};
Type TmpStatus{};
__asm__ volatile(
R"(
1:
ldaxr %w[Result], [%[Memory]];
neg %w[Tmp], %w[Result];
stlxr %w[TmpStatus], %w[Tmp], [%[Memory]];
cbnz %w[TmpStatus], 1b;
)"
: [Result] "=r" (Result)
, [Tmp] "=r" (Tmp)
, [TmpStatus] "=r" (TmpStatus)
, [Memory] "+r" (Addr)
:: "memory"
);
return Result;
}
// 64-bit variant: ldaxr/stlxr on X registers (no %w modifier on data operands).
uint64_t AtomicFetchNeg(uint64_t *Addr) {
using Type = uint64_t;
Type Result{};
Type Tmp{};
Type TmpStatus{};
__asm__ volatile(
R"(
1:
ldaxr %[Result], [%[Memory]];
neg %[Tmp], %[Result];
stlxr %w[TmpStatus], %[Tmp], [%[Memory]];
cbnz %w[TmpStatus], 1b;
)"
: [Result] "=r" (Result)
, [Tmp] "=r" (Tmp)
, [TmpStatus] "=r" (TmpStatus)
, [Memory] "+r" (Addr)
:: "memory"
);
return Result;
}
// AArch64 hand-written compare-and-swap.
// Contract (must match the x86 template path): returns `expected` when the
// swap succeeded, otherwise the value actually observed in memory.
// On mismatch the exclusive reservation is explicitly dropped with clrex.
template<typename T>
static T AtomicCompareAndSwap(T expected, T desired, T *addr);
// 8-bit CAS: cmp uses uxtb so only the low byte of Expected is compared.
template<>
uint8_t AtomicCompareAndSwap(uint8_t expected, uint8_t desired, uint8_t *addr) {
using Type = uint8_t;
//force Result to r9 (scratch register) or clang spills to stack
register Type Result asm("r9"){};
Type Tmp{};
Type Tmp2{};
__asm__ volatile(
R"(
1:
ldaxrb %w[Tmp], [%[Memory]];
cmp %w[Tmp], %w[Expected], uxtb;
b.ne 2f;
stlxrb %w[Tmp2], %w[Desired], [%[Memory]];
cbnz %w[Tmp2], 1b;
mov %w[Result], %w[Expected];
b 3f;
2:
mov %w[Result], %w[Tmp];
clrex;
3:
)"
: [Tmp] "=r" (Tmp)
, [Tmp2] "=r" (Tmp2)
, [Desired] "+r" (desired)
, [Expected] "+r" (expected)
, [Result] "=r" (Result)
, [Memory] "+r" (addr)
:: "memory"
);
return Result;
}
// 16-bit CAS: halfword exclusives, cmp with uxth.
template<>
uint16_t AtomicCompareAndSwap(uint16_t expected, uint16_t desired, uint16_t *addr) {
using Type = uint16_t;
//force Result to r9 (scratch register) or clang spills to stack
register Type Result asm("r9"){};
Type Tmp{};
Type Tmp2{};
__asm__ volatile(
R"(
1:
ldaxrh %w[Tmp], [%[Memory]];
cmp %w[Tmp], %w[Expected], uxth;
b.ne 2f;
stlxrh %w[Tmp2], %w[Desired], [%[Memory]];
cbnz %w[Tmp2], 1b;
mov %w[Result], %w[Expected];
b 3f;
2:
mov %w[Result], %w[Tmp];
clrex;
3:
)"
: [Tmp] "=r" (Tmp)
, [Tmp2] "=r" (Tmp2)
, [Desired] "+r" (desired)
, [Expected] "+r" (expected)
, [Result] "=r" (Result)
, [Memory] "+r" (addr)
:: "memory"
);
return Result;
}
// 32-bit CAS: full W-register compare, no extension needed.
template<>
uint32_t AtomicCompareAndSwap(uint32_t expected, uint32_t desired, uint32_t *addr) {
using Type = uint32_t;
//force Result to r9 (scratch register) or clang spills to stack
register Type Result asm("r9"){};
Type Tmp{};
Type Tmp2{};
__asm__ volatile(
R"(
1:
ldaxr %w[Tmp], [%[Memory]];
cmp %w[Tmp], %w[Expected];
b.ne 2f;
stlxr %w[Tmp2], %w[Desired], [%[Memory]];
cbnz %w[Tmp2], 1b;
mov %w[Result], %w[Expected];
b 3f;
2:
mov %w[Result], %w[Tmp];
clrex;
3:
)"
: [Tmp] "=r" (Tmp)
, [Tmp2] "=r" (Tmp2)
, [Desired] "+r" (desired)
, [Expected] "+r" (expected)
, [Result] "=r" (Result)
, [Memory] "+r" (addr)
:: "memory"
);
return Result;
}
// 64-bit CAS: X-register data operands; status flag is still a W register.
template<>
uint64_t AtomicCompareAndSwap(uint64_t expected, uint64_t desired, uint64_t *addr) {
using Type = uint64_t;
//force Result to r9 (scratch register) or clang spills to stack
register Type Result asm("r9"){};
Type Tmp{};
Type Tmp2{};
__asm__ volatile(
R"(
1:
ldaxr %[Tmp], [%[Memory]];
cmp %[Tmp], %[Expected];
b.ne 2f;
stlxr %w[Tmp2], %[Desired], [%[Memory]];
cbnz %w[Tmp2], 1b;
mov %[Result], %[Expected];
b 3f;
2:
mov %[Result], %[Tmp];
clrex;
3:
)"
: [Tmp] "=r" (Tmp)
, [Tmp2] "=r" (Tmp2)
, [Desired] "+r" (desired)
, [Expected] "+r" (expected)
, [Result] "=r" (Result)
, [Memory] "+r" (addr)
:: "memory"
);
return Result;
}
#endif
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
// CASPair: compare-and-swap a pair of elements as a single atomic unit.
DEF_OP(CASPair) {
auto Op = IROp->C<IR::IROp_CASPair>();
uint8_t OpSize = IROp->Size;
// Size is the size of each pair element, so the full atomic access is 2*OpSize.
switch (OpSize) {
case 4: {
// Two 32-bit elements -> one 64-bit CAS.
GD = AtomicCompareAndSwap(
*GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]),
*GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]),
*GetSrc<uint64_t**>(Data->SSAData, Op->Header.Args[2])
);
break;
}
case 8: {
// Two 64-bit elements -> 128-bit CAS via std::atomic<__uint128_t>.
std::atomic<__uint128_t> *MemData = *GetSrc<std::atomic<__uint128_t> **>(Data->SSAData, Op->Header.Args[2]);
__uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
__uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);
__uint128_t Expected = Src1;
bool Result = MemData->compare_exchange_strong(Expected, Src2);
// Same convention as AtomicCompareAndSwap: the compared-against value on
// success, the value observed in memory on failure.
memcpy(GDP, Result ? &Src1 : &Expected, 16);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown CAS size: {}", OpSize); break;
}
}
// CAS: scalar compare-and-swap, dispatched on access width.
// Result (GD) follows AtomicCompareAndSwap: the expected value when the swap
// happened, otherwise the value actually found in memory.
DEF_OP(CAS) {
auto Op = IROp->C<IR::IROp_CAS>();
uint8_t OpSize = IROp->Size;
switch (OpSize) {
case 1: {
GD = AtomicCompareAndSwap(
*GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]),
*GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]),
*GetSrc<uint8_t**>(Data->SSAData, Op->Header.Args[2])
);
break;
}
case 2: {
GD = AtomicCompareAndSwap(
*GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[0]),
*GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]),
*GetSrc<uint16_t**>(Data->SSAData, Op->Header.Args[2])
);
break;
}
case 4: {
GD = AtomicCompareAndSwap(
*GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[0]),
*GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]),
*GetSrc<uint32_t**>(Data->SSAData, Op->Header.Args[2])
);
break;
}
case 8: {
GD = AtomicCompareAndSwap(
*GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]),
*GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]),
*GetSrc<uint64_t**>(Data->SSAData, Op->Header.Args[2])
);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown CAS size: {}", OpSize); break;
}
}
// AtomicAdd: atomically adds the source operand into guest memory.
// Memory-only effect; no destination value is produced.
DEF_OP(AtomicAdd) {
  auto Op = IROp->C<IR::IROp_AtomicAdd>();
  switch (Op->Size) {
    case 1: {
      auto *Memory = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint8_t Value = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
      *Memory += Value;
      break;
    }
    case 2: {
      auto *Memory = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint16_t Value = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
      *Memory += Value;
      break;
    }
    case 4: {
      auto *Memory = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint32_t Value = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
      *Memory += Value;
      break;
    }
    case 8: {
      auto *Memory = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
      uint64_t Value = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
      *Memory += Value;
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
  }
}
// AtomicSub: atomically subtracts the source operand from guest memory.
// Memory-only effect; no destination value is produced.
DEF_OP(AtomicSub) {
auto Op = IROp->C<IR::IROp_AtomicSub>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData -= Src;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData -= Src;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData -= Src;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData -= Src;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
// AtomicAnd/AtomicOr/AtomicXor: atomic read-modify-write bitwise ops on guest
// memory. Memory-only effect; no destination value is produced.
DEF_OP(AtomicAnd) {
auto Op = IROp->C<IR::IROp_AtomicAnd>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData &= Src;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData &= Src;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData &= Src;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData &= Src;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
DEF_OP(AtomicOr) {
auto Op = IROp->C<IR::IROp_AtomicOr>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData |= Src;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData |= Src;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData |= Src;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData |= Src;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
DEF_OP(AtomicXor) {
auto Op = IROp->C<IR::IROp_AtomicXor>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData ^= Src;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData ^= Src;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData ^= Src;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
*MemData ^= Src;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
// AtomicSwap: atomically stores the source operand into guest memory and
// returns the value that was there before the exchange.
DEF_OP(AtomicSwap) {
auto Op = IROp->C<IR::IROp_AtomicSwap>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
uint8_t Previous = MemData->exchange(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
uint16_t Previous = MemData->exchange(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
uint32_t Previous = MemData->exchange(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
uint64_t Previous = MemData->exchange(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
// AtomicFetchAdd/AtomicFetchSub: atomic read-modify-write that also returns
// the pre-modification value from memory.
DEF_OP(AtomicFetchAdd) {
auto Op = IROp->C<IR::IROp_AtomicFetchAdd>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
uint8_t Previous = MemData->fetch_add(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
uint16_t Previous = MemData->fetch_add(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
uint32_t Previous = MemData->fetch_add(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
uint64_t Previous = MemData->fetch_add(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
DEF_OP(AtomicFetchSub) {
auto Op = IROp->C<IR::IROp_AtomicFetchSub>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
uint8_t Previous = MemData->fetch_sub(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
uint16_t Previous = MemData->fetch_sub(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
uint32_t Previous = MemData->fetch_sub(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
uint64_t Previous = MemData->fetch_sub(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
// AtomicFetchAnd/AtomicFetchOr: atomic bitwise read-modify-write returning
// the pre-modification value from memory.
DEF_OP(AtomicFetchAnd) {
auto Op = IROp->C<IR::IROp_AtomicFetchAnd>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
uint8_t Previous = MemData->fetch_and(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
uint16_t Previous = MemData->fetch_and(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
uint32_t Previous = MemData->fetch_and(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
uint64_t Previous = MemData->fetch_and(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
DEF_OP(AtomicFetchOr) {
auto Op = IROp->C<IR::IROp_AtomicFetchOr>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
uint8_t Previous = MemData->fetch_or(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
uint16_t Previous = MemData->fetch_or(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
uint32_t Previous = MemData->fetch_or(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
uint64_t Previous = MemData->fetch_or(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
// AtomicFetchXor: atomic XOR into guest memory, returning the value that was
// in memory before the operation.
DEF_OP(AtomicFetchXor) {
auto Op = IROp->C<IR::IROp_AtomicFetchXor>();
switch (Op->Size) {
case 1: {
std::atomic<uint8_t> *MemData = *GetSrc<std::atomic<uint8_t> **>(Data->SSAData, Op->Header.Args[0]);
uint8_t Src = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[1]);
uint8_t Previous = MemData->fetch_xor(Src);
GD = Previous;
break;
}
case 2: {
std::atomic<uint16_t> *MemData = *GetSrc<std::atomic<uint16_t> **>(Data->SSAData, Op->Header.Args[0]);
uint16_t Src = *GetSrc<uint16_t*>(Data->SSAData, Op->Header.Args[1]);
uint16_t Previous = MemData->fetch_xor(Src);
GD = Previous;
break;
}
case 4: {
std::atomic<uint32_t> *MemData = *GetSrc<std::atomic<uint32_t> **>(Data->SSAData, Op->Header.Args[0]);
uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Header.Args[1]);
uint32_t Previous = MemData->fetch_xor(Src);
GD = Previous;
break;
}
case 8: {
std::atomic<uint64_t> *MemData = *GetSrc<std::atomic<uint64_t> **>(Data->SSAData, Op->Header.Args[0]);
uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
uint64_t Previous = MemData->fetch_xor(Src);
GD = Previous;
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
}
}
// AtomicFetchNeg: atomically negates the value in guest memory and returns
// the value that was there before the negation (delegates to the per-width
// AtomicFetchNeg helpers defined earlier in this file).
DEF_OP(AtomicFetchNeg) {
  auto Op = IROp->C<IR::IROp_AtomicFetchNeg>();
  switch (Op->Size) {
    case 1:
      GD = AtomicFetchNeg(*GetSrc<uint8_t**>(Data->SSAData, Op->Header.Args[0]));
      break;
    case 2:
      GD = AtomicFetchNeg(*GetSrc<uint16_t**>(Data->SSAData, Op->Header.Args[0]));
      break;
    case 4:
      GD = AtomicFetchNeg(*GetSrc<uint32_t**>(Data->SSAData, Op->Header.Args[0]));
      break;
    case 8:
      GD = AtomicFetchNeg(*GetSrc<uint64_t**>(Data->SSAData, Op->Header.Args[0]));
      break;
    default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", Op->Size);
  }
}
#undef DEF_OP
// Registers every atomic-op handler in this file into the interpreter's
// dispatch table, keyed by IR opcode.
void InterpreterOps::RegisterAtomicHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
REGISTER_OP(CASPAIR, CASPair);
REGISTER_OP(CAS, CAS);
REGISTER_OP(ATOMICADD, AtomicAdd);
REGISTER_OP(ATOMICSUB, AtomicSub);
REGISTER_OP(ATOMICAND, AtomicAnd);
REGISTER_OP(ATOMICOR, AtomicOr);
REGISTER_OP(ATOMICXOR, AtomicXor);
REGISTER_OP(ATOMICSWAP, AtomicSwap);
REGISTER_OP(ATOMICFETCHADD, AtomicFetchAdd);
REGISTER_OP(ATOMICFETCHSUB, AtomicFetchSub);
REGISTER_OP(ATOMICFETCHAND, AtomicFetchAnd);
REGISTER_OP(ATOMICFETCHOR, AtomicFetchOr);
REGISTER_OP(ATOMICFETCHXOR, AtomicFetchXor);
REGISTER_OP(ATOMICFETCHNEG, AtomicFetchNeg);
#undef REGISTER_OP
}
}

View File

@ -0,0 +1,157 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include "Interface/HLE/Thunks/Thunks.h"
#include <FEXCore/Utils/BitUtils.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <cstdint>
namespace FEXCore::CPU {
// Raises a signal-return event on the given thread; SignalThread is expected
// not to return here, and the trailing assert/unreachable enforce that.
[[noreturn]]
static void SignalReturn(FEXCore::Core::InternalThreadState *Thread) {
Thread->CTX->SignalThread(Thread, FEXCore::Core::SignalEvent::Return);
LOGMAN_MSG_A_FMT("unreachable");
FEX_UNREACHABLE;
}
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
// Guest call/return ops are not implemented in the interpreter backend;
// they only emit a debug log when hit.
DEF_OP(GuestCallDirect) {
LogMan::Msg::DFmt("Unimplemented");
}
DEF_OP(GuestCallIndirect) {
LogMan::Msg::DFmt("Unimplemented");
}
DEF_OP(GuestReturn) {
LogMan::Msg::DFmt("Unimplemented");
}
// Hands the thread to the noreturn SignalReturn helper in this file.
DEF_OP(SignalReturn) {
SignalReturn(Data->State);
}
// Returns from an interpreter callback invocation using the saved stack entry.
DEF_OP(CallbackReturn) {
Data->State->CTX->InterpreterCallbackReturn(Data->State, Data->StackEntry);
}
// ExitFunction: copies the exit value over the start of the thread's current
// frame and tells the dispatch loop to stop executing this block.
DEF_OP(ExitFunction) {
  auto Op = IROp->C<IR::IROp_ExitFunction>();
  const uint8_t CopySize = IROp->Size;
  void *FrameData = reinterpret_cast<void*>(Data->State->CurrentFrame);
  void *ExitValue = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
  memcpy(FrameData, ExitValue, CopySize);
  Data->BlockResults.Quit = true;
}
// Jump: unconditional branch to another block of the current IR. Repoints the
// interpreter's block iterator and asks the outer loop to re-dispatch.
DEF_OP(Jump) {
auto Op = IROp->C<IR::IROp_Jump>();
uintptr_t ListBegin = Data->CurrentIR->GetListData();
uintptr_t DataBegin = Data->CurrentIR->GetData();
Data->BlockIterator = IR::NodeIterator(ListBegin, DataBegin, Op->Header.Args[0]);
Data->BlockResults.Redo = true;
}
// CondJump: evaluates Op->Cond over Cmp1/Cmp2 and branches to TrueBlock or
// FalseBlock.
DEF_OP(CondJump) {
auto Op = IROp->C<IR::IROp_CondJump>();
uintptr_t ListBegin = Data->CurrentIR->GetListData();
uintptr_t DataBegin = Data->CurrentIR->GetData();
bool CompResult;
uint64_t Src1 = *GetSrc<uint64_t*>(Data->SSAData, Op->Cmp1);
uint64_t Src2 = *GetSrc<uint64_t*>(Data->SSAData, Op->Cmp2);
// 32-bit compares use 32-bit integer (and float) semantics; everything else
// is treated as 64-bit/double.
if (Op->CompareSize == 4)
CompResult = IsConditionTrue<uint32_t, int32_t, float>(Op->Cond.Val, Src1, Src2);
else
CompResult = IsConditionTrue<uint64_t, int64_t, double>(Op->Cond.Val, Src1, Src2);
if (CompResult) {
Data->BlockIterator = IR::NodeIterator(ListBegin, DataBegin, Op->TrueBlock);
}
else {
Data->BlockIterator = IR::NodeIterator(ListBegin, DataBegin, Op->FalseBlock);
}
Data->BlockResults.Redo = true;
}
// Syscall: gathers up to MAX_ARGS argument values (stopping at the first
// invalid arg slot) and forwards them to the registered syscall handler.
DEF_OP(Syscall) {
auto Op = IROp->C<IR::IROp_Syscall>();
FEXCore::HLE::SyscallArguments Args;
for (size_t j = 0; j < FEXCore::HLE::SyscallArguments::MAX_ARGS; ++j) {
if (Op->Header.Args[j].IsInvalid()) break;
Args.Argument[j] = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[j]);
}
uint64_t Res = FEXCore::Context::HandleSyscall(Data->State->CTX->SyscallHandler, Data->State->CurrentFrame, &Args);
GD = Res;
}
// Thunk: looks up a host thunk by name hash and invokes it with the guest
// argument pointer.
DEF_OP(Thunk) {
auto Op = IROp->C<IR::IROp_Thunk>();
auto thunkFn = Data->State->CTX->ThunkHandler->LookupThunk(Op->ThunkNameHash);
thunkFn(*GetSrc<void**>(Data->SSAData, Op->Header.Args[0]));
}
// ValidateCode: compares the guest code bytes at CurrentEntry+Offset against
// the snapshot captured at compile time. Result is 1 when the guest code has
// been modified since compilation, 0 otherwise.
DEF_OP(ValidateCode) {
  auto Op = IROp->C<IR::IROp_ValidateCode>();
  const auto CodePtr = Data->CurrentEntry + Op->Offset;
  const bool Modified = memcmp(reinterpret_cast<const void*>(CodePtr), &Op->CodeOriginalLow, Op->CodeLength) != 0;
  GD = Modified ? 1 : 0;
}
// RemoveCodeEntry: invalidates the compiled-code entry for the current guest
// entry point.
DEF_OP(RemoveCodeEntry) {
Data->State->CTX->RemoveCodeEntry(Data->State, Data->CurrentEntry);
}
// CPUID: runs the emulated CPUID function for (Arg, Leaf) and stores the four
// 32-bit result values (16 bytes total) into the destination.
DEF_OP(CPUID) {
auto Op = IROp->C<IR::IROp_CPUID>();
uint64_t *DstPtr = GetDest<uint64_t*>(Data->SSAData, Node);
uint64_t Arg = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]);
uint64_t Leaf = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
auto Results = Data->State->CTX->CPUID.RunFunction(Arg, Leaf);
memcpy(DstPtr, &Results, sizeof(uint32_t) * 4);
}
#undef DEF_OP
// Registers every branch/flow-control handler in this file into the
// interpreter's dispatch table, keyed by IR opcode.
void InterpreterOps::RegisterBranchHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
REGISTER_OP(GUESTCALLDIRECT, GuestCallDirect);
REGISTER_OP(GUESTCALLINDIRECT, GuestCallIndirect);
REGISTER_OP(GUESTRETURN, GuestReturn);
REGISTER_OP(SIGNALRETURN, SignalReturn);
REGISTER_OP(CALLBACKRETURN, CallbackReturn);
REGISTER_OP(EXITFUNCTION, ExitFunction);
REGISTER_OP(JUMP, Jump);
REGISTER_OP(CONDJUMP, CondJump);
REGISTER_OP(SYSCALL, Syscall);
REGISTER_OP(THUNK, Thunk);
REGISTER_OP(VALIDATECODE, ValidateCode);
REGISTER_OP(REMOVECODEENTRY, RemoveCodeEntry);
REGISTER_OP(CPUID, CPUID);
#undef REGISTER_OP
}
}

View File

@ -0,0 +1,237 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
// VInsGPR: inserts a GPR value (Src2) into element Op->Index of vector Src1.
DEF_OP(VInsGPR) {
auto Op = IROp->C<IR::IROp_VInsGPR>();
uint8_t OpSize = IROp->Size;
__uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
__uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);
// Bit offset of the destination element within the vector.
uint64_t Offset = Op->Index * Op->Header.ElementSize * 8;
__uint128_t Mask = (1ULL << (Op->Header.ElementSize * 8)) - 1;
// ElementSize == 8 would shift a 64-bit literal by 64 (undefined behaviour),
// so use an explicit all-ones mask for that case.
if (Op->Header.ElementSize == 8) {
Mask = ~0ULL;
}
Src2 = Src2 & Mask;
Mask <<= Offset;
Mask = ~Mask;
// Clear the target element in Src1, then OR in the masked new value.
__uint128_t Dst = Src1 & Mask;
Dst |= Src2 << Offset;
memcpy(GDP, &Dst, OpSize);
}
// VCastFromGPR: moves ElementSize bytes from a GPR source into the vector
// destination without conversion.
DEF_OP(VCastFromGPR) {
auto Op = IROp->C<IR::IROp_VCastFromGPR>();
memcpy(GDP, GetSrc<void*>(Data->SSAData, Op->Header.Args[0]), Op->Header.ElementSize);
}
// Float_FromGPR_S: converts a signed integer GPR into a scalar float/double.
// Conv packs (destination element size << 8) | source element size.
DEF_OP(Float_FromGPR_S) {
  auto Op = IROp->C<IR::IROp_Float_FromGPR_S>();
  uint16_t Conv = (Op->Header.ElementSize << 8) | Op->SrcElementSize;
  switch (Conv) {
    case 0x0404: { // Float <- int32_t
      float Dst = (float)*GetSrc<int32_t*>(Data->SSAData, Op->Header.Args[0]);
      memcpy(GDP, &Dst, Op->Header.ElementSize);
      break;
    }
    case 0x0408: { // Float <- int64_t
      float Dst = (float)*GetSrc<int64_t*>(Data->SSAData, Op->Header.Args[0]);
      memcpy(GDP, &Dst, Op->Header.ElementSize);
      break;
    }
    case 0x0804: { // Double <- int32_t
      double Dst = (double)*GetSrc<int32_t*>(Data->SSAData, Op->Header.Args[0]);
      memcpy(GDP, &Dst, Op->Header.ElementSize);
      break;
    }
    case 0x0808: { // Double <- int64_t
      double Dst = (double)*GetSrc<int64_t*>(Data->SSAData, Op->Header.Args[0]);
      memcpy(GDP, &Dst, Op->Header.ElementSize);
      break;
    }
    // Previously an unknown conversion fell through silently, leaving the
    // destination untouched; report it like Float_FToF does.
    default: LOGMAN_MSG_A_FMT("Unknown Float_FromGPR_S sizes: 0x{:04x}", Conv); break;
  }
}
// Float_FToF: scalar float<->double conversion.
// Conv packs (destination element size << 8) | source element size.
DEF_OP(Float_FToF) {
auto Op = IROp->C<IR::IROp_Float_FToF>();
uint16_t Conv = (Op->Header.ElementSize << 8) | Op->SrcElementSize;
switch (Conv) {
case 0x0804: { // Double <- Float
double Dst = (double)*GetSrc<float*>(Data->SSAData, Op->Header.Args[0]);
memcpy(GDP, &Dst, 8);
break;
}
case 0x0408: { // Float <- Double
float Dst = (float)*GetSrc<double*>(Data->SSAData, Op->Header.Args[0]);
memcpy(GDP, &Dst, 4);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown FCVT sizes: 0x{:x}", Conv);
}
}
// Vector_SToF: per-element signed-integer -> float conversion.
// The min/max lambda parameters are required by the DO_VECTOR_1SRC_2TYPE_OP
// macro's signature but unused here (no clamping).
DEF_OP(Vector_SToF) {
auto Op = IROp->C<IR::IROp_Vector_SToF>();
uint8_t OpSize = IROp->Size;
void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
// Results are built in a scratch buffer so GDP is only written once at the end.
uint8_t Tmp[16]{};
uint8_t Elements = OpSize / Op->Header.ElementSize;
auto Func = [](auto a, auto min, auto max) { return a; };
switch (Op->Header.ElementSize) {
DO_VECTOR_1SRC_2TYPE_OP(4, float, int32_t, Func, 0, 0)
DO_VECTOR_1SRC_2TYPE_OP(8, double, int64_t, Func, 0, 0)
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
}
memcpy(GDP, Tmp, OpSize);
}
// Vector_FToZS: per-element float -> signed integer, rounding toward zero
// (std::trunc).
DEF_OP(Vector_FToZS) {
auto Op = IROp->C<IR::IROp_Vector_FToZS>();
uint8_t OpSize = IROp->Size;
void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
uint8_t Tmp[16]{};
uint8_t Elements = OpSize / Op->Header.ElementSize;
auto Func = [](auto a, auto min, auto max) { return std::trunc(a); };
switch (Op->Header.ElementSize) {
DO_VECTOR_1SRC_2TYPE_OP(4, int32_t, float, Func, 0, 0)
DO_VECTOR_1SRC_2TYPE_OP(8, int64_t, double, Func, 0, 0)
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
}
memcpy(GDP, Tmp, OpSize);
}
// Vector_FToS: per-element float -> signed integer using the current rounding
// mode (std::nearbyint).
DEF_OP(Vector_FToS) {
auto Op = IROp->C<IR::IROp_Vector_FToS>();
uint8_t OpSize = IROp->Size;
void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
uint8_t Tmp[16]{};
uint8_t Elements = OpSize / Op->Header.ElementSize;
auto Func = [](auto a, auto min, auto max) { return std::nearbyint(a); };
switch (Op->Header.ElementSize) {
DO_VECTOR_1SRC_2TYPE_OP(4, int32_t, float, Func, 0, 0)
DO_VECTOR_1SRC_2TYPE_OP(8, int64_t, double, Func, 0, 0)
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
}
memcpy(GDP, Tmp, OpSize);
}
// Vector_FToF: per-element float<->double conversion on vectors.
// Conv packs (destination element size << 8) | source element size.
DEF_OP(Vector_FToF) {
auto Op = IROp->C<IR::IROp_Vector_FToF>();
uint8_t OpSize = IROp->Size;
void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
uint8_t Tmp[16]{};
uint16_t Conv = (Op->Header.ElementSize << 8) | Op->SrcElementSize;
auto Func = [](auto a, auto min, auto max) { return a; };
switch (Conv) {
case 0x0804: { // Double <- float
// Only the lower elements from the source
// This uses half the source elements
uint8_t Elements = OpSize / 8;
DO_VECTOR_1SRC_2TYPE_OP_NOSIZE(double, float, Func, 0, 0)
break;
}
case 0x0408: { // Float <- Double
// Little bit tricky here
// Sometimes is used to convert from a 128bit vector register
// in to a 64bit vector register with different sized elements
// eg: %ssa5 i32v2 = Vector_FToF %ssa4 i128, #0x8
uint8_t Elements = (OpSize << 1) / Op->SrcElementSize;
DO_VECTOR_1SRC_2TYPE_OP_NOSIZE(float, double, Func, 0, 0)
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Conversion Type : 0x{:04x}", Conv); break;
}
memcpy(GDP, Tmp, OpSize);
}
DEF_OP(Vector_FToI) {
  // Vector float rounding to integral-valued float (x86 ROUNDPS/ROUNDPD
  // style): the elements stay floating point, only the value is rounded.
  auto Op = IROp->C<IR::IROp_Vector_FToI>();
  uint8_t OpSize = IROp->Size;

  void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
  uint8_t Tmp[16]{};

  uint8_t Elements = OpSize / Op->Header.ElementSize;

  // Round_Nearest and Round_Host both map to std::rint, which follows the
  // current host rounding mode; for Round_Nearest the mode is presumably
  // round-to-nearest-even at this point -- TODO confirm the FCW handling.
  auto Func_Nearest = [](auto a) { return std::rint(a); };
  auto Func_Neg = [](auto a) { return std::floor(a); };
  auto Func_Pos = [](auto a) { return std::ceil(a); };
  auto Func_Trunc = [](auto a) { return std::trunc(a); };
  auto Func_Host = [](auto a) { return std::rint(a); };

  // NOTE(review): unlike the sibling conversion ops, the inner element-size
  // switches have no default diagnostic; an unexpected size silently leaves
  // Tmp zeroed -- confirm element sizes are validated upstream.
  switch (Op->Round) {
    case FEXCore::IR::Round_Nearest.Val:
      switch (Op->Header.ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Nearest)
        DO_VECTOR_1SRC_OP(8, double, Func_Nearest)
      }
      break;
    case FEXCore::IR::Round_Negative_Infinity.Val:
      switch (Op->Header.ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Neg)
        DO_VECTOR_1SRC_OP(8, double, Func_Neg)
      }
      break;
    case FEXCore::IR::Round_Positive_Infinity.Val:
      switch (Op->Header.ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Pos)
        DO_VECTOR_1SRC_OP(8, double, Func_Pos)
      }
      break;
    case FEXCore::IR::Round_Towards_Zero.Val:
      switch (Op->Header.ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Trunc)
        DO_VECTOR_1SRC_OP(8, double, Func_Trunc)
      }
      break;
    case FEXCore::IR::Round_Host.Val:
      switch (Op->Header.ElementSize) {
        DO_VECTOR_1SRC_OP(4, float, Func_Host)
        DO_VECTOR_1SRC_OP(8, double, Func_Host)
      }
      break;
  }
  memcpy(GDP, Tmp, OpSize);
}
#undef DEF_OP
// Installs this translation unit's conversion op implementations into the
// shared interpreter dispatch table (OpHandlers), keyed by IR opcode.
void InterpreterOps::RegisterConversionHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
  REGISTER_OP(VINSGPR,         VInsGPR);
  REGISTER_OP(VCASTFROMGPR,    VCastFromGPR);
  REGISTER_OP(FLOAT_FROMGPR_S, Float_FromGPR_S);
  REGISTER_OP(FLOAT_FTOF,      Float_FToF);
  REGISTER_OP(VECTOR_STOF,     Vector_SToF);
  REGISTER_OP(VECTOR_FTOZS,    Vector_FToZS);
  REGISTER_OP(VECTOR_FTOS,     Vector_FToS);
  REGISTER_OP(VECTOR_FTOF,     Vector_FToF);
  REGISTER_OP(VECTOR_FTOI,     Vector_FToI);
#undef REGISTER_OP
}
}

View File

@ -0,0 +1,443 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
namespace AES {
// Software AES helpers operating on a column-major 16-byte state, matching
// the layout the x86 AES-NI instructions use inside an XMM register.
//
// The GF(2^8) multiplications use the AES reduction polynomial
// x^8 + x^4 + x^3 + x + 1 (0x11b), computed via repeated xtime instead of
// lookup tables.

// Multiply by {02} in GF(2^8) ("xtime"): shift left, conditionally reduce.
static uint8_t FFMul02(uint8_t in) {
  return static_cast<uint8_t>((in << 1) ^ ((in & 0x80) ? 0x1b : 0x00));
}

// Multiply by {03}: 3x = 2x + x.
static uint8_t FFMul03(uint8_t in) {
  return static_cast<uint8_t>(FFMul02(in) ^ in);
}

// Multiply by {09}: 9x = 8x + x.
static uint8_t FFMul09(uint8_t in) {
  return static_cast<uint8_t>(FFMul02(FFMul02(FFMul02(in))) ^ in);
}

// Multiply by {0b}: 11x = 8x + 2x + x.
static uint8_t FFMul0B(uint8_t in) {
  const uint8_t x2 = FFMul02(in);
  const uint8_t x8 = FFMul02(FFMul02(x2));
  return static_cast<uint8_t>(x8 ^ x2 ^ in);
}

// Multiply by {0d}: 13x = 8x + 4x + x.
static uint8_t FFMul0D(uint8_t in) {
  const uint8_t x4 = FFMul02(FFMul02(in));
  const uint8_t x8 = FFMul02(x4);
  return static_cast<uint8_t>(x8 ^ x4 ^ in);
}

// Multiply by {0e}: 14x = 8x + 4x + 2x.
static uint8_t FFMul0E(uint8_t in) {
  const uint8_t x2 = FFMul02(in);
  const uint8_t x4 = FFMul02(x2);
  const uint8_t x8 = FFMul02(x4);
  return static_cast<uint8_t>(x8 ^ x4 ^ x2);
}

// AES ShiftRows as a byte permutation of the column-major state.
static __uint128_t ShiftRows(uint8_t *State) {
  static constexpr uint8_t Map[16] = {
    0,  5,  10, 15,
    4,  9,  14, 3,
    8,  13, 2,  7,
    12, 1,  6,  11,
  };

  uint8_t Shifted[16];
  for (size_t i = 0; i < 16; ++i) {
    Shifted[i] = State[Map[i]];
  }

  __uint128_t Res{};
  memcpy(&Res, Shifted, sizeof(Shifted));
  return Res;
}

// Inverse of ShiftRows; Map here is the inverse permutation of the one above.
static __uint128_t InvShiftRows(uint8_t *State) {
  static constexpr uint8_t Map[16] = {
    0,  13, 10, 7,
    4,  1,  14, 11,
    8,  5,  2,  15,
    12, 9,  6,  3,
  };

  uint8_t Shifted[16];
  for (size_t i = 0; i < 16; ++i) {
    Shifted[i] = State[Map[i]];
  }

  __uint128_t Res{};
  memcpy(&Res, Shifted, sizeof(Shifted));
  return Res;
}

// AES SubBytes over the first `Bytes` bytes of the state (clamped to 16).
// Partial widths are used by AESKeyGenAssist's 32-bit SubWord.
static __uint128_t SubBytes(uint8_t *State, size_t Bytes) {
  // 16x16 matrix table
  static const uint8_t SubstitutionTable[256] = {
    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
  };

  // Clamp so we never index or copy past the 16-byte state.
  const size_t Count = std::min(Bytes, (size_t)16);
  uint8_t Substituted[16]{};
  for (size_t i = 0; i < Count; ++i) {
    Substituted[i] = SubstitutionTable[State[i]];
  }

  __uint128_t Res{};
  memcpy(&Res, Substituted, Count);
  return Res;
}

// Inverse of SubBytes over the full 16-byte state.
static __uint128_t InvSubBytes(uint8_t *State) {
  // 16x16 matrix table
  static const uint8_t InvSubstitutionTable[256] = {
    0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
    0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
    0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
    0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
    0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
    0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
    0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
    0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
    0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
    0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
    0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
    0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
    0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
    0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
    0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
    0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
  };

  uint8_t Substituted[16];
  for (size_t i = 0; i < 16; ++i) {
    Substituted[i] = InvSubstitutionTable[State[i]];
  }

  __uint128_t Res{};
  memcpy(&Res, Substituted, sizeof(Substituted));
  return Res;
}

// AES MixColumns: each column is multiplied by the fixed matrix
// [02 03 01 01; 01 02 03 01; 01 01 02 03; 03 01 01 02] over GF(2^8).
static __uint128_t MixColumns(uint8_t *State) {
  uint8_t Out[16];
  for (size_t Column = 0; Column < 4; ++Column) {
    const uint8_t *In = &State[Column * 4];
    Out[Column * 4 + 0] = FFMul02(In[0]) ^ FFMul03(In[1]) ^ In[2] ^ In[3];
    Out[Column * 4 + 1] = In[0] ^ FFMul02(In[1]) ^ FFMul03(In[2]) ^ In[3];
    Out[Column * 4 + 2] = In[0] ^ In[1] ^ FFMul02(In[2]) ^ FFMul03(In[3]);
    Out[Column * 4 + 3] = FFMul03(In[0]) ^ In[1] ^ In[2] ^ FFMul02(In[3]);
  }

  __uint128_t Res{};
  memcpy(&Res, Out, sizeof(Out));
  return Res;
}

// Inverse MixColumns: fixed matrix [0e 0b 0d 09; 09 0e 0b 0d; 0d 09 0e 0b;
// 0b 0d 09 0e] over GF(2^8).
static __uint128_t InvMixColumns(uint8_t *State) {
  uint8_t Out[16];
  for (size_t Column = 0; Column < 4; ++Column) {
    const uint8_t *In = &State[Column * 4];
    Out[Column * 4 + 0] = FFMul0E(In[0]) ^ FFMul0B(In[1]) ^ FFMul0D(In[2]) ^ FFMul09(In[3]);
    Out[Column * 4 + 1] = FFMul09(In[0]) ^ FFMul0E(In[1]) ^ FFMul0B(In[2]) ^ FFMul0D(In[3]);
    Out[Column * 4 + 2] = FFMul0D(In[0]) ^ FFMul09(In[1]) ^ FFMul0E(In[2]) ^ FFMul0B(In[3]);
    Out[Column * 4 + 3] = FFMul0B(In[0]) ^ FFMul0D(In[1]) ^ FFMul09(In[2]) ^ FFMul0E(In[3]);
  }

  __uint128_t Res{};
  memcpy(&Res, Out, sizeof(Out));
  return Res;
}
}
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)

// AESIMC: InvMixColumns on a single 128-bit value (converts an encryption
// round key into a decryption round key).
DEF_OP(AESImc) {
  auto Op = IROp->C<IR::IROp_VAESImc>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);

  // Pseudo-code
  // Dst = InvMixColumns(STATE)
  __uint128_t Tmp{};
  Tmp = AES::InvMixColumns(reinterpret_cast<uint8_t*>(&Src1));
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

// AESENC: one full AES encryption round on Src1 with round key Src2.
DEF_OP(AESEnc) {
  auto Op = IROp->C<IR::IROp_VAESEnc>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
  __uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);

  // Pseudo-code
  // STATE = Src1
  // RoundKey = Src2
  // STATE = ShiftRows(STATE)
  // STATE = SubBytes(STATE)
  // STATE = MixColumns(STATE)
  // Dst = STATE XOR RoundKey
  __uint128_t Tmp{};
  Tmp = AES::ShiftRows(reinterpret_cast<uint8_t*>(&Src1));
  Tmp = AES::SubBytes(reinterpret_cast<uint8_t*>(&Tmp), 16);
  Tmp = AES::MixColumns(reinterpret_cast<uint8_t*>(&Tmp));
  Tmp = Tmp ^ Src2;
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

// AESENCLAST: final encryption round -- same as AESENC but without
// MixColumns.
DEF_OP(AESEncLast) {
  auto Op = IROp->C<IR::IROp_VAESEncLast>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
  __uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);

  // Pseudo-code
  // STATE = Src1
  // RoundKey = Src2
  // STATE = ShiftRows(STATE)
  // STATE = SubBytes(STATE)
  // Dst = STATE XOR RoundKey
  __uint128_t Tmp{};
  Tmp = AES::ShiftRows(reinterpret_cast<uint8_t*>(&Src1));
  Tmp = AES::SubBytes(reinterpret_cast<uint8_t*>(&Tmp), 16);
  Tmp = Tmp ^ Src2;
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

// AESDEC: one full AES decryption round on Src1 with round key Src2.
DEF_OP(AESDec) {
  auto Op = IROp->C<IR::IROp_VAESDec>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
  __uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);

  // Pseudo-code
  // STATE = Src1
  // RoundKey = Src2
  // STATE = InvShiftRows(STATE)
  // STATE = InvSubBytes(STATE)
  // STATE = InvMixColumns(STATE)
  // Dst = STATE XOR RoundKey
  __uint128_t Tmp{};
  Tmp = AES::InvShiftRows(reinterpret_cast<uint8_t*>(&Src1));
  Tmp = AES::InvSubBytes(reinterpret_cast<uint8_t*>(&Tmp));
  Tmp = AES::InvMixColumns(reinterpret_cast<uint8_t*>(&Tmp));
  Tmp = Tmp ^ Src2;
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

// AESDECLAST: final decryption round -- same as AESDEC but without
// InvMixColumns.
DEF_OP(AESDecLast) {
  auto Op = IROp->C<IR::IROp_VAESDecLast>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
  __uint128_t Src2 = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[1]);

  // Pseudo-code
  // STATE = Src1
  // RoundKey = Src2
  // STATE = InvShiftRows(STATE)
  // STATE = InvSubBytes(STATE)
  // Dst = STATE XOR RoundKey
  __uint128_t Tmp{};
  Tmp = AES::InvShiftRows(reinterpret_cast<uint8_t*>(&Src1));
  Tmp = AES::InvSubBytes(reinterpret_cast<uint8_t*>(&Tmp));
  Tmp = Tmp ^ Src2;
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

// AESKEYGENASSIST: key-expansion helper producing SubWord/RotWord results
// for dwords X1 and X3 of the source, XORed with the round constant.
DEF_OP(AESKeyGenAssist) {
  auto Op = IROp->C<IR::IROp_VAESKeyGenAssist>();
  uint8_t *Src1 = GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]);

  // Pseudo-code
  // X3 = Src1[127:96]
  // X2 = Src1[95:64]
  // X1 = Src1[63:32]
  // X0 = Src1[31:0]
  // RCON = (Zext)rcon
  // Dest[31:0] = SubWord(X1)
  // Dest[63:32] = RotWord(SubWord(X1)) XOR RCON
  // Dest[95:64] = SubWord(X3)
  // Dest[127:96] = RotWord(SubWord(X3)) XOR RCON
  __uint128_t Tmp{};
  uint32_t X1{};
  uint32_t X3{};
  memcpy(&X1, &Src1[4], 4);
  memcpy(&X3, &Src1[12], 4);

  // SubBytes over 4 bytes implements SubWord; the narrowing assignment keeps
  // the low 32 bits of the returned __uint128_t.
  uint32_t SubWord_X1 = AES::SubBytes(reinterpret_cast<uint8_t*>(&X1), 4);
  uint32_t SubWord_X3 = AES::SubBytes(reinterpret_cast<uint8_t*>(&X3), 4);

  // Rotate-right helper; RotWord is a rotate of the 32-bit word by 8.
  auto Ror = [] (auto In, auto R) {
    auto RotateMask = sizeof(In) * 8 - 1;
    R &= RotateMask;
    return (In >> R) | (In << (sizeof(In) * 8 - R));
  };

  uint32_t Rot_X1 = Ror(SubWord_X1, 8);
  uint32_t Rot_X3 = Ror(SubWord_X3, 8);

  // Assemble the 128-bit result from the high dword down to the low dword.
  Tmp = Rot_X3 ^ Op->RCON;
  Tmp <<= 32;
  Tmp |= SubWord_X3;
  Tmp <<= 32;
  Tmp |= Rot_X1 ^ Op->RCON;
  Tmp <<= 32;
  Tmp |= SubWord_X1;
  memcpy(GDP, &Tmp, sizeof(Tmp));
}

#undef DEF_OP

// Installs the AES op implementations into the interpreter dispatch table.
void InterpreterOps::RegisterEncryptionHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
  REGISTER_OP(VAESIMC,           AESImc);
  REGISTER_OP(VAESENC,           AESEnc);
  REGISTER_OP(VAESENCLAST,       AESEncLast);
  REGISTER_OP(VAESDEC,           AESDec);
  REGISTER_OP(VAESDECLAST,       AESDecLast);
  REGISTER_OP(VAESKEYGENASSIST,  AESKeyGenAssist);
#undef REGISTER_OP
}
}

View File

@ -0,0 +1,389 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include "F80Ops.h"
#include <cstdint>
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
DEF_OP(F80LOADFCW) {
FEXCore::CPU::OpHandlers<IR::OP_F80LOADFCW>::handle(*GetSrc<uint16_t*>(Data->SSAData, IROp->Args[0]));
}
DEF_OP(F80ADD) {
auto Op = IROp->C<IR::IROp_F80Add>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FADD(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80SUB) {
auto Op = IROp->C<IR::IROp_F80Sub>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FSUB(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80MUL) {
auto Op = IROp->C<IR::IROp_F80Mul>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FMUL(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80DIV) {
auto Op = IROp->C<IR::IROp_F80Div>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FDIV(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80FYL2X) {
auto Op = IROp->C<IR::IROp_F80FYL2X>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FYL2X(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80ATAN) {
auto Op = IROp->C<IR::IROp_F80ATAN>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FATAN(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80FPREM1) {
auto Op = IROp->C<IR::IROp_F80FPREM1>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FREM1(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80FPREM) {
auto Op = IROp->C<IR::IROp_F80FPREM>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FREM(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80SCALE) {
auto Op = IROp->C<IR::IROp_F80SCALE>();
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FSCALE(Src1, Src2);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80CVT) {
auto Op = IROp->C<IR::IROp_F80CVT>();
uint8_t OpSize = IROp->Size;
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
switch (OpSize) {
case 4: {
float Tmp = Src;
memcpy(GDP, &Tmp, OpSize);
break;
}
case 8: {
double Tmp = Src;
memcpy(GDP, &Tmp, OpSize);
break;
}
default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize);
}
}
DEF_OP(F80CVTINT) {
auto Op = IROp->C<IR::IROp_F80CVTInt>();
uint8_t OpSize = IROp->Size;
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
switch (OpSize) {
case 2: {
int16_t Tmp = (Op->Truncate? FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle2t : FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle2)(Src);
memcpy(GDP, &Tmp, sizeof(Tmp));
break;
}
case 4: {
int32_t Tmp = (Op->Truncate? FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle4t : FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle4)(Src);
memcpy(GDP, &Tmp, sizeof(Tmp));
break;
}
case 8: {
int64_t Tmp = (Op->Truncate? FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle8t : FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle8)(Src);
memcpy(GDP, &Tmp, sizeof(Tmp));
break;
}
default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize);
}
}
DEF_OP(F80CVTTO) {
auto Op = IROp->C<IR::IROp_F80CVTTo>();
switch (Op->Size) {
case 4: {
float Src = *GetSrc<float *>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp = Src;
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
break;
}
case 8: {
double Src = *GetSrc<double *>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp = Src;
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
break;
}
default: LogMan::Msg::DFmt("Unhandled size: {}", Op->Size);
}
}
DEF_OP(F80CVTTOINT) {
auto Op = IROp->C<IR::IROp_F80CVTToInt>();
switch (Op->Size) {
case 2: {
int16_t Src = *GetSrc<int16_t*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp = Src;
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
break;
}
case 4: {
int32_t Src = *GetSrc<int32_t*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp = Src;
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
break;
}
default: LogMan::Msg::DFmt("Unhandled size: {}", Op->Size);
}
}
DEF_OP(F80ROUND) {
auto Op = IROp->C<IR::IROp_F80Round>();
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FRNDINT(Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80F2XM1) {
auto Op = IROp->C<IR::IROp_F80F2XM1>();
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::F2XM1(Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80TAN) {
auto Op = IROp->C<IR::IROp_F80TAN>();
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FTAN(Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80SQRT) {
auto Op = IROp->C<IR::IROp_F80SQRT>();
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FSQRT(Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80SIN) {
auto Op = IROp->C<IR::IROp_F80SIN>();
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FSIN(Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80COS) {
auto Op = IROp->C<IR::IROp_F80COS>();
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FCOS(Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80XTRACT_EXP) {
auto Op = IROp->C<IR::IROp_F80XTRACT_EXP>();
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FXTRACT_EXP(Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80XTRACT_SIG) {
auto Op = IROp->C<IR::IROp_F80XTRACT_SIG>();
X80SoftFloat Src = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Tmp;
Tmp = X80SoftFloat::FXTRACT_SIG(Src);
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80CMP) {
auto Op = IROp->C<IR::IROp_F80Cmp>();
uint32_t ResultFlags{};
X80SoftFloat Src1 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);
X80SoftFloat Src2 = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[1]);
bool eq, lt, nan;
X80SoftFloat::FCMP(Src1, Src2, &eq, &lt, &nan);
if (Op->Flags & (1 << IR::FCMP_FLAG_LT) &&
lt) {
ResultFlags |= (1 << IR::FCMP_FLAG_LT);
}
if (Op->Flags & (1 << IR::FCMP_FLAG_UNORDERED) &&
nan) {
ResultFlags |= (1 << IR::FCMP_FLAG_UNORDERED);
}
if (Op->Flags & (1 << IR::FCMP_FLAG_EQ) &&
eq) {
ResultFlags |= (1 << IR::FCMP_FLAG_EQ);
}
GD = ResultFlags;
}
DEF_OP(F80BCDLOAD) {
auto Op = IROp->C<IR::IROp_F80BCDLoad>();
uint8_t *Src1 = GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]);
uint64_t BCD{};
// We walk through each uint8_t and pull out the BCD encoding
// Each 4bit split is a digit
// Only 0-9 is supported, A-F results in undefined data
// | 4 bit | 4 bit |
// | 10s place | 1s place |
// EG 0x48 = 48
// EG 0x4847 = 4847
// This gives us an 18digit value encoded in BCD
// The last byte lets us know if it negative or not
for (size_t i = 0; i < 9; ++i) {
uint8_t Digit = Src1[8 - i];
// First shift our last value over
BCD *= 100;
// Add the tens place digit
BCD += (Digit >> 4) * 10;
// Add the ones place digit
BCD += Digit & 0xF;
}
// Set negative flag once converted to x87
bool Negative = Src1[9] & 0x80;
X80SoftFloat Tmp;
Tmp = BCD;
Tmp.Sign = Negative;
memcpy(GDP, &Tmp, sizeof(X80SoftFloat));
}
DEF_OP(F80BCDSTORE) {
  // Convert an extended float to the 80-bit packed-BCD form (x87 FBSTP).
  auto const *Op = IROp->C<IR::IROp_F80BCDStore>();
  X80SoftFloat Value = *GetSrc<X80SoftFloat*>(Data->SSAData, Op->Header.Args[0]);

  // Remember the sign, then convert the magnitude to an integer.
  const bool Negative = Value.Sign;
  Value.Sign = 0;
  uint64_t Remaining = Value;

  // Emit two decimal digits per byte, least significant byte first, stopping
  // early once the value is exhausted (remaining bytes stay zero).
  uint8_t Packed[10]{};
  for (size_t Idx = 0; Idx < 9 && Remaining != 0; ++Idx) {
    const uint8_t TwoDigits = Remaining % 100;
    Remaining /= 100;
    Packed[Idx] = ((TwoDigits / 10) << 4) | (TwoDigits % 10);
  }

  // Sign lives in bit 7 of the final byte.
  Packed[9] = Negative ? 0x80 : 0;
  memcpy(GDP, Packed, 10);
}
#undef DEF_OP
// Installs the F80 (x87 extended-precision) op handlers into the shared
// InterpreterOps::OpHandlers dispatch table. Called once at startup from
// InitializeInterpreterOpHandlers.
void InterpreterOps::RegisterF80Handlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
REGISTER_OP(F80LOADFCW, F80LOADFCW);
REGISTER_OP(F80ADD, F80ADD);
REGISTER_OP(F80SUB, F80SUB);
REGISTER_OP(F80MUL, F80MUL);
REGISTER_OP(F80DIV, F80DIV);
REGISTER_OP(F80FYL2X, F80FYL2X);
REGISTER_OP(F80ATAN, F80ATAN);
REGISTER_OP(F80FPREM1, F80FPREM1);
REGISTER_OP(F80FPREM, F80FPREM);
REGISTER_OP(F80SCALE, F80SCALE);
REGISTER_OP(F80CVT, F80CVT);
REGISTER_OP(F80CVTINT, F80CVTINT);
REGISTER_OP(F80CVTTO, F80CVTTO);
REGISTER_OP(F80CVTTOINT, F80CVTTOINT);
REGISTER_OP(F80ROUND, F80ROUND);
REGISTER_OP(F80F2XM1, F80F2XM1);
REGISTER_OP(F80TAN, F80TAN);
REGISTER_OP(F80SQRT, F80SQRT);
REGISTER_OP(F80SIN, F80SIN);
REGISTER_OP(F80COS, F80COS);
REGISTER_OP(F80XTRACT_EXP, F80XTRACT_EXP);
REGISTER_OP(F80XTRACT_SIG, F80XTRACT_SIG);
REGISTER_OP(F80CMP, F80CMP);
REGISTER_OP(F80BCDLOAD, F80BCDLOAD);
REGISTER_OP(F80BCDSTORE, F80BCDSTORE);
#undef REGISTER_OP
}
}

View File

@ -0,0 +1,330 @@
#pragma once
#include "Common/SoftFloat.h"
#include "Common/SoftFloat-3e/softfloat.h"
#include <FEXCore/IR/IR.h>
namespace FEXCore::CPU {
// Fallback handler table: the primary template is intentionally empty and
// each supported IR op provides an explicit specialization below. These
// helpers implement x87 80-bit ops that backends fall back to.
template<IR::IROps Op>
struct OpHandlers {
};
// F80CVTTO: widen a 32-bit or 64-bit float to 80-bit extended precision
// via X80SoftFloat's converting constructor.
template<>
struct OpHandlers<IR::OP_F80CVTTO> {
static X80SoftFloat handle4(float src) {
return src;
}
static X80SoftFloat handle8(double src) {
return src;
}
};
// F80CMP: compare two 80-bit values and pack the FCMP_FLAG_* bits selected
// by the compile-time Flags mask.
template<>
struct OpHandlers<IR::OP_F80CMP> {
  template<uint32_t Flags>
  static uint64_t handle(X80SoftFloat Src1, X80SoftFloat Src2) {
    bool Equal{};
    bool LessThan{};
    bool Unordered{};
    X80SoftFloat::FCMP(Src1, Src2, &Equal, &LessThan, &Unordered);

    uint64_t Result = 0;
    if ((Flags & (1U << IR::FCMP_FLAG_LT)) && LessThan) {
      Result |= 1U << IR::FCMP_FLAG_LT;
    }
    if ((Flags & (1U << IR::FCMP_FLAG_UNORDERED)) && Unordered) {
      Result |= 1U << IR::FCMP_FLAG_UNORDERED;
    }
    if ((Flags & (1U << IR::FCMP_FLAG_EQ)) && Equal) {
      Result |= 1U << IR::FCMP_FLAG_EQ;
    }
    return Result;
  }
};
// F80CVT: narrow 80-bit extended precision to a 32-bit or 64-bit float via
// X80SoftFloat's conversion operators.
template<>
struct OpHandlers<IR::OP_F80CVT> {
static float handle4(X80SoftFloat src) {
return src;
}
static double handle8(X80SoftFloat src) {
return src;
}
};
// F80CVTINT: convert 80-bit extended precision to a signed integer.
// handleN go through X80SoftFloat's conversion operator; the handleNt
// variants explicitly truncate toward zero (softfloat_round_minMag).
template<>
struct OpHandlers<IR::OP_F80CVTINT> {
static int16_t handle2(X80SoftFloat src) {
return src;
}
static int32_t handle4(X80SoftFloat src) {
return src;
}
static int64_t handle8(X80SoftFloat src) {
return src;
}
static int16_t handle2t(X80SoftFloat src) {
// softfloat has no direct to-int16 conversion; truncate to int32 and
// clamp the result to the int16 range by hand.
auto rv = extF80_to_i32(src, softfloat_round_minMag, false);
if (rv > INT16_MAX) {
return INT16_MAX;
} else if (rv < INT16_MIN) {
return INT16_MIN;
} else {
return rv;
}
}
static int32_t handle4t(X80SoftFloat src) {
return extF80_to_i32(src, softfloat_round_minMag, false);
}
static int64_t handle8t(X80SoftFloat src) {
return extF80_to_i64(src, softfloat_round_minMag, false);
}
};
// F80CVTTOINT: widen a 16-bit or 32-bit signed integer to 80-bit extended.
template<>
struct OpHandlers<IR::OP_F80CVTTOINT> {
static X80SoftFloat handle2(int16_t src) {
return src;
}
static X80SoftFloat handle4(int32_t src) {
return src;
}
};
// Unary x87 helpers: each forwards to the matching X80SoftFloat operation.
template<>
struct OpHandlers<IR::OP_F80ROUND> {
static X80SoftFloat handle(X80SoftFloat Src1) {
return X80SoftFloat::FRNDINT(Src1);
}
};
template<>
struct OpHandlers<IR::OP_F80F2XM1> {
static X80SoftFloat handle(X80SoftFloat Src1) {
return X80SoftFloat::F2XM1(Src1);
}
};
template<>
struct OpHandlers<IR::OP_F80TAN> {
static X80SoftFloat handle(X80SoftFloat Src1) {
return X80SoftFloat::FTAN(Src1);
}
};
template<>
struct OpHandlers<IR::OP_F80SQRT> {
static X80SoftFloat handle(X80SoftFloat Src1) {
return X80SoftFloat::FSQRT(Src1);
}
};
template<>
struct OpHandlers<IR::OP_F80SIN> {
static X80SoftFloat handle(X80SoftFloat Src1) {
return X80SoftFloat::FSIN(Src1);
}
};
template<>
struct OpHandlers<IR::OP_F80COS> {
static X80SoftFloat handle(X80SoftFloat Src1) {
return X80SoftFloat::FCOS(Src1);
}
};
template<>
struct OpHandlers<IR::OP_F80XTRACT_EXP> {
static X80SoftFloat handle(X80SoftFloat Src1) {
return X80SoftFloat::FXTRACT_EXP(Src1);
}
};
template<>
struct OpHandlers<IR::OP_F80XTRACT_SIG> {
static X80SoftFloat handle(X80SoftFloat Src1) {
return X80SoftFloat::FXTRACT_SIG(Src1);
}
};
// Binary x87 helpers: forward both operands to the X80SoftFloat operation.
template<>
struct OpHandlers<IR::OP_F80ADD> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FADD(Src1, Src2);
}
};
template<>
struct OpHandlers<IR::OP_F80SUB> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FSUB(Src1, Src2);
}
};
template<>
struct OpHandlers<IR::OP_F80MUL> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FMUL(Src1, Src2);
}
};
template<>
struct OpHandlers<IR::OP_F80DIV> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FDIV(Src1, Src2);
}
};
template<>
struct OpHandlers<IR::OP_F80FYL2X> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FYL2X(Src1, Src2);
}
};
template<>
struct OpHandlers<IR::OP_F80ATAN> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FATAN(Src1, Src2);
}
};
template<>
struct OpHandlers<IR::OP_F80FPREM1> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FREM1(Src1, Src2);
}
};
template<>
struct OpHandlers<IR::OP_F80FPREM> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FREM(Src1, Src2);
}
};
template<>
struct OpHandlers<IR::OP_F80SCALE> {
static X80SoftFloat handle(X80SoftFloat Src1, X80SoftFloat Src2) {
return X80SoftFloat::FSCALE(Src1, Src2);
}
};
// F80BCDSTORE: convert an extended float to 80-bit packed BCD (x87 FBSTP).
// NOTE: the ten result bytes are returned through an X80SoftFloat's raw
// storage — the return value is a BCD image, not a numeric float.
template<>
struct OpHandlers<IR::OP_F80BCDSTORE> {
static X80SoftFloat handle(X80SoftFloat Src1) {
bool Negative = Src1.Sign;
// Clear the Sign bit
Src1.Sign = 0;
// Integer magnitude via X80SoftFloat's conversion operator.
uint64_t Tmp = Src1;
X80SoftFloat Rv;
uint8_t *BCD = reinterpret_cast<uint8_t*>(&Rv);
memset(BCD, 0, 10);
// Two decimal digits per byte, least significant byte first.
for (size_t i = 0; i < 9; ++i) {
if (Tmp == 0) {
// Nothing left? Just leave
break;
}
// Extract the lower 100 values
uint8_t Digit = Tmp % 100;
// Now divide it for the next iteration
Tmp /= 100;
uint8_t UpperNibble = Digit / 10;
uint8_t LowerNibble = Digit % 10;
// Now store the BCD
BCD[i] = (UpperNibble << 4) | LowerNibble;
}
// Set negative flag once converted to x87
BCD[9] = Negative ? 0x80 : 0;
return Rv;
}
};
// F80BCDLOAD: convert 80-bit packed BCD (x87 FBLD operand) to extended
// float. The source arrives as raw BCD bytes in an X80SoftFloat's storage.
template<>
struct OpHandlers<IR::OP_F80BCDLOAD> {
static X80SoftFloat handle(X80SoftFloat Src) {
uint8_t *Src1 = reinterpret_cast<uint8_t *>(&Src);
uint64_t BCD{};
// We walk through each uint8_t and pull out the BCD encoding
// Each 4bit split is a digit
// Only 0-9 is supported, A-F results in undefined data
// | 4 bit | 4 bit |
// | 10s place | 1s place |
// EG 0x48 = 48
// EG 0x4847 = 4847
// This gives us an 18digit value encoded in BCD
// The last byte lets us know if it negative or not
for (size_t i = 0; i < 9; ++i) {
uint8_t Digit = Src1[8 - i];
// First shift our last value over
BCD *= 100;
// Add the tens place digit
BCD += (Digit >> 4) * 10;
// Add the ones place digit
BCD += Digit & 0xF;
}
// Set negative flag once converted to x87
bool Negative = Src1[9] & 0x80;
X80SoftFloat Tmp;
Tmp = BCD;
Tmp.Sign = Negative;
return Tmp;
}
};
// F80LOADFCW: apply an x87 control word (FCW) to the global softfloat state.
// Bits 8-9 are the precision-control field, bits 10-11 the rounding-control
// field; other FCW bits (exception masks) are ignored here.
template<>
struct OpHandlers<IR::OP_F80LOADFCW> {
  static void handle(uint16_t NewFCW) {
    // Precision control: 00 = 24-bit mantissa (float), 10 = 53-bit (double),
    // 11 = 64-bit (extended). 01 is reserved.
    auto PC = (NewFCW >> 8) & 3;
    switch (PC) {
      case 0: extF80_roundingPrecision = 32; break;
      case 2: extF80_roundingPrecision = 64; break;
      case 3: extF80_roundingPrecision = 80; break;
      case 1:
        LOGMAN_MSG_A_FMT("Invalid x87 precision mode, {}", PC);
        // Fix: terminate the case explicitly instead of falling off the end
        // of the switch; keeps the case safe if cases are ever reordered.
        break;
    }

    // Rounding control: nearest-even, toward -inf, toward +inf, toward zero.
    auto RC = (NewFCW >> 10) & 3;
    switch (RC) {
      case 0:
        softfloat_roundingMode = softfloat_round_near_even;
        break;
      case 1:
        softfloat_roundingMode = softfloat_round_min;
        break;
      case 2:
        softfloat_roundingMode = softfloat_round_max;
        break;
      case 3:
        softfloat_roundingMode = softfloat_round_minMag;
        break;
    }
  }
};
}

View File

@ -0,0 +1,27 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
DEF_OP(GetHostFlag) {
  // Extract one flag bit: shift the packed flag source right by the flag
  // index and mask down to a single bit.
  auto const *Op = IROp->C<IR::IROp_GetHostFlag>();
  const uint64_t Packed = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]);
  GD = (Packed >> Op->Flag) & 1;
}
#undef DEF_OP
// Installs the flag op handlers into the shared dispatch table.
void InterpreterOps::RegisterFlagHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
REGISTER_OP(GETHOSTFLAG, GetHostFlag);
#undef REGISTER_OP
}
}

View File

@ -32,19 +32,16 @@ public:
bool HandleSIGBUS(int Signal, void *info, void *ucontext);
static void InitializeInterpreterOpHandlers();
private:
FEXCore::Context::Context *CTX;
FEXCore::Core::InternalThreadState *State;
uint32_t AllocateTmpSpace(size_t Size);
template<typename Res>
Res GetDest(void* SSAData, IR::OrderedNodeWrapper Op);
template<typename Res>
Res GetSrc(void* SSAData, IR::OrderedNodeWrapper Src);
std::unique_ptr<Dispatcher> Dispatcher{};
};
}

View File

@ -95,10 +95,27 @@ bool InterpreterCore::HandleSIGBUS(int Signal, void *info, void *ucontext) {
return false;
}
void InitializeInterpreterOpHandlers() {
  // Default every dispatch-table entry to the Unhandled trap, then let each
  // op category overwrite its own entries.
  InterpreterOps::OpHandlers.fill(&InterpreterOps::Op_Unhandled);

  InterpreterOps::RegisterALUHandlers();
  InterpreterOps::RegisterAtomicHandlers();
  InterpreterOps::RegisterBranchHandlers();
  InterpreterOps::RegisterConversionHandlers();
  InterpreterOps::RegisterFlagHandlers();
  InterpreterOps::RegisterMemoryHandlers();
  InterpreterOps::RegisterMiscHandlers();
  InterpreterOps::RegisterMoveHandlers();
  InterpreterOps::RegisterVectorHandlers();
  InterpreterOps::RegisterEncryptionHandlers();
  InterpreterOps::RegisterF80Handlers();
}
InterpreterCore::InterpreterCore(FEXCore::Context::Context *ctx, FEXCore::Core::InternalThreadState *Thread, bool CompileThread)
: CTX {ctx}
, State {Thread} {
// Grab our space for temporary data
if (!CompileThread &&
CTX->Config.Core == FEXCore::Config::CONFIG_INTERPRETER) {

View File

@ -13,6 +13,7 @@ namespace FEXCore::Core {
namespace FEXCore::CPU {
class CPUBackend;
void InitializeInterpreterOpHandlers();
std::unique_ptr<CPUBackend> CreateInterpreterCore(FEXCore::Context::Context *ctx, FEXCore::Core::InternalThreadState *Thread, bool CompileThread);
}

View File

@ -0,0 +1,179 @@
#pragma once
#include <FEXCore/IR/IR.h>
// Destination accessors for the current op: GD is the 64-bit destination
// lvalue, GDP the raw pointer to the op's full 128-bit SSA slot.
#define GD *GetDest<uint64_t*>(Data->SSAData, Node)
#define GDP GetDest<void*>(Data->SSAData, Node)
// The DO_* helpers below expand inside op handlers as `case` bodies and
// expect locals named Src/Src1/Src2/Tmp/Elements at the expansion site.
// Scalar binary op: *Dst = func(*Src1, *Src2) at the given element size.
#define DO_OP(size, type, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(GDP); \
auto *Src1_d = reinterpret_cast<type*>(Src1); \
auto *Src2_d = reinterpret_cast<type*>(Src2); \
*Dst_d = func(*Src1_d, *Src2_d); \
break; \
}
// Scalar compare: result (possibly a wider type2) written to Tmp lane 0.
#define DO_SCALAR_COMPARE_OP(size, type, type2, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type2*>(Tmp); \
auto *Src1_d = reinterpret_cast<type*>(Src1); \
auto *Src2_d = reinterpret_cast<type*>(Src2); \
Dst_d[0] = func(Src1_d[0], Src2_d[0]); \
break; \
}
// Element-wise compare over `Elements` lanes, result lanes of type2.
#define DO_VECTOR_COMPARE_OP(size, type, type2, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type2*>(Tmp); \
auto *Src1_d = reinterpret_cast<type*>(Src1); \
auto *Src2_d = reinterpret_cast<type*>(Src2); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = func(Src1_d[i], Src2_d[i]); \
} \
break; \
}
// Element-wise binary op over `Elements` lanes.
#define DO_VECTOR_OP(size, type, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src1_d = reinterpret_cast<type*>(Src1); \
auto *Src2_d = reinterpret_cast<type*>(Src2); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = func(Src1_d[i], Src2_d[i]); \
} \
break; \
}
// Pairwise op: adjacent pairs of Src1 fill the low result lanes, adjacent
// pairs of Src2 fill the high result lanes.
#define DO_VECTOR_PAIR_OP(size, type, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src1_d = reinterpret_cast<type*>(Src1); \
auto *Src2_d = reinterpret_cast<type*>(Src2); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = func(Src1_d[i*2], Src1_d[i*2 + 1]); \
Dst_d[i+Elements] = func(Src2_d[i*2], Src2_d[i*2 + 1]); \
} \
break; \
}
// Vector-by-scalar op: the second operand is always the scalar *Src2.
#define DO_VECTOR_SCALAR_OP(size, type, func)\
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src1_d = reinterpret_cast<type*>(Src1); \
auto *Src2_d = reinterpret_cast<type*>(Src2); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = func(Src1_d[i], *Src2_d); \
} \
break; \
}
// Generator op with no sources: every lane receives func().
#define DO_VECTOR_0SRC_OP(size, type, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = func(); \
} \
break; \
}
// Element-wise unary op.
#define DO_VECTOR_1SRC_OP(size, type, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src_d = reinterpret_cast<type*>(Src); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = func(Src_d[i]); \
} \
break; \
}
// Horizontal reduction: fold all lanes with func, seeded by start_val,
// result stored in lane 0.
#define DO_VECTOR_REDUCE_1SRC_OP(size, type, func, start_val) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src_d = reinterpret_cast<type*>(Src); \
type begin = start_val; \
for (uint8_t i = 0; i < Elements; ++i) { \
begin = func(begin, Src_d[i]); \
} \
Dst_d[0] = begin; \
break; \
}
// Saturating element-wise binary op; func receives the [min, max] clamp.
#define DO_VECTOR_SAT_OP(size, type, func, min, max) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src1_d = reinterpret_cast<type*>(Src1); \
auto *Src2_d = reinterpret_cast<type*>(Src2); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = func(Src1_d[i], Src2_d[i], min, max); \
} \
break; \
}
// Converting unary op: source lanes are type2, results cast to type.
#define DO_VECTOR_1SRC_2TYPE_OP(size, type, type2, func, min, max) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src_d = reinterpret_cast<type2*>(Src); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = (type)func(Src_d[i], min, max); \
} \
break; \
}
// As DO_VECTOR_1SRC_2TYPE_OP, but without a case label so it can be used
// outside a switch.
#define DO_VECTOR_1SRC_2TYPE_OP_NOSIZE(type, type2, func, min, max) \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src_d = reinterpret_cast<type2*>(Src); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = (type)func(Src_d[i], min, max); \
}
// Converting op into the top half: low half copied from Src1, converted
// Src2 lanes written to the upper Elements lanes.
#define DO_VECTOR_1SRC_2TYPE_OP_TOP(size, type, type2, func, min, max) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src_d = reinterpret_cast<type2*>(Src2); \
memcpy(Dst_d, Src1, Elements * sizeof(type2));\
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i+Elements] = (type)func(Src_d[i], min, max); \
} \
break; \
}
// Converting op reading the upper source lanes into the low result lanes.
#define DO_VECTOR_1SRC_2TYPE_OP_TOP_SRC(size, type, type2, func, min, max) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src_d = reinterpret_cast<type2*>(Src); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = (type)func(Src_d[i+Elements], min, max); \
} \
break; \
}
// Widening binary op: type2 sources promoted to type before func.
#define DO_VECTOR_2SRC_2TYPE_OP(size, type, type2, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src1_d = reinterpret_cast<type2*>(Src1); \
auto *Src2_d = reinterpret_cast<type2*>(Src2); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = (type)func((type)Src1_d[i], (type)Src2_d[i]); \
} \
break; \
}
// Widening binary op over the upper halves of both sources.
#define DO_VECTOR_2SRC_2TYPE_OP_TOP_SRC(size, type, type2, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src1_d = reinterpret_cast<type2*>(Src1); \
auto *Src2_d = reinterpret_cast<type2*>(Src2); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = (type)func((type)Src1_d[i+Elements], (type)Src2_d[i+Elements]); \
} \
break; \
}
template<typename Res>
Res GetDest(void* SSAData, FEXCore::IR::OrderedNodeWrapper Op) {
  // Each SSA node owns one 128-bit slot; hand back its address reinterpreted
  // as the caller's pointer type.
  auto *Slots = reinterpret_cast<__uint128_t*>(SSAData);
  return reinterpret_cast<Res>(&Slots[Op.ID()]);
}
template<typename Res>
Res GetDest(void* SSAData, uint32_t Op) {
  // Index the 128-bit-per-node SSA slot array directly by node id.
  auto *Slots = reinterpret_cast<__uint128_t*>(SSAData);
  return reinterpret_cast<Res>(&Slots[Op]);
}
template<typename Res>
Res GetSrc(void* SSAData, FEXCore::IR::OrderedNodeWrapper Src) {
  // Sources live in the same 128-bit-per-node SSA slot array as destinations.
  auto *Slots = reinterpret_cast<__uint128_t*>(SSAData);
  return reinterpret_cast<Res>(&Slots[Src.ID()]);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,9 @@
#pragma once
#include <stdint.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/IR/IntrusiveIRList.h>
namespace FEXCore::Core {
struct InternalThreadState;
}
@ -42,5 +45,365 @@ namespace FEXCore::CPU {
public:
static void InterpretIR(FEXCore::Core::InternalThreadState *Thread, uint64_t Entry, FEXCore::IR::IRListView *CurrentIR, FEXCore::Core::DebugData *DebugData);
static bool GetFallbackHandler(IR::IROp_Header *IROp, FallbackInfo *Info);
static void RegisterALUHandlers();
static void RegisterAtomicHandlers();
static void RegisterBranchHandlers();
static void RegisterConversionHandlers();
static void RegisterFlagHandlers();
static void RegisterMemoryHandlers();
static void RegisterMiscHandlers();
static void RegisterMoveHandlers();
static void RegisterVectorHandlers();
static void RegisterEncryptionHandlers();
static void RegisterF80Handlers();
// Per-invocation interpreter state threaded through every op handler.
struct IROpData {
// Thread executing this block.
FEXCore::Core::InternalThreadState *State{};
// Guest entry address of the block being interpreted.
uint64_t CurrentEntry{};
// The IR list currently being walked.
FEXCore::IR::IRListView *CurrentIR{};
// Stack marker captured at dispatch; presumably used for signal/callback
// return unwinding — confirm against InterpretIR.
volatile void *StackEntry{};
// Backing storage for SSA values (one 128-bit slot per node id).
void *SSAData{};
// Block-level control-flow results set by branch/end ops.
struct {
bool Quit;
bool Redo;
} BlockResults{};
// Iterator over the current block's ops.
IR::NodeIterator BlockIterator{0, 0};
};
using OpHandler = std::function<void(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)>;
static std::array<OpHandler, FEXCore::IR::IROps::OP_LAST + 1> OpHandlers;
#define DEF_OP(x) static void Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
///< Unhandled handler
DEF_OP(Unhandled);
///< No-op Handler
DEF_OP(NoOp);
///< ALU Ops
DEF_OP(TruncElementPair);
DEF_OP(Constant);
DEF_OP(EntrypointOffset);
DEF_OP(InlineConstant);
DEF_OP(InlineEntrypointOffset);
DEF_OP(CycleCounter);
DEF_OP(Add);
DEF_OP(Sub);
DEF_OP(Neg);
DEF_OP(Mul);
DEF_OP(UMul);
DEF_OP(Div);
DEF_OP(UDiv);
DEF_OP(Rem);
DEF_OP(URem);
DEF_OP(MulH);
DEF_OP(UMulH);
DEF_OP(Or);
DEF_OP(And);
DEF_OP(Xor);
DEF_OP(Lshl);
DEF_OP(Lshr);
DEF_OP(Ashr);
DEF_OP(Rol);
DEF_OP(Ror);
DEF_OP(Extr);
DEF_OP(LDiv);
DEF_OP(LUDiv);
DEF_OP(LRem);
DEF_OP(LURem);
DEF_OP(Zext);
DEF_OP(Not);
DEF_OP(Popcount);
DEF_OP(FindLSB);
DEF_OP(FindMSB);
DEF_OP(FindTrailingZeros);
DEF_OP(CountLeadingZeroes);
DEF_OP(Rev);
DEF_OP(Bfi);
DEF_OP(Bfe);
DEF_OP(Sbfe);
DEF_OP(Select);
DEF_OP(VExtractToGPR);
DEF_OP(Float_ToGPR_ZU);
DEF_OP(Float_ToGPR_ZS);
DEF_OP(Float_ToGPR_S);
DEF_OP(FCmp);
///< Atomic ops
DEF_OP(CASPair);
DEF_OP(CAS);
DEF_OP(AtomicAdd);
DEF_OP(AtomicSub);
DEF_OP(AtomicAnd);
DEF_OP(AtomicOr);
DEF_OP(AtomicXor);
DEF_OP(AtomicSwap);
DEF_OP(AtomicFetchAdd);
DEF_OP(AtomicFetchSub);
DEF_OP(AtomicFetchAnd);
DEF_OP(AtomicFetchOr);
DEF_OP(AtomicFetchXor);
DEF_OP(AtomicFetchNeg);
///< Branch ops
DEF_OP(GuestCallDirect);
DEF_OP(GuestCallIndirect);
DEF_OP(GuestReturn);
DEF_OP(SignalReturn);
DEF_OP(CallbackReturn);
DEF_OP(ExitFunction);
DEF_OP(Jump);
DEF_OP(CondJump);
DEF_OP(Syscall);
DEF_OP(Thunk);
DEF_OP(ValidateCode);
DEF_OP(RemoveCodeEntry);
DEF_OP(CPUID);
///< Conversion ops
DEF_OP(VInsGPR);
DEF_OP(VCastFromGPR);
DEF_OP(Float_FromGPR_S);
DEF_OP(Float_FToF);
DEF_OP(Vector_SToF);
DEF_OP(Vector_FToZS);
DEF_OP(Vector_FToS);
DEF_OP(Vector_FToF);
DEF_OP(Vector_FToI);
///< Flag ops
DEF_OP(GetHostFlag);
///< Memory ops
DEF_OP(LoadContext);
DEF_OP(StoreContext);
DEF_OP(LoadRegister);
DEF_OP(StoreRegister);
DEF_OP(LoadContextIndexed);
DEF_OP(StoreContextIndexed);
DEF_OP(SpillRegister);
DEF_OP(FillRegister);
DEF_OP(LoadFlag);
DEF_OP(StoreFlag);
DEF_OP(LoadMem);
DEF_OP(StoreMem);
DEF_OP(VLoadMemElement);
DEF_OP(VStoreMemElement);
DEF_OP(CacheLineClear);
///< Misc ops
DEF_OP(EndBlock);
DEF_OP(Fence);
DEF_OP(Break);
DEF_OP(Phi);
DEF_OP(PhiValue);
DEF_OP(Print);
DEF_OP(GetRoundingMode);
DEF_OP(SetRoundingMode);
///< Move ops
DEF_OP(ExtractElementPair);
DEF_OP(CreateElementPair);
DEF_OP(Mov);
///< Vector ops
DEF_OP(VectorZero);
DEF_OP(VectorImm);
DEF_OP(CreateVector2);
DEF_OP(CreateVector4);
DEF_OP(SplatVector);
DEF_OP(VMov);
DEF_OP(VAnd);
DEF_OP(VBic);
DEF_OP(VOr);
DEF_OP(VXor);
DEF_OP(VAdd);
DEF_OP(VSub);
DEF_OP(VUQAdd);
DEF_OP(VUQSub);
DEF_OP(VSQAdd);
DEF_OP(VSQSub);
DEF_OP(VAddP);
DEF_OP(VAddV);
DEF_OP(VUMinV);
DEF_OP(VURAvg);
DEF_OP(VAbs);
DEF_OP(VPopcount);
DEF_OP(VFAdd);
DEF_OP(VFAddP);
DEF_OP(VFSub);
DEF_OP(VFMul);
DEF_OP(VFDiv);
DEF_OP(VFMin);
DEF_OP(VFMax);
DEF_OP(VFRecp);
DEF_OP(VFSqrt);
DEF_OP(VFRSqrt);
DEF_OP(VNeg);
DEF_OP(VFNeg);
DEF_OP(VNot);
DEF_OP(VUMin);
DEF_OP(VSMin);
DEF_OP(VUMax);
DEF_OP(VSMax);
DEF_OP(VZip);
DEF_OP(VUnZip);
DEF_OP(VBSL);
DEF_OP(VCMPEQ);
DEF_OP(VCMPEQZ);
DEF_OP(VCMPGT);
DEF_OP(VCMPGTZ);
DEF_OP(VCMPLTZ);
DEF_OP(VFCMPEQ);
DEF_OP(VFCMPNEQ);
DEF_OP(VFCMPLT);
DEF_OP(VFCMPGT);
DEF_OP(VFCMPLE);
DEF_OP(VFCMPORD);
DEF_OP(VFCMPUNO);
DEF_OP(VUShl);
DEF_OP(VUShr);
DEF_OP(VSShr);
DEF_OP(VUShlS);
DEF_OP(VUShrS);
DEF_OP(VSShrS);
DEF_OP(VInsElement);
DEF_OP(VInsScalarElement);
DEF_OP(VExtractElement);
DEF_OP(VDupElement);
DEF_OP(VExtr);
DEF_OP(VSLI);
DEF_OP(VSRI);
DEF_OP(VUShrI);
DEF_OP(VSShrI);
DEF_OP(VShlI);
DEF_OP(VUShrNI);
DEF_OP(VUShrNI2);
DEF_OP(VBitcast);
DEF_OP(VSXTL);
DEF_OP(VSXTL2);
DEF_OP(VUXTL);
DEF_OP(VUXTL2);
DEF_OP(VSQXTN);
DEF_OP(VSQXTN2);
DEF_OP(VSQXTUN);
DEF_OP(VSQXTUN2);
DEF_OP(VUMul);
DEF_OP(VUMull);
DEF_OP(VSMul);
DEF_OP(VSMull);
DEF_OP(VUMull2);
DEF_OP(VSMull2);
DEF_OP(VUABDL);
DEF_OP(VTBL1);
///< Encryption ops
DEF_OP(AESImc);
DEF_OP(AESEnc);
DEF_OP(AESEncLast);
DEF_OP(AESDec);
DEF_OP(AESDecLast);
DEF_OP(AESKeyGenAssist);
///< F80 ops
DEF_OP(F80LOADFCW);
DEF_OP(F80ADD);
DEF_OP(F80SUB);
DEF_OP(F80MUL);
DEF_OP(F80DIV);
DEF_OP(F80FYL2X);
DEF_OP(F80ATAN);
DEF_OP(F80FPREM1);
DEF_OP(F80FPREM);
DEF_OP(F80SCALE);
DEF_OP(F80CVT);
DEF_OP(F80CVTINT);
DEF_OP(F80CVTTO);
DEF_OP(F80CVTTOINT);
DEF_OP(F80ROUND);
DEF_OP(F80F2XM1);
DEF_OP(F80TAN);
DEF_OP(F80SQRT);
DEF_OP(F80SIN);
DEF_OP(F80COS);
DEF_OP(F80XTRACT_EXP);
DEF_OP(F80XTRACT_SIG);
DEF_OP(F80CMP);
DEF_OP(F80BCDLOAD);
DEF_OP(F80BCDSTORE);
#undef DEF_OP
// Evaluates an IR comparison condition against two raw 64-bit source values.
// unsigned_type/signed_type select the integer width and signedness used for
// the integer conditions; float_type is used by the COND_F* conditions,
// which reinterpret the raw bits of Src1/Src2 as floating-point values
// (bit-level punning via reinterpret_cast — assumes the sources were stored
// at the same width as float_type).
// Unordered variants (FLU/FLEU/FU) are true when either operand is NaN;
// ordered variants (FGE/FGT/FNU) are false when either operand is NaN.
template<typename unsigned_type, typename signed_type, typename float_type>
static bool IsConditionTrue(uint8_t Cond, uint64_t Src1, uint64_t Src2) {
bool CompResult = false;
switch (Cond) {
case FEXCore::IR::COND_EQ:
CompResult = static_cast<unsigned_type>(Src1) == static_cast<unsigned_type>(Src2);
break;
case FEXCore::IR::COND_NEQ:
CompResult = static_cast<unsigned_type>(Src1) != static_cast<unsigned_type>(Src2);
break;
case FEXCore::IR::COND_SGE:
CompResult = static_cast<signed_type>(Src1) >= static_cast<signed_type>(Src2);
break;
case FEXCore::IR::COND_SLT:
CompResult = static_cast<signed_type>(Src1) < static_cast<signed_type>(Src2);
break;
case FEXCore::IR::COND_SGT:
CompResult = static_cast<signed_type>(Src1) > static_cast<signed_type>(Src2);
break;
case FEXCore::IR::COND_SLE:
CompResult = static_cast<signed_type>(Src1) <= static_cast<signed_type>(Src2);
break;
case FEXCore::IR::COND_UGE:
CompResult = static_cast<unsigned_type>(Src1) >= static_cast<unsigned_type>(Src2);
break;
case FEXCore::IR::COND_ULT:
CompResult = static_cast<unsigned_type>(Src1) < static_cast<unsigned_type>(Src2);
break;
case FEXCore::IR::COND_UGT:
CompResult = static_cast<unsigned_type>(Src1) > static_cast<unsigned_type>(Src2);
break;
case FEXCore::IR::COND_ULE:
CompResult = static_cast<unsigned_type>(Src1) <= static_cast<unsigned_type>(Src2);
break;
case FEXCore::IR::COND_FLU:
CompResult = reinterpret_cast<float_type&>(Src1) < reinterpret_cast<float_type&>(Src2) || (std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
break;
case FEXCore::IR::COND_FGE:
CompResult = reinterpret_cast<float_type&>(Src1) >= reinterpret_cast<float_type&>(Src2) && !(std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
break;
case FEXCore::IR::COND_FLEU:
CompResult = reinterpret_cast<float_type&>(Src1) <= reinterpret_cast<float_type&>(Src2) || (std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
break;
case FEXCore::IR::COND_FGT:
CompResult = reinterpret_cast<float_type&>(Src1) > reinterpret_cast<float_type&>(Src2) && !(std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
break;
case FEXCore::IR::COND_FU:
CompResult = (std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
break;
case FEXCore::IR::COND_FNU:
CompResult = !(std::isnan(reinterpret_cast<float_type&>(Src1)) || std::isnan(reinterpret_cast<float_type&>(Src2)));
break;
// Host-flag conditions are not evaluated by the interpreter.
case FEXCore::IR::COND_MI:
case FEXCore::IR::COND_PL:
case FEXCore::IR::COND_VS:
case FEXCore::IR::COND_VC:
default:
LOGMAN_MSG_A_FMT("Unsupported compare type");
break;
}
return CompResult;
}
static uint8_t GetOpSize(FEXCore::IR::IRListView *CurrentIR, IR::OrderedNodeWrapper Node) {
  // Look up the node's IR header and report its result size in bytes.
  return CurrentIR->GetOp<FEXCore::IR::IROp_Header>(Node)->Size;
}
};
};

View File

@ -0,0 +1,289 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
namespace FEXCore::CPU {
// Clears the cache line containing Addr.
// x86 host: CLFLUSH the line; other hosts: __builtin___clear_cache over a
// 64-byte span (assumes 64-byte cache lines — confirm per host).
static inline void CacheLineFlush(char *Addr) {
#ifdef _M_X86_64
__asm volatile (
"clflush (%[Addr]);"
:: [Addr] "r" (Addr)
: "memory");
#else
__builtin___clear_cache(Addr, Addr+64);
#endif
}
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
// Loads a field of the thread's CPU state (CurrentFrame + Op->Offset) into
// the destination SSA slot.
DEF_OP(LoadContext) {
auto Op = IROp->C<IR::IROp_LoadContext>();
uint8_t OpSize = IROp->Size;
uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
ContextPtr += Op->Offset;
// Scalar sizes go through GD; 16-byte values are copied wholesale.
#define LOAD_CTX(x, y) \
case x: { \
y const *MemData = reinterpret_cast<y const*>(ContextPtr); \
GD = *MemData; \
break; \
}
switch (OpSize) {
LOAD_CTX(1, uint8_t)
LOAD_CTX(2, uint16_t)
LOAD_CTX(4, uint32_t)
LOAD_CTX(8, uint64_t)
case 16: {
void const *MemData = reinterpret_cast<void const*>(ContextPtr);
memcpy(GDP, MemData, OpSize);
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled LoadContext size: {}", OpSize);
}
#undef LOAD_CTX
}
DEF_OP(StoreContext) {
  // Copy OpSize bytes from the SSA source into the thread's CPU state at
  // the given context offset.
  auto const *Op = IROp->C<IR::IROp_StoreContext>();
  const uint8_t OpSize = IROp->Size;

  auto *Frame = reinterpret_cast<uint8_t*>(Data->State->CurrentFrame);
  const void *Value = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
  memcpy(Frame + Op->Offset, Value, OpSize);
}
// Register-mapped load/store ops are not supported by the interpreter
// backend (presumably only produced alongside JIT register allocation —
// confirm); hitting them here is a hard error.
DEF_OP(LoadRegister) {
LOGMAN_MSG_A_FMT("Unimplemented");
}
DEF_OP(StoreRegister) {
LOGMAN_MSG_A_FMT("Unimplemented");
}
// Loads one element from an indexed array inside the CPU context:
// address = CurrentFrame + BaseOffset + Index * Stride.
DEF_OP(LoadContextIndexed) {
auto Op = IROp->C<IR::IROp_LoadContextIndexed>();
uint64_t Index = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]);
uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
ContextPtr += Op->BaseOffset;
ContextPtr += Index * Op->Stride;
// Scalar sizes go through GD; 16-byte values are copied wholesale.
#define LOAD_CTX(x, y) \
case x: { \
y const *MemData = reinterpret_cast<y const*>(ContextPtr); \
GD = *MemData; \
break; \
}
switch (Op->Size) {
LOAD_CTX(1, uint8_t)
LOAD_CTX(2, uint16_t)
LOAD_CTX(4, uint32_t)
LOAD_CTX(8, uint64_t)
case 16: {
void const *MemData = reinterpret_cast<void const*>(ContextPtr);
memcpy(GDP, MemData, Op->Size);
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed size: {}", Op->Size);
}
#undef LOAD_CTX
}
// Stores one element into an indexed array inside the CPU context.
// Note the argument order: Args[0] is the value, Args[1] is the index.
DEF_OP(StoreContextIndexed) {
auto Op = IROp->C<IR::IROp_StoreContextIndexed>();
uint64_t Index = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[1]);
uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
ContextPtr += Op->BaseOffset;
ContextPtr += Index * Op->Stride;
void *MemData = reinterpret_cast<void*>(ContextPtr);
void *Src = GetSrc<void*>(Data->SSAData, Op->Header.Args[0]);
memcpy(MemData, Src, Op->Size);
}
// Spill/fill ops are register-allocator constructs; the interpreter never
// produces them, so reaching these handlers is a hard error.
DEF_OP(SpillRegister) {
LOGMAN_MSG_A_FMT("Unimplemented");
}
DEF_OP(FillRegister) {
LOGMAN_MSG_A_FMT("Unimplemented");
}
DEF_OP(LoadFlag) {
  // Read a single x86 flag byte out of the CPU state's flags array.
  auto const *Op = IROp->C<IR::IROp_LoadFlag>();
  auto const *Frame = reinterpret_cast<uint8_t const*>(Data->State->CurrentFrame);
  GD = Frame[offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag];
}
DEF_OP(StoreFlag) {
  // Write a single x86 flag byte into the CPU state's flags array.
  auto const *Op = IROp->C<IR::IROp_StoreFlag>();
  const uint8_t Value = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]);
  auto *Frame = reinterpret_cast<uint8_t*>(Data->State->CurrentFrame);
  Frame[offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag] = Value;
}
DEF_OP(LoadMem) {
  // Load OpSize bytes from guest memory into the destination SSA slot.
  auto const *Op = IROp->C<IR::IROp_LoadMem>();
  const uint8_t OpSize = IROp->Size;

  uint8_t const *MemData = *GetSrc<uint8_t const**>(Data->SSAData, Op->Addr);

  // Optional scaled register offset, applied with the addressing mode's
  // extension (64-bit, zero-extended 32-bit, or sign-extended 32-bit).
  if (!Op->Offset.IsInvalid()) {
    auto Offset = *GetSrc<uintptr_t const*>(Data->SSAData, Op->Offset) * Op->OffsetScale;
    switch (Op->OffsetType.Val) {
      case IR::MEM_OFFSET_SXTX.Val: MemData += Offset; break;
      case IR::MEM_OFFSET_UXTW.Val: MemData += (uint32_t)Offset; break;
      case IR::MEM_OFFSET_SXTW.Val: MemData += (int32_t)Offset; break;
    }
  }

  // Clear the full 128-bit result slot first so narrow loads are
  // zero-extended in the SSA slot.
  memset(GDP, 0, 16);
  switch (OpSize) {
    case 1: GD = *reinterpret_cast<const uint8_t*>(MemData); break;
    case 2: GD = *reinterpret_cast<const uint16_t*>(MemData); break;
    case 4: GD = *reinterpret_cast<const uint32_t*>(MemData); break;
    case 8: GD = *reinterpret_cast<const uint64_t*>(MemData); break;
    default:
      // Vector-sized loads copy the op's full size. Use OpSize consistently
      // here (the original mixed IROp->Size and Op->Size for the same value).
      memcpy(GDP, MemData, OpSize);
      break;
  }
}
DEF_OP(StoreMem) {
  // Store OpSize bytes from the SSA source value into guest memory.
  auto const *Op = IROp->C<IR::IROp_StoreMem>();
  const uint8_t OpSize = IROp->Size;

  uint8_t *MemData = *GetSrc<uint8_t **>(Data->SSAData, Op->Addr);

  // Optional scaled register offset, applied with the addressing mode's
  // extension (64-bit, zero-extended 32-bit, or sign-extended 32-bit).
  if (!Op->Offset.IsInvalid()) {
    auto Offset = *GetSrc<uintptr_t const*>(Data->SSAData, Op->Offset) * Op->OffsetScale;
    switch (Op->OffsetType.Val) {
      case IR::MEM_OFFSET_SXTX.Val: MemData += Offset; break;
      case IR::MEM_OFFSET_UXTW.Val: MemData += (uint32_t)Offset; break;
      case IR::MEM_OFFSET_SXTW.Val: MemData += (int32_t)Offset; break;
    }
  }

  switch (OpSize) {
    case 1: *reinterpret_cast<uint8_t*>(MemData) = *GetSrc<uint8_t*>(Data->SSAData, Op->Value); break;
    case 2: *reinterpret_cast<uint16_t*>(MemData) = *GetSrc<uint16_t*>(Data->SSAData, Op->Value); break;
    case 4: *reinterpret_cast<uint32_t*>(MemData) = *GetSrc<uint32_t*>(Data->SSAData, Op->Value); break;
    case 8: *reinterpret_cast<uint64_t*>(MemData) = *GetSrc<uint64_t*>(Data->SSAData, Op->Value); break;
    default:
      // Vector-sized stores copy the op's full size. Use OpSize consistently
      // here (the original mixed IROp->Size and Op->Size for the same value).
      memcpy(MemData, GetSrc<void*>(Data->SSAData, Op->Value), OpSize);
      break;
  }
}
// Loads a single element from memory into lane `Index` of the destination
// vector; the remaining lanes are copied from the pass-through vector in
// Args[1]. Args[0] is the memory address.
DEF_OP(VLoadMemElement) {
auto Op = IROp->C<IR::IROp_VLoadMemElement>();
void const *MemData = *GetSrc<void const**>(Data->SSAData, Op->Header.Args[0]);
memcpy(GDP, GetSrc<void*>(Data->SSAData, Op->Header.Args[1]), 16);
memcpy(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(GDP) + (Op->Header.ElementSize * Op->Index)),
MemData, Op->Header.ElementSize);
}
// Stores lane `Index` of the source vector (Args[1]) to the memory address
// in Args[0], sized by the op's element size.
DEF_OP(VStoreMemElement) {
#define STORE_DATA(x, y) \
case x: { \
y *MemData = *GetSrc<y**>(Data->SSAData, Op->Header.Args[0]); \
memcpy(MemData, &GetSrc<y*>(Data->SSAData, Op->Header.Args[1])[Op->Index], sizeof(y)); \
break; \
}
auto Op = IROp->C<IR::IROp_VStoreMemElement>();
uint8_t OpSize = IROp->Size;
switch (OpSize) {
STORE_DATA(1, uint8_t)
STORE_DATA(2, uint16_t)
STORE_DATA(4, uint32_t)
STORE_DATA(8, uint64_t)
default: LOGMAN_MSG_A_FMT("Unhandled StoreMem size"); break;
}
#undef STORE_DATA
}
// Clears the cache line containing the guest address in Op->Addr.
DEF_OP(CacheLineClear) {
auto Op = IROp->C<IR::IROp_CacheLineClear>();
char *MemData = *GetSrc<char **>(Data->SSAData, Op->Addr);
// 64-byte cache line clear
CacheLineFlush(MemData);
}
#undef DEF_OP
// Installs the memory op handlers into the shared dispatch table.
// Note: the TSO variants (LOADMEMTSO/STOREMEMTSO) are mapped onto the plain
// load/store handlers — no additional memory ordering is applied here.
void InterpreterOps::RegisterMemoryHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
REGISTER_OP(LOADCONTEXT, LoadContext);
REGISTER_OP(STORECONTEXT, StoreContext);
REGISTER_OP(LOADREGISTER, LoadRegister);
REGISTER_OP(STOREREGISTER, StoreRegister);
REGISTER_OP(LOADCONTEXTINDEXED, LoadContextIndexed);
REGISTER_OP(STORECONTEXTINDEXED, StoreContextIndexed);
REGISTER_OP(SPILLREGISTER, SpillRegister);
REGISTER_OP(FILLREGISTER, FillRegister);
REGISTER_OP(LOADFLAG, LoadFlag);
REGISTER_OP(STOREFLAG, StoreFlag);
REGISTER_OP(LOADMEM, LoadMem);
REGISTER_OP(STOREMEM, StoreMem);
REGISTER_OP(LOADMEMTSO, LoadMem);
REGISTER_OP(STOREMEMTSO, StoreMem);
REGISTER_OP(VLOADMEMELEMENT, VLoadMemElement);
REGISTER_OP(VSTOREMEMELEMENT, VStoreMemElement);
REGISTER_OP(CACHELINECLEAR, CacheLineClear);
#undef REGISTER_OP
}
}

View File

@ -0,0 +1,158 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
#ifdef _M_X86_64
#include <xmmintrin.h>
#endif
namespace FEXCore::CPU {
[[noreturn]]
static void StopThread(FEXCore::Core::InternalThreadState *Thread) {
Thread->CTX->StopThread(Thread);
LOGMAN_MSG_A_FMT("unreachable");
FEX_UNREACHABLE;
}
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
DEF_OP(Fence) {
  auto Op = IROp->C<IR::IROp_Fence>();

  // Translate the IR fence kind to the equivalent C++ memory fence.
  switch (Op->Fence) {
    case IR::Fence_Store.Val:
      std::atomic_thread_fence(std::memory_order_release);
      break;
    case IR::Fence_Load.Val:
      std::atomic_thread_fence(std::memory_order_acquire);
      break;
    case IR::Fence_LoadStore.Val:
      // Combined load+store ordering uses a full barrier.
      std::atomic_thread_fence(std::memory_order_seq_cst);
      break;
    default:
      LOGMAN_MSG_A_FMT("Unknown Fence: {}", Op->Fence);
      break;
  }
}
DEF_OP(Break) {
  auto Op = IROp->C<IR::IROp_Break>();

  // HLT (reason 4) is the only break reason the interpreter handles.
  if (Op->Reason == 4) {
    StopThread(Data->State);
  }
  else {
    LOGMAN_MSG_A_FMT("Unknown Break Reason: {}", Op->Reason);
  }
}
DEF_OP(GetRoundingMode) {
  // Reads the host FP rounding state and translates it to the IR encoding.
  uint32_t GuestRounding{};
#ifdef _M_ARM_64
  uint64_t FPCRValue{};
  __asm(R"(
    mrs %[Tmp], FPCR;
  )"
  : [Tmp] "=r" (FPCRValue));

  // FPCR bit 24 is flush-to-zero.
  if ((FPCRValue >> 24) & 1) {
    GuestRounding |= IR::ROUND_MODE_FLUSH_TO_ZERO;
  }

  // ARM's RMode encoding (bits [23:22]) is ordered differently than x86's;
  // translate each value explicitly.
  switch ((FPCRValue >> 22) & 0b11) {
    case 0: GuestRounding |= IR::ROUND_MODE_NEAREST; break;
    case 1: GuestRounding |= IR::ROUND_MODE_POSITIVE_INFINITY; break;
    case 2: GuestRounding |= IR::ROUND_MODE_NEGATIVE_INFINITY; break;
    case 3: GuestRounding |= IR::ROUND_MODE_TOWARDS_ZERO; break;
  }
#else
  // MXCSR bits [15:13] (FTZ + rounding control) already match the IR layout.
  GuestRounding = (_mm_getcsr() >> 13) & 0b111;
#endif
  memcpy(GDP, &GuestRounding, sizeof(GuestRounding));
}
DEF_OP(SetRoundingMode) {
  auto Op = IROp->C<IR::IROp_SetRoundingMode>();
  // Guest rounding state in the IR encoding: bits [1:0] rounding mode,
  // bit 2 flush-to-zero.
  const uint8_t GuestRounding = *GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]);
#ifdef _M_ARM_64
  uint64_t FPCRValue{};
  __asm volatile(R"(
    mrs %[Tmp], FPCR;
  )"
  : [Tmp] "=r" (FPCRValue));

  // Clear the rounding-related bits before inserting the guest's state.
  FPCRValue &= ~(0b111 << 22);
  if (GuestRounding & IR::ROUND_MODE_FLUSH_TO_ZERO) {
    FPCRValue |= (1U << 24);
  }

  // ARM's RMode encoding (bits [23:22]) is ordered differently than x86's.
  switch (GuestRounding & 0b11) {
    case IR::ROUND_MODE_NEAREST:           FPCRValue |= (0b00U << 22); break;
    case IR::ROUND_MODE_POSITIVE_INFINITY: FPCRValue |= (0b01U << 22); break;
    case IR::ROUND_MODE_NEGATIVE_INFINITY: FPCRValue |= (0b10U << 22); break;
    case IR::ROUND_MODE_TOWARDS_ZERO:      FPCRValue |= (0b11U << 22); break;
  }

  __asm volatile(R"(
    msr FPCR, %[Tmp];
  )"
  :: [Tmp] "r" (FPCRValue));
#else
  // Replace MXCSR bits [15:13] with the guest's FTZ + rounding control.
  uint32_t MXCSRValue = _mm_getcsr();
  MXCSRValue &= ~(0b111 << 13);
  MXCSRValue |= GuestRounding << 13;
  _mm_setcsr(MXCSRValue);
#endif
}
DEF_OP(Print) {
  auto Op = IROp->C<IR::IROp_Print>();
  const uint8_t OpSize = IROp->Size;

  // Debug helper: log the source value in hex and decimal.
  if (OpSize == 16) {
    // 128-bit values are logged as two 64-bit halves.
    __uint128_t Src = *GetSrc<__uint128_t*>(Data->SSAData, Op->Header.Args[0]);
    const uint64_t Lower = static_cast<uint64_t>(Src);
    const uint64_t Upper = static_cast<uint64_t>(Src >> 64);
    LogMan::Msg::IFmt(">>>> Value[0] in Arg: 0x{:x}, {}", Lower, Lower);
    LogMan::Msg::IFmt(" Value[1] in Arg: 0x{:x}, {}", Upper, Upper);
  }
  else if (OpSize <= 8) {
    const uint64_t Src = *GetSrc<uint64_t*>(Data->SSAData, Op->Header.Args[0]);
    LogMan::Msg::IFmt(">>>> Value in Arg: 0x{:x}, {}", Src, Src);
  }
  else {
    LOGMAN_MSG_A_FMT("Unknown value size: {}", OpSize);
  }
}
#undef DEF_OP
// Installs the miscellaneous op handlers into the interpreter's dispatch
// table. Several structural/marker ops (headers, blocks, phi nodes) have no
// runtime effect in the interpreter and are mapped to the shared NoOp handler.
void InterpreterOps::RegisterMiscHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
REGISTER_OP(DUMMY, NoOp);
REGISTER_OP(IRHEADER, NoOp);
REGISTER_OP(CODEBLOCK, NoOp);
REGISTER_OP(BEGINBLOCK, NoOp);
REGISTER_OP(ENDBLOCK, NoOp);
REGISTER_OP(FENCE, Fence);
REGISTER_OP(BREAK, Break);
REGISTER_OP(PHI, NoOp);
REGISTER_OP(PHIVALUE, NoOp);
REGISTER_OP(PRINT, Print);
REGISTER_OP(GETROUNDINGMODE, GetRoundingMode);
REGISTER_OP(SETROUNDINGMODE, SetRoundingMode);
REGISTER_OP(INVALIDATEFLAGS, NoOp);
#undef REGISTER_OP
}
}

View File

@ -0,0 +1,50 @@
/*
$info$
tags: backend|interpreter
$end_info$
*/
#include "Interface/Core/Interpreter/InterpreterClass.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/InterpreterDefines.h"
#include <cstdint>
namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(FEXCore::IR::IROp_Header *IROp, IROpData *Data, uint32_t Node)
DEF_OP(ExtractElementPair) {
  auto Op = IROp->C<IR::IROp_ExtractElementPair>();

  // The pair's elements are stored contiguously; copy the requested one
  // (Op->Element selects the lower or upper half) into the destination.
  uint8_t *Pair = GetSrc<uint8_t*>(Data->SSAData, Op->Header.Args[0]);
  memcpy(GDP, Pair + Op->Header.Size * Op->Element, Op->Header.Size);
}
DEF_OP(CreateElementPair) {
  auto Op = IROp->C<IR::IROp_CreateElementPair>();
  const size_t ElementSize = Op->Header.Size;

  // Pack the two source values back-to-back: Args[0] in the lower half,
  // Args[1] in the upper half of the destination.
  uint8_t *Dst = GetDest<uint8_t*>(Data->SSAData, Node);
  memcpy(Dst, GetSrc<void*>(Data->SSAData, Op->Header.Args[0]), ElementSize);
  memcpy(Dst + ElementSize, GetSrc<void*>(Data->SSAData, Op->Header.Args[1]), ElementSize);
}
DEF_OP(Mov) {
  auto Op = IROp->C<IR::IROp_Mov>();

  // Copy the source SSA value to the destination unchanged.
  memcpy(GDP, GetSrc<void*>(Data->SSAData, Op->Header.Args[0]), IROp->Size);
}
#undef DEF_OP
// Installs the move/pair op handlers into the interpreter's dispatch table,
// mapping each IR opcode to the corresponding Op_* implementation above.
void InterpreterOps::RegisterMoveHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &InterpreterOps::Op_##x
REGISTER_OP(EXTRACTELEMENTPAIR, ExtractElementPair);
REGISTER_OP(CREATEELEMENTPAIR, CreateElementPair);
REGISTER_OP(MOV, Mov);
#undef REGISTER_OP
}
}

File diff suppressed because it is too large Load Diff