From 7252050e4274f6057422e5014e2a07b07cf6c972 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sat, 29 Jun 2019 16:50:48 -0700 Subject: [PATCH] Initial commit --- .gitmodules | 6 + CMakeLists.txt | 52 + Examples/CMakeLists.txt | 17 + Examples/SimpleCodeLoader.cpp | 63 + Examples/SimpleCodeLoader.h | 49 + Examples/SimplePrint.cpp | 113 + External/vixl | 1 + LICENSE | 21 + Readme.md | 41 + Scripts/json_ir_generator.py | 373 ++ Source/CMakeLists.txt | 133 + Source/Common/BitSet.h | 49 + Source/Common/MathUtils.h | 13 + Source/Common/Paths.cpp | 30 + Source/Common/Paths.h | 7 + Source/Interface/Config/Config.cpp | 51 + Source/Interface/Context/Context.cpp | 131 + Source/Interface/Context/Context.h | 103 + Source/Interface/Core/BlockCache.cpp | 54 + Source/Interface/Core/BlockCache.h | 104 + Source/Interface/Core/CPUID.cpp | 118 + Source/Interface/Core/CPUID.h | 44 + Source/Interface/Core/Core.cpp | 800 ++++ Source/Interface/Core/Core.h | 23 + Source/Interface/Core/DebugData.h | 5 + Source/Interface/Core/Frontend.cpp | 927 +++++ Source/Interface/Core/Frontend.h | 44 + Source/Interface/Core/InternalThreadState.h | 1 + .../Core/Interpreter/InterpreterCore.cpp | 1468 +++++++ .../Core/Interpreter/InterpreterCore.h | 12 + Source/Interface/Core/JIT/Arm64/JIT.cpp | 1452 +++++++ Source/Interface/Core/JIT/JITCore.h | 15 + Source/Interface/Core/JIT/x86_64/JIT.cpp | 2798 +++++++++++++ Source/Interface/Core/JIT/x86_64/JIT.h | 1 + Source/Interface/Core/LLVMJIT/LLVMCore.cpp | 1933 +++++++++ Source/Interface/Core/LLVMJIT/LLVMCore.h | 11 + .../Core/LLVMJIT/LLVMMemoryManager.cpp | 62 + .../Core/LLVMJIT/LLVMMemoryManager.h | 34 + Source/Interface/Core/OpcodeDispatcher.cpp | 3473 +++++++++++++++++ Source/Interface/Core/OpcodeDispatcher.h | 317 ++ Source/Interface/Core/RegisterAllocation.cpp | 230 ++ Source/Interface/Core/RegisterAllocation.h | 30 + Source/Interface/Core/X86DebugInfo.cpp | 120 + Source/Interface/Core/X86Tables.cpp | 2642 +++++++++++++ 
Source/Interface/HLE/FileManagement.cpp | 243 ++ Source/Interface/HLE/FileManagement.h | 77 + Source/Interface/HLE/Syscalls.cpp | 464 +++ Source/Interface/HLE/Syscalls.h | 106 + Source/Interface/IR/IR.cpp | 66 + Source/Interface/IR/IR.json | 526 +++ Source/Interface/IR/PassManager.cpp | 27 + Source/Interface/IR/PassManager.h | 27 + Source/Interface/IR/Passes.h | 17 + Source/Interface/IR/Passes/ConstProp.cpp | 51 + .../IR/Passes/DeadContextStoreElimination.cpp | 116 + Source/Interface/IR/Passes/IRCompaction.cpp | 121 + Source/Interface/IR/Passes/IRValidation.cpp | 153 + .../RedundantFlagCalculationElimination.cpp | 66 + .../IR/Passes/SyscallOptimization.cpp | 46 + Source/Interface/Memory/MemMapper.cpp | 61 + Source/Interface/Memory/MemMapper.h | 44 + Source/Interface/Memory/SharedMem.cpp | 61 + Source/Interface/Memory/SharedMem.h | 12 + Source/Test/CMakeLists.txt | 16 + Source/Test/IRTest.cpp | 11 + Source/Test/LLVMIRTest.cpp | 78 + docs/CPUBackends.md | 29 + docs/CustomCPUBackend.md | 17 + docs/Frontend.md | 43 + docs/IR.md | 32 + docs/OpDispatcher.md | 47 + docs/OptimizationPasses.md | 36 + include/FEXCore/Config/Config.h | 24 + include/FEXCore/Core/CPUBackend.h | 74 + include/FEXCore/Core/CodeLoader.h | 78 + include/FEXCore/Core/Context.h | 193 + include/FEXCore/Core/CoreState.h | 39 + include/FEXCore/Core/X86Enums.h | 63 + include/FEXCore/Debug/ContextDebug.h | 35 + include/FEXCore/Debug/InternalThreadState.h | 65 + include/FEXCore/Debug/X86Tables.h | 399 ++ include/FEXCore/HLE/Linux/ThreadManagement.h | 26 + include/FEXCore/HLE/SyscallHandler.h | 9 + include/FEXCore/HLE/SyscallVisitor.h | 49 + include/FEXCore/IR/IR.h | 329 ++ include/FEXCore/IR/IntrusiveIRList.h | 127 + include/FEXCore/Memory/MemMapper.h | 15 + include/FEXCore/Memory/SharedMem.h | 27 + 88 files changed, 22116 insertions(+) create mode 100644 .gitmodules create mode 100644 CMakeLists.txt create mode 100644 Examples/CMakeLists.txt create mode 100644 Examples/SimpleCodeLoader.cpp create mode 
100644 Examples/SimpleCodeLoader.h create mode 100644 Examples/SimplePrint.cpp create mode 160000 External/vixl create mode 100644 LICENSE create mode 100644 Readme.md create mode 100644 Scripts/json_ir_generator.py create mode 100644 Source/CMakeLists.txt create mode 100644 Source/Common/BitSet.h create mode 100644 Source/Common/MathUtils.h create mode 100644 Source/Common/Paths.cpp create mode 100644 Source/Common/Paths.h create mode 100644 Source/Interface/Config/Config.cpp create mode 100644 Source/Interface/Context/Context.cpp create mode 100644 Source/Interface/Context/Context.h create mode 100644 Source/Interface/Core/BlockCache.cpp create mode 100644 Source/Interface/Core/BlockCache.h create mode 100644 Source/Interface/Core/CPUID.cpp create mode 100644 Source/Interface/Core/CPUID.h create mode 100644 Source/Interface/Core/Core.cpp create mode 100644 Source/Interface/Core/Core.h create mode 100644 Source/Interface/Core/DebugData.h create mode 100644 Source/Interface/Core/Frontend.cpp create mode 100644 Source/Interface/Core/Frontend.h create mode 100644 Source/Interface/Core/InternalThreadState.h create mode 100644 Source/Interface/Core/Interpreter/InterpreterCore.cpp create mode 100644 Source/Interface/Core/Interpreter/InterpreterCore.h create mode 100644 Source/Interface/Core/JIT/Arm64/JIT.cpp create mode 100644 Source/Interface/Core/JIT/JITCore.h create mode 100644 Source/Interface/Core/JIT/x86_64/JIT.cpp create mode 100644 Source/Interface/Core/JIT/x86_64/JIT.h create mode 100644 Source/Interface/Core/LLVMJIT/LLVMCore.cpp create mode 100644 Source/Interface/Core/LLVMJIT/LLVMCore.h create mode 100644 Source/Interface/Core/LLVMJIT/LLVMMemoryManager.cpp create mode 100644 Source/Interface/Core/LLVMJIT/LLVMMemoryManager.h create mode 100644 Source/Interface/Core/OpcodeDispatcher.cpp create mode 100644 Source/Interface/Core/OpcodeDispatcher.h create mode 100644 Source/Interface/Core/RegisterAllocation.cpp create mode 100644 
Source/Interface/Core/RegisterAllocation.h create mode 100644 Source/Interface/Core/X86DebugInfo.cpp create mode 100644 Source/Interface/Core/X86Tables.cpp create mode 100644 Source/Interface/HLE/FileManagement.cpp create mode 100644 Source/Interface/HLE/FileManagement.h create mode 100644 Source/Interface/HLE/Syscalls.cpp create mode 100644 Source/Interface/HLE/Syscalls.h create mode 100644 Source/Interface/IR/IR.cpp create mode 100644 Source/Interface/IR/IR.json create mode 100644 Source/Interface/IR/PassManager.cpp create mode 100644 Source/Interface/IR/PassManager.h create mode 100644 Source/Interface/IR/Passes.h create mode 100644 Source/Interface/IR/Passes/ConstProp.cpp create mode 100644 Source/Interface/IR/Passes/DeadContextStoreElimination.cpp create mode 100644 Source/Interface/IR/Passes/IRCompaction.cpp create mode 100644 Source/Interface/IR/Passes/IRValidation.cpp create mode 100644 Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp create mode 100644 Source/Interface/IR/Passes/SyscallOptimization.cpp create mode 100644 Source/Interface/Memory/MemMapper.cpp create mode 100644 Source/Interface/Memory/MemMapper.h create mode 100644 Source/Interface/Memory/SharedMem.cpp create mode 100644 Source/Interface/Memory/SharedMem.h create mode 100644 Source/Test/CMakeLists.txt create mode 100644 Source/Test/IRTest.cpp create mode 100644 Source/Test/LLVMIRTest.cpp create mode 100644 docs/CPUBackends.md create mode 100644 docs/CustomCPUBackend.md create mode 100644 docs/Frontend.md create mode 100644 docs/IR.md create mode 100644 docs/OpDispatcher.md create mode 100644 docs/OptimizationPasses.md create mode 100644 include/FEXCore/Config/Config.h create mode 100644 include/FEXCore/Core/CPUBackend.h create mode 100644 include/FEXCore/Core/CodeLoader.h create mode 100644 include/FEXCore/Core/Context.h create mode 100644 include/FEXCore/Core/CoreState.h create mode 100644 include/FEXCore/Core/X86Enums.h create mode 100644 
include/FEXCore/Debug/ContextDebug.h create mode 100644 include/FEXCore/Debug/InternalThreadState.h create mode 100644 include/FEXCore/Debug/X86Tables.h create mode 100644 include/FEXCore/HLE/Linux/ThreadManagement.h create mode 100644 include/FEXCore/HLE/SyscallHandler.h create mode 100644 include/FEXCore/HLE/SyscallVisitor.h create mode 100644 include/FEXCore/IR/IR.h create mode 100644 include/FEXCore/IR/IntrusiveIRList.h create mode 100644 include/FEXCore/Memory/MemMapper.h create mode 100644 include/FEXCore/Memory/SharedMem.h diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..f68b69e50 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "External/SonicUtils"] + path = External/SonicUtils + url = https://github.com/Sonicadvance1/SonicUtils.git +[submodule "External/vixl"] + path = External/vixl + url = https://git.linaro.org/arm/vixl.git diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000..8b99bdbfd --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,52 @@ +cmake_minimum_required(VERSION 3.10) +set (PROJECT_NAME FEXCore) +project(${PROJECT_NAME} + VERSION 0.01 + LANGUAGES CXX) + +option(ENABLE_CLANG_FORMAT "Run clang format over the source" FALSE) +option(FORCE_AARCH64 "Force AArch64 Target for testing" FALSE) + +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +find_package(LLVM CONFIG QUIET) +if(LLVM_FOUND AND TARGET LLVM) + message(STATUS "LLVM found!") + include_directories(${LLVM_INCLUDE_DIRS}) +else() + message("Couldn't find LLVM and this project requires it") +endif() + +include(CheckCXXCompilerFlag) +include(CheckIncludeFileCXX) + +if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") + set(_M_X86_64 1) + if (NOT FORCE_AARCH64) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-operator-names") + set(CMAKE_REQUIRED_DEFINITIONS "-fno-operator-names") + check_include_file_cxx(xbyak/xbyak.h XBYAK_FOUND) + if (XBYAK_FOUND) + set(ENABLE_JIT 1) + else() + message(STATUS "xbyak not found. 
Not enabling runtime JIT") + endif() + endif() +endif() +if (FORCE_AARCH64 OR CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") + set(_M_ARM_64 1) + set(ENABLE_JIT 1) + add_subdirectory(External/vixl/) + include_directories(External/vixl/src/) +endif() + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +add_subdirectory(External/SonicUtils/) +include_directories(External/SonicUtils/) + +add_subdirectory(Source/) +target_include_directories(${PROJECT_NAME} PUBLIC include/) + +add_subdirectory(Examples/) diff --git a/Examples/CMakeLists.txt b/Examples/CMakeLists.txt new file mode 100644 index 000000000..6ee01afe3 --- /dev/null +++ b/Examples/CMakeLists.txt @@ -0,0 +1,17 @@ +set (SRCS + SimpleCodeLoader.cpp) + +set (ExampleName SimpleCodeLoader) + +add_executable(${ExampleName} ${SRCS}) +target_link_libraries(${ExampleName} PRIVATE ${PROJECT_NAME} SonicUtils) + +set (SRCS + SimplePrint.cpp) + +set (ExampleName SimplePrint) + +add_executable(${ExampleName} ${SRCS}) +target_link_libraries(${ExampleName} PRIVATE ${PROJECT_NAME} SonicUtils) + + diff --git a/Examples/SimpleCodeLoader.cpp b/Examples/SimpleCodeLoader.cpp new file mode 100644 index 000000000..38a6a6df3 --- /dev/null +++ b/Examples/SimpleCodeLoader.cpp @@ -0,0 +1,63 @@ +#include "LogManager.h" +#include "SimpleCodeLoader.h" + +#include +#include +#include +#include + +void MsgHandler(LogMan::DebugLevels Level, const char *Message) { + const char *CharLevel{nullptr}; + + switch (Level) { + case LogMan::NONE: + CharLevel = "NONE"; + break; + case LogMan::ASSERT: + CharLevel = "ASSERT"; + break; + case LogMan::ERROR: + CharLevel = "ERROR"; + break; + case LogMan::DEBUG: + CharLevel = "DEBUG"; + break; + case LogMan::INFO: + CharLevel = "Info"; + break; + default: + CharLevel = "???"; + break; + } + printf("[%s] %s\n", CharLevel, Message); +} + +void AssertHandler(const char *Message) { + printf("[ASSERT] %s\n", Message); +} + +int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv) { + 
LogMan::Throw::InstallHandler(AssertHandler); + LogMan::Msg::InstallHandler(MsgHandler); + + static constexpr uint8_t RawCode[] = { + 0x90, // NOP + 0xF4 // HLT + }; + + TestCode Test(RawCode, sizeof(RawCode)); + FEXCore::Context::InitializeStaticTables(); + auto SHM = FEXCore::SHM::AllocateSHMRegion(1ULL << 36); + auto CTX = FEXCore::Context::CreateNewContext(); + + FEXCore::Config::SetConfig(CTX, FEXCore::Config::CONFIG_MAXBLOCKINST, 1); + FEXCore::Context::AddGuestMemoryRegion(CTX, SHM); + FEXCore::Context::InitCore(CTX, &Test); + + auto ShutdownReason = FEXCore::Context::RunLoop(CTX, true); + LogMan::Msg::D("Reason we left VM: %d", ShutdownReason); + + FEXCore::Context::DestroyContext(CTX); + FEXCore::SHM::DestroyRegion(SHM); + return 0; +} diff --git a/Examples/SimpleCodeLoader.h b/Examples/SimpleCodeLoader.h new file mode 100644 index 000000000..64132cdcc --- /dev/null +++ b/Examples/SimpleCodeLoader.h @@ -0,0 +1,49 @@ +#pragma once + +#include + +class TestCode final : public FEXCore::CodeLoader { +public: + + TestCode(uint8_t const *Code, size_t Size) + : CodePtr {Code} + , CodeSize {Size} { + } + + uint64_t StackSize() const override { + return STACK_SIZE; + } + + uint64_t SetupStack([[maybe_unused]] void *HostPtr, uint64_t GuestPtr) const override { + return GuestPtr + STACK_SIZE - 16; + } + + uint64_t DefaultRIP() const override { + return RIP; + } + + FEXCore::CodeLoader::MemoryLayout GetLayout() const override { + // Needs to be page aligned + uint64_t CodeSize = 0x1000; + return std::make_tuple(CODE_START_RANGE, CODE_START_RANGE + CodeSize, CodeSize); + } + + void MapMemoryRegion(std::function Mapper) override { + } + + void LoadMemory(MemoryWriter Writer) override { + Writer(reinterpret_cast(CodePtr), 0, CodeSize); + } + + uint64_t GetFinalRIP() override { return CODE_START_RANGE + CodeSize; } + +private: + static constexpr uint64_t STACK_SIZE = 0x1000; + static constexpr uint64_t CODE_START_RANGE = 0x0; + static constexpr uint64_t RIP = 0; + + 
uint8_t const *CodePtr; + size_t CodeSize; + +}; + diff --git a/Examples/SimplePrint.cpp b/Examples/SimplePrint.cpp new file mode 100644 index 000000000..36e32f104 --- /dev/null +++ b/Examples/SimplePrint.cpp @@ -0,0 +1,113 @@ +#include "LogManager.h" +#include "SimpleCodeLoader.h" + +#include +#include +#include +#include + +void MsgHandler(LogMan::DebugLevels Level, char const *Message) { + const char *CharLevel{nullptr}; + + switch (Level) { + case LogMan::NONE: + CharLevel = "NONE"; + break; + case LogMan::ASSERT: + CharLevel = "ASSERT"; + break; + case LogMan::ERROR: + CharLevel = "ERROR"; + break; + case LogMan::DEBUG: + CharLevel = "DEBUG"; + break; + case LogMan::INFO: + CharLevel = "Info"; + break; + case LogMan::STDERR: + CharLevel = "STDERR"; + break; + case LogMan::STDOUT: + CharLevel = "STDOUT"; + break; + default: + CharLevel = "???"; + break; + } + printf("[%s] %s\n", CharLevel, Message); +} + +void AssertHandler(char const *Message) { + printf("[ASSERT] %s\n", Message); +} + +int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv) { + LogMan::Throw::InstallHandler(AssertHandler); + LogMan::Msg::InstallHandler(MsgHandler); + + // Set up a syscall to do a syscall WRITE to stdout + // Syscall handler catches writes to stdout/stderr and pumps it through LogManager + static constexpr uint8_t RawCode[] = { + 0X48, + 0XC7, + 0XC0, + 0X01, + 0X00, + 0X00, + 0X00, // MOV RAX, 0x1 + + 0X48, + 0XC7, + 0XC7, + 0X01, + 0X00, + 0X00, + 0X00, // MOV RDI, 0x1 + + 0X48, + 0XC7, + 0XC6, + 0X1F, + 0X00, + 0X00, + 0X00, // MOV RSI, 0x1F + + 0X48, + 0XC7, + 0XC2, + 0X01, + 0X00, + 0X00, + 0X00, // MOV RDX, 1 + + 0X0F, + 0X05, // SYSCALL + + 0XF4, // HLT + + 0X54, + 0X65, + 0X73, + 0X74, + 0X65, + 0X72, + 0X00, // 'Tester\0' + }; + + TestCode Test(RawCode, sizeof(RawCode)); + FEXCore::Context::InitializeStaticTables(); + auto SHM = FEXCore::SHM::AllocateSHMRegion(1ULL << 36); + auto CTX = FEXCore::Context::CreateNewContext(); + +// 
FEXCore::Config::SetConfig(CTX, FEXCore::Config::CONFIG_MAXBLOCKINST, 1); + FEXCore::Context::AddGuestMemoryRegion(CTX, SHM); + FEXCore::Context::InitCore(CTX, &Test); + + auto ShutdownReason = FEXCore::Context::RunLoop(CTX, true); + LogMan::Msg::D("Reason we left VM: %d", ShutdownReason); + + FEXCore::Context::DestroyContext(CTX); + FEXCore::SHM::DestroyRegion(SHM); + return 0; +} diff --git a/External/vixl b/External/vixl new file mode 160000 index 000000000..107a535ca --- /dev/null +++ b/External/vixl @@ -0,0 +1 @@ +Subproject commit 107a535cad4c1483c24021b8a24adec2deb1daed diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..cdb4f90b3 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Ryan Houdek + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Readme.md b/Readme.md new file mode 100644 index 000000000..8cd84be64 --- /dev/null +++ b/Readme.md @@ -0,0 +1,41 @@ +# FEXCore - Fast x86 Core emulation library +This is the core emulation library that is used for the FEX emulator project. +This project aims to provide a fast and functional x86-64 emulation library that can meet and surpass other x86-64 emulation libraries. +### Goals +* Be as fast as possible, beating and exceeding current options for x86-64 emulation + * 25% - 50% lower performance than native code would be desired target + * Use an IR to efficiently translate x86-64 to our host architecture + * Support a tiered recompiler to allow for fast runtime performance + * Support offline compilation and offline tooling for inspection and performance analysis + * Support threaded emulation. Including emulating x86-64's strong memory model on weak memory model architectures +* Support a significant portion of the x86-64 instruction space. + * Including MMX, SSE, SSE2, SSE3, SSSE3, and SSE4* +* Support fallback routines for uncommonly used x86-64 instructions + * Including x87 and 3DNow! +* Only support userspace emulation. + * All x86-64 instructions run as if they are under CPL-3(userland) security layer +* Minimal Linux Syscall emulation for testing purposes +* Portable library implementation in order to support easy integration in to applications +### Target Host Architecture +The target host architecture for this library is AArch64. Specifically the ARMv8.1 version or newer. +The CPU IR is designed with AArch64 in mind but there is a desire to run the recompiled code on other architectures as well. +Multiple architecture support is desired for easier bringup and debugging, performance isn't as much of a priority there (ex. 
x86-64(guest) translated to x86-64(host)) +### Not currently goals but will be in the future +* 32bit x86 support + * This will be a desire in the future, but to lower the amount of work required, decided to push this off for now. +* Integration in to WINE +* Later generation of x86-64 instruction sets + * Including AVX, F16C, XOP, FMA, AVX2, etc +### Not desired +* Kernel space emulation +* CPL0-2 emulation +* Real Mode, Protected Mode, Virtual-8086 Mode, System Management Mode +* IRQs +* SVM +* "Cycle Accurate" emulation +### Dependencies + * [SonicUtils](https://github.com/Sonicadvance1/SonicUtils) + * LLVM + * clang-tidy if you want to ensure the code stays tidy + * cmake + * A C++17 compliant compiler (There are assumptions made about using Clang and LTO) diff --git a/Scripts/json_ir_generator.py b/Scripts/json_ir_generator.py new file mode 100644 index 000000000..7b62a761c --- /dev/null +++ b/Scripts/json_ir_generator.py @@ -0,0 +1,373 @@ +import json +import sys + +# Print out enum values +def print_enums(ops, defines): + output_file.write("#ifdef IROP_ENUM\n") + output_file.write("enum IROps : uint8_t {\n") + + for op_key, op_vals in ops.items(): + output_file.write("\t\tOP_%s,\n" % op_key.upper()) + + output_file.write("};\n") + + output_file.write("#undef IROP_ENUM\n") + output_file.write("#endif\n\n") + +# Print out struct definitions +def print_ir_structs(ops, defines): + output_file.write("#ifdef IROP_STRUCTS\n") + + # Print out defines here + for op_val in defines: + output_file.write("\t%s;\n" % op_val) + + output_file.write("// Default structs\n") + output_file.write("struct __attribute__((packed)) IROp_Header {\n") + output_file.write("\tvoid* Data[0];\n") + output_file.write("\tIROps Op;\n\n") + output_file.write("\tuint8_t Size;\n") + output_file.write("\tuint8_t NumArgs;\n") + output_file.write("\tuint8_t Elements : 7;\n") + output_file.write("\tbool HasDest : 1;\n") + + output_file.write("\ttemplate\n") + output_file.write("\tT const* C() const 
{ return reinterpret_cast(Data); }\n") + output_file.write("\ttemplate\n") + output_file.write("\tT* CW() { return reinterpret_cast(Data); }\n") + + output_file.write("\tNodeWrapper Args[0];\n") + + output_file.write("};\n\n"); + + output_file.write("struct __attribute__((packed)) IROp_Empty {\n") + output_file.write("\tIROp_Header Header;\n") + output_file.write("};\n\n") + + output_file.write("// User defined IR Op structs\n") + for op_key, op_vals in ops.items(): + SSAArgs = 0 + HasArgs = False + + if ("SSAArgs" in op_vals): + SSAArgs = int(op_vals["SSAArgs"]) + + if ("Args" in op_vals and len(op_vals["Args"]) != 0): + HasArgs = True + + if (HasArgs or SSAArgs != 0): + output_file.write("struct __attribute__((packed)) IROp_%s {\n" % op_key) + output_file.write("\tIROp_Header Header;\n\n") + + # SSA arguments have a hard requirement to appear after the header + if (SSAArgs != 0): + output_file.write("private:\n") + for i in range(0, SSAArgs): + output_file.write("\tuint64_t : (sizeof(NodeWrapper) * 8);\n"); + output_file.write("public:\n") + + if (HasArgs): + output_file.write("\t// User defined data\n") + + # Print out arguments in IR Op + for i in range(0, len(op_vals["Args"]), 2): + data_type = op_vals["Args"][i] + data_name = op_vals["Args"][i+1] + output_file.write("\t%s %s;\n" % (data_type, data_name)) + + output_file.write("};\n") + else: + output_file.write("using IROp_%s = IROp_Empty;\n" % op_key) + + # Add a static assert that the IR ops must be pod + output_file.write("static_assert(std::is_pod::value);\n\n" % op_key) + + output_file.write("#undef IROP_STRUCTS\n") + output_file.write("#endif\n\n") + +# Print out const expression to calculate IR Op sizes +def print_ir_sizes(ops, defines): + output_file.write("#ifdef IROP_SIZES\n") + + output_file.write("constexpr std::array IRSizes = {\n") + for op_key, op_vals in ops.items(): + if ("Last" in op_vals): + output_file.write("\t-1ULL,\n") + else: + output_file.write("\tsizeof(IROp_%s),\n" % op_key) + + 
output_file.write("};\n\n") + + output_file.write("// Make sure our array maps directly to the IROps enum\n") + output_file.write("static_assert(IRSizes[IROps::OP_LAST] == -1ULL);\n\n") + + output_file.write("[[maybe_unused]] static size_t GetSize(IROps Op) { return IRSizes[Op]; }\n\n") + + output_file.write("std::string_view const& GetName(IROps Op);\n") + + output_file.write("#undef IROP_SIZES\n") + output_file.write("#endif\n\n") + +# Print out the name printer implementation +def print_ir_getname(ops, defines): + output_file.write("#ifdef IROP_GETNAME_IMPL\n") + output_file.write("constexpr std::array IRNames = {\n") + for op_key, op_vals in ops.items(): + output_file.write("\t\"%s\",\n" % op_key) + + output_file.write("};\n\n") + + output_file.write("static_assert(IRNames[OP_LAST] == \"Last\");\n\n") + + output_file.write("std::string_view const& GetName(IROps Op) {\n") + output_file.write(" return IRNames[Op];\n") + output_file.write("}\n") + + output_file.write("#undef IROP_GETNAME_IMPL\n") + output_file.write("#endif\n\n") + +# Print out IR argument printing +def print_ir_arg_printer(ops, defines): + output_file.write("#ifdef IROP_ARGPRINTER_HELPER\n") + output_file.write("switch (IROp->Op) {\n") + for op_key, op_vals in ops.items(): + if not ("Last" in op_vals): + SSAArgs = 0 + HasArgs = False + + # Does this not want a printer? 
+ if ("ArgPrinter" in op_vals and op_vals["ArgPrinter"] == False): + continue + + if ("SSAArgs" in op_vals): + SSAArgs = int(op_vals["SSAArgs"]) + + if ("Args" in op_vals and len(op_vals["Args"]) != 0): + HasArgs = True + + output_file.write("case IROps::OP_%s: {\n" % op_key.upper()) + if (HasArgs or SSAArgs != 0): + output_file.write("\tauto Op = IROp->C();\n" % op_key) + output_file.write("\t*out << \" \";\n") + + # Print SSA args first + if (SSAArgs != 0): + for i in range(0, SSAArgs): + LastArg = (SSAArgs - i - 1) == 0 and not HasArgs + output_file.write("\tPrintArg(out, IR, Op->Header.Args[%d]);\n" % i) + if not (LastArg): + output_file.write("\t*out << \", \";\n") + + # Now print user defined arguments + if (HasArgs): + ArgCount = len(op_vals["Args"]) + + for i in range(0, len(op_vals["Args"]), 2): + data_name = op_vals["Args"][i+1] + LastArg = (ArgCount - i - 2) == 0 + CondArg2 = (", ", "") + output_file.write("\tPrintArg(out, IR, Op->%s);\n" % data_name) + if not (LastArg): + output_file.write("\t*out << \", \";\n") + + output_file.write("break;\n") + output_file.write("}\n") + + + output_file.write("#undef IROP_ARGPRINTER_HELPER\n") + output_file.write("#endif\n") + +# Print out IR allocator helpers +def print_ir_allocator_helpers(ops, defines): + output_file.write("#ifdef IROP_ALLOCATE_HELPERS\n") + + output_file.write("\ttemplate \n") + output_file.write("\tusing IRPair = FEXCore::IR::Wrapper;\n\n") + + output_file.write("\tIRPair AllocateRawOp(size_t HeaderSize) {\n") + output_file.write("\t\tauto Op = reinterpret_cast(Data.Allocate(HeaderSize));\n") + output_file.write("\t\tmemset(Op, 0, HeaderSize);\n") + output_file.write("\t\tOp->Op = IROps::OP_DUMMY;\n") + output_file.write("\t\treturn FEXCore::IR::Wrapper{Op, CreateNode(Op)};\n") + output_file.write("\t}\n\n") + + output_file.write("\ttemplate\n") + output_file.write("\tIRPair AllocateOp() {\n") + output_file.write("\t\tsize_t Size = FEXCore::IR::GetSize(T2);\n") + output_file.write("\t\tauto Op = 
reinterpret_cast(Data.Allocate(Size));\n") + output_file.write("\t\tmemset(Op, 0, Size);\n") + output_file.write("\t\tOp->Header.Op = T2;\n") + output_file.write("\t\treturn FEXCore::IR::Wrapper{Op, CreateNode(&Op->Header)};\n") + output_file.write("\t}\n\n") + + output_file.write("\tuint8_t GetOpSize(OrderedNode *Op) const {\n") + output_file.write("\t\tauto HeaderOp = reinterpret_cast(Op->Header.Value.GetPtr(Data.Begin()));\n") + output_file.write("\t\tLogMan::Throw::A(HeaderOp->HasDest, \"Op %s has no dest\\n\", GetName(HeaderOp->Op));\n") + output_file.write("\t\treturn HeaderOp->Size;\n") + output_file.write("\t}\n\n") + + output_file.write("\tuint8_t GetOpElements(OrderedNode *Op) const {\n") + output_file.write("\t\tauto HeaderOp = reinterpret_cast(Op->Header.Value.GetPtr(Data.Begin()));\n") + output_file.write("\t\tLogMan::Throw::A(HeaderOp->HasDest, \"Op %s has no dest\\n\", GetName(HeaderOp->Op));\n") + output_file.write("\t\treturn HeaderOp->Elements;\n") + output_file.write("\t}\n\n") + + output_file.write("\tbool OpHasDest(OrderedNode *Op) const {\n") + output_file.write("\t\tauto HeaderOp = reinterpret_cast(Op->Header.Value.GetPtr(Data.Begin()));\n") + output_file.write("\t\treturn HeaderOp->HasDest;\n") + output_file.write("\t}\n\n") + + for op_key, op_vals in ops.items(): + if not ("Last" in op_vals): + HasDest = False + HasFixedDestSize = False + FixedDestSize = 0 + HasDestSize = False; + DestSize = "" + + if ("HasDest" in op_vals and op_vals["HasDest"] == True): + HasDest = True + + if ("FixedDestSize" in op_vals): + HasFixedDestSize = True + FixedDestSize = int(op_vals["FixedDestSize"]) + + if ("DestSize" in op_vals): + HasDestSize = True + DestSize = op_vals["DestSize"]; + + output_file.write("\tIRPair _%s() {\n" % (op_key, op_key)) + + output_file.write("\t\tauto Op = AllocateOp();\n" % (op_key, op_key.upper())) + + if (HasDest): + if (HasFixedDestSize): + output_file.write("\t\tOp.first->Header.Size = %d;\n" % FixedDestSize) + + 
output_file.write("\t\tOp.first->Header.HasDest = true;\n") + + output_file.write("\t\treturn Op;\n") + + output_file.write("\t}\n\n") + + # Generate helpers with operands + for op_key, op_vals in ops.items(): + if not ("Last" in op_vals): + SSAArgs = 0 + HasArgs = False + HasDest = False + HasFixedDestSize = False + FixedDestSize = 0 + HasDestSize = False; + NumElements = 1 + DestSize = "" + + if ("SSAArgs" in op_vals): + SSAArgs = int(op_vals["SSAArgs"]) + + if ("Args" in op_vals and len(op_vals["Args"]) != 0): + HasArgs = True + + if not (HasArgs or SSAArgs != 0): + continue + + if ("HelperGen" in op_vals and op_vals["HelperGen"] == False): + continue; + + if ("HasDest" in op_vals and op_vals["HasDest"] == True): + HasDest = True + + if ("FixedDestSize" in op_vals): + HasFixedDestSize = True + FixedDestSize = int(op_vals["FixedDestSize"]) + + if ("DestSize" in op_vals): + HasDestSize = True + DestSize = op_vals["DestSize"] + + if ("NumElements" in op_vals): + NumElements = int(op_vals["NumElements"]) + + + output_file.write("\tIRPair _%s(" % (op_key, op_key)) + + # Output SSA args first + if (SSAArgs != 0): + for i in range(0, SSAArgs): + LastArg = (SSAArgs - i - 1) == 0 and not HasArgs + CondArg2 = (", ", "") + output_file.write("OrderedNode *ssa%d%s" % (i, CondArg2[LastArg])) + + if (HasArgs): + ArgCount = len(op_vals["Args"]) + for i in range(0, len(op_vals["Args"]), 2): + data_type = op_vals["Args"][i] + data_name = op_vals["Args"][i+1] + LastArg = (ArgCount - i - 2) == 0 + CondArg2 = (", ", "") + + output_file.write("%s %s%s" % (data_type, data_name, CondArg2[LastArg])) + + output_file.write(") {\n") + + output_file.write("\t\tauto Op = AllocateOp();\n" % (op_key, op_key.upper())) + output_file.write("\t\tOp.first->Header.NumArgs = %d;\n" % (SSAArgs)) + + if (SSAArgs != 0): + for i in range(0, SSAArgs): + output_file.write("\t\tOp.first->Header.Args[%d] = ssa%d->Wrapped(ListData.Begin());\n" % (i, i)) + + if (HasArgs): + for i in range(1, 
len(op_vals["Args"]), 2): + data_name = op_vals["Args"][i] + output_file.write("\t\tOp.first->%s = %s;\n" % (data_name, data_name)) + + if (HasDest): + if (HasFixedDestSize): + output_file.write("\t\tOp.first->Header.Size = %d;\n" % FixedDestSize) + if (HasDestSize): + output_file.write("\t\tOp.first->Header.Size = %s;\n" % DestSize) + output_file.write("\t\tOp.first->Header.Elements = %s;\n" % NumElements) + + if not (HasFixedDestSize or HasDestSize): + # We need to infer destination size + output_file.write("\t\tuint8_t InferSize = 0;\n") + if (SSAArgs != 0): + for i in range(0, SSAArgs): + output_file.write("\t\tuint8_t Size%d = GetOpSize(ssa%s);\n" % (i, i)) + output_file.write("\t\tInferSize = std::max(InferSize, Size%d);\n" % (i)) + + output_file.write("\t\tOp.first->Header.Size = InferSize;\n") + + output_file.write("\t\tOp.first->Header.HasDest = true;\n") + + output_file.write("\t\treturn Op;\n") + output_file.write("\t}\n\n") + + output_file.write("#undef IROP_ALLOCATE_HELPERS\n") + output_file.write("#endif\n") + +if (len(sys.argv) < 3): + sys.exit() + +output_filename = sys.argv[2] +json_file = open(sys.argv[1], "r") +json_text = json_file.read() +json_file.close() + +json_object = json.loads(json_text) +json_object = {k.upper(): v for k, v in json_object.items()} + +ops = json_object["OPS"] +defines = json_object["DEFINES"] + +output_file = open(output_filename, "w") + +print_enums(ops, defines) +print_ir_structs(ops, defines) +print_ir_sizes(ops, defines) +print_ir_getname(ops, defines) +print_ir_arg_printer(ops, defines) +print_ir_allocator_helpers(ops, defines) + +output_file.close() diff --git a/Source/CMakeLists.txt b/Source/CMakeLists.txt new file mode 100644 index 000000000..1367935b5 --- /dev/null +++ b/Source/CMakeLists.txt @@ -0,0 +1,133 @@ +if (ENABLE_CLANG_FORMAT) + find_program(CLANG_TIDY_EXE "clang-tidy") + set(CLANG_TIDY_FLAGS + "-checks=*" + "-fuchsia*" + "-bugprone-macro-parentheses" + "-clang-analyzer-core.*" + 
"-cppcoreguidelines-pro-type-*" + "-cppcoreguidelines-pro-bounds-array-to-pointer-decay" + "-cppcoreguidelines-pro-bounds-pointer-arithmetic" + "-cppcoreguidelines-avoid-c-arrays" + "-cppcoreguidelines-avoid-magic-numbers" + "-cppcoreguidelines-pro-bounds-constant-array-index" + "-cppcoreguidelines-no-malloc" + "-cppcoreguidelines-special-member-functions" + "-cppcoreguidelines-owning-memory" + "-cppcoreguidelines-macro-usage" + "-cppcoreguidelines-avoid-goto" + "-google-readability-function-size" + "-google-readability-namespace-comments" + "-google-readability-braces-around-statements" + "-google-build-using-namespace" + "-hicpp-*" + "-llvm-namespace-comment" + "-llvm-include-order" # Messes up with case sensitivity + "-misc-unused-parameters" + "-modernize-loop-convert" + "-modernize-use-auto" + "-modernize-avoid-c-arrays" + "-modernize-use-nodiscard" + "readability-*" + "-readability-function-size" + "-readability-implicit-bool-conversion" + "-readability-braces-around-statements" + "-readability-else-after-return" + "-readability-magic-numbers" + "-readability-named-parameter" + "-readability-uppercase-literal-suffix" + "-cert-err34-c" + "-cert-err58-cpp" + "-bugprone-exception-escape" + ) + string(REPLACE ";" "," CLANG_TIDY_FLAGS "${CLANG_TIDY_FLAGS}") + + set(CMAKE_CXX_CLANG_TIDY ${CLANG_TIDY_EXE} "${CLANG_TIDY_FLAGS}") +endif() + +set (SRCS + Common/Paths.cpp + Interface/Config/Config.cpp + Interface/Context/Context.cpp + Interface/Core/BlockCache.cpp + Interface/Core/Core.cpp + Interface/Core/CPUID.cpp + Interface/Core/Frontend.cpp + Interface/Core/OpcodeDispatcher.cpp + Interface/Core/RegisterAllocation.cpp + Interface/Core/X86Tables.cpp + Interface/Core/X86DebugInfo.cpp + Interface/Core/Interpreter/InterpreterCore.cpp + Interface/Core/LLVMJIT/LLVMCore.cpp + Interface/Core/LLVMJIT/LLVMMemoryManager.cpp + Interface/HLE/FileManagement.cpp + Interface/HLE/Syscalls.cpp + Interface/Memory/MemMapper.cpp + Interface/Memory/SharedMem.cpp + Interface/IR/IR.cpp + 
Interface/IR/PassManager.cpp + Interface/IR/Passes/ConstProp.cpp + Interface/IR/Passes/DeadContextStoreElimination.cpp + Interface/IR/Passes/IRCompaction.cpp + Interface/IR/Passes/IRValidation.cpp + Interface/IR/Passes/RedundantFlagCalculationElimination.cpp + Interface/IR/Passes/SyscallOptimization.cpp + ) + +set (JIT_LIBS ) +if (ENABLE_JIT) + if (_M_X86_64) + add_definitions(-D_M_X86_64=1) + if (NOT FORCE_AARCH64) + list(APPEND SRCS Interface/Core/JIT/x86_64/JIT.cpp) + endif() + endif() + if(_M_ARM_64) + add_definitions(-D_M_ARM_64=1) + list(APPEND SRCS Interface/Core/JIT/Arm64/JIT.cpp) + list(APPEND JIT_LIBS vixl) + endif() +endif() + +# Generate IR include file +set(OUTPUT_NAME "${CMAKE_CURRENT_BINARY_DIR}/IRDefines.inc") +set(INPUT_NAME "${CMAKE_CURRENT_SOURCE_DIR}/Interface/IR/IR.json") + +add_custom_target(IR_INC + DEPENDS "${INPUT_NAME}" + COMMAND "python3" "${CMAKE_CURRENT_SOURCE_DIR}/../Scripts/json_ir_generator.py" "${INPUT_NAME}" "${OUTPUT_NAME}" + ) + +add_library(${PROJECT_NAME} STATIC ${SRCS}) +add_dependencies(${PROJECT_NAME} IR_INC) +target_link_libraries(${PROJECT_NAME} LLVM pthread rt ${JIT_LIBS}) + +target_include_directories(${PROJECT_NAME} PUBLIC "${CMAKE_CURRENT_BINARY_DIR}") + +target_include_directories(${PROJECT_NAME} PRIVATE IncludePrivate/) + +target_compile_options(${PROJECT_NAME} + PRIVATE + "-Wno-trigraphs") + +# Add in diagnostic colours if the option is available. 
#pragma once

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <type_traits>

// Minimal POD bitset backed by a single heap allocation.
// T is both the index type and the backing-word type; the struct is exactly
// one pointer wide so it can live inside other POD state cheaply.
template <typename T>
struct BitSet final {
  using ElementType = T;
  constexpr static size_t MinimumSize = sizeof(ElementType);          // bytes per backing word
  constexpr static size_t MinimumSizeBits = sizeof(ElementType) * 8;  // bits per backing word

  ElementType *Memory;

  // Allocate backing storage for at least `Elements` bits.
  // FIX: the previous size calculation truncated (`Elements / MinimumSize`),
  // under-allocating for bit counts that aren't a multiple of the word size
  // (e.g. malloc(0) for a single element). Round the bit count up to a whole
  // word first.
  void Allocate(size_t Elements) {
    Memory = static_cast<ElementType*>(malloc(((Elements + MinimumSizeBits - 1) / MinimumSizeBits) * MinimumSize));
  }
  void Realloc(size_t Elements) {
    Memory = static_cast<ElementType*>(realloc(Memory, ((Elements + MinimumSizeBits - 1) / MinimumSizeBits) * MinimumSize));
  }
  void Free() {
    free(Memory);
    Memory = nullptr;
  }
  bool Get(T Element) {
    // FIX: use a 64-bit-wide shift; `1 <<` is int-width and is UB for bit
    // positions >= 32 when the backing word is 64 bits.
    return (Memory[Element / MinimumSizeBits] & (1ULL << (Element % MinimumSizeBits))) != 0;
  }
  void Set(T Element) {
    Memory[Element / MinimumSizeBits] |= (1ULL << (Element % MinimumSizeBits));
  }
  void Clear(T Element) {
    // FIX: the mask must be inverted; `&= (1 << n)` cleared every bit EXCEPT
    // the target instead of clearing the target bit.
    Memory[Element / MinimumSizeBits] &= ~(1ULL << (Element % MinimumSizeBits));
  }
  // Size is in BYTES, matching memset.
  void MemClear(size_t Size) {
    memset(Memory, 0, Size);
  }
  void MemSet(size_t Size) {
    memset(Memory, 0xFF, Size);
  }

  // This very explicitly doesn't let you take an address
  // Is only a getter
  bool operator[](T Element) {
    return Get(Element);
  }
};

static_assert(sizeof(BitSet<uint64_t>) == sizeof(uintptr_t), "Needs to just be a pointer");
static_assert(std::is_trivial<BitSet<uint64_t>>::value && std::is_standard_layout<BitSet<uint64_t>>::value, "Needs to POD");
a/Source/Common/MathUtils.h b/Source/Common/MathUtils.h new file mode 100644 index 000000000..8274ad2be --- /dev/null +++ b/Source/Common/MathUtils.h @@ -0,0 +1,13 @@ +#pragma once + +#include + +static inline uint64_t AlignUp(uint64_t value, uint64_t size) { + return value + (size - value % size) % size; +}; + +static inline uint64_t AlignDown(uint64_t value, uint64_t size) { + return value - value % size; +}; + + diff --git a/Source/Common/Paths.cpp b/Source/Common/Paths.cpp new file mode 100644 index 000000000..36769ac82 --- /dev/null +++ b/Source/Common/Paths.cpp @@ -0,0 +1,30 @@ +#include "Common/Paths.h" + +#include +#include + +namespace FEXCore::Paths { + std::string DataPath; + std::string EntryCache; + + void InitializePaths() { + char *HomeDir = getenv("HOME"); + char *XDGDataDir = getenv("XDG_DATA_DIR"); + if (XDGDataDir) { + DataPath = XDGDataDir; + } + else { + if (HomeDir) { + DataPath = HomeDir; + } + } + DataPath += "/.fexcore/"; + EntryCache = DataPath + "/EntryCache/"; + mkdir(DataPath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); + mkdir(EntryCache.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); + } + + std::string GetDataPath() { + return DataPath; + } +} diff --git a/Source/Common/Paths.h b/Source/Common/Paths.h new file mode 100644 index 000000000..024fa3dd3 --- /dev/null +++ b/Source/Common/Paths.h @@ -0,0 +1,7 @@ +#pragma once +#include + +namespace FEXCore::Paths { + void InitializePaths(); + std::string GetDataPath(); +} diff --git a/Source/Interface/Config/Config.cpp b/Source/Interface/Config/Config.cpp new file mode 100644 index 000000000..b026b63e6 --- /dev/null +++ b/Source/Interface/Config/Config.cpp @@ -0,0 +1,51 @@ +#include "LogManager.h" +#include "Interface/Context/Context.h" + +#include + +namespace FEXCore::Config { + void SetConfig(FEXCore::Context::Context *CTX, ConfigOption Option, uint64_t Config) { + switch (Option) { + case FEXCore::Config::CONFIG_MULTIBLOCK: + CTX->Config.Multiblock = Config != 0; + break; + case 
FEXCore::Config::CONFIG_MAXBLOCKINST: + CTX->Config.MaxInstPerBlock = Config; + break; + case FEXCore::Config::CONFIG_DEFAULTCORE: + CTX->Config.Core = static_cast(Config); + break; + case FEXCore::Config::CONFIG_VIRTUALMEMSIZE: + CTX->Config.VirtualMemSize = Config; + break; + case FEXCore::Config::CONFIG_SINGLESTEP: + CTX->RunningMode = Config != 0 ? FEXCore::Context::CoreRunningMode::MODE_SINGLESTEP : FEXCore::Context::CoreRunningMode::MODE_RUN; + break; + default: LogMan::Msg::A("Unknown configuration option"); + } + } + + uint64_t GetConfig(FEXCore::Context::Context *CTX, ConfigOption Option) { + switch (Option) { + case FEXCore::Config::CONFIG_MULTIBLOCK: + return CTX->Config.Multiblock; + break; + case FEXCore::Config::CONFIG_MAXBLOCKINST: + return CTX->Config.MaxInstPerBlock; + break; + case FEXCore::Config::CONFIG_DEFAULTCORE: + return CTX->Config.Core; + break; + case FEXCore::Config::CONFIG_VIRTUALMEMSIZE: + return CTX->Config.VirtualMemSize; + break; + case FEXCore::Config::CONFIG_SINGLESTEP: + return CTX->RunningMode == FEXCore::Context::CoreRunningMode::MODE_SINGLESTEP ? 
1 : 0; + break; + default: LogMan::Msg::A("Unknown configuration option"); + } + + return 0; + } +} + diff --git a/Source/Interface/Context/Context.cpp b/Source/Interface/Context/Context.cpp new file mode 100644 index 000000000..af6358e0f --- /dev/null +++ b/Source/Interface/Context/Context.cpp @@ -0,0 +1,131 @@ +#include "Common/Paths.h" +#include "Interface/Context/Context.h" +#include "Interface/Core/Core.h" +#include "Interface/Core/OpcodeDispatcher.h" + +#include +#include +#include + +namespace FEXCore::Context { + void InitializeStaticTables() { + FEXCore::Paths::InitializePaths(); + X86Tables::InitializeInfoTables(); + IR::InstallOpcodeHandlers(); + } + + FEXCore::Context::Context *CreateNewContext() { + return new FEXCore::Context::Context{}; + } + + bool InitializeContext(FEXCore::Context::Context *CTX) { + return FEXCore::CPU::CreateCPUCore(CTX); + } + + void DestroyContext(FEXCore::Context::Context *CTX) { + delete CTX; + } + + bool AddGuestMemoryRegion(FEXCore::Context::Context *CTX, FEXCore::SHM::SHMObject *SHM) { + CTX->MemoryMapper.SetBaseRegion(SHM); + return true; + } + + void SetApplicationFile(FEXCore::Context::Context *CTX, std::string const &File) { + CTX->SyscallHandler.SetFilename(File); + // XXX: This isn't good for debugging + // CTX->LoadEntryList(); + } + + bool InitCore(FEXCore::Context::Context *CTX, FEXCore::CodeLoader *Loader) { + return CTX->InitCore(Loader); + } + + FEXCore::Context::ExitReason RunLoop(FEXCore::Context::Context *CTX, bool WaitForIdle) { + return CTX->RunLoop(WaitForIdle); + } + + FEXCore::Context::ExitReason GetExitReason(FEXCore::Context::Context *CTX) { + return CTX->ParentThread->ExitReason; + } + + bool IsDone(FEXCore::Context::Context *CTX) { + return CTX->IsPaused(); + } + + void GetCPUState(FEXCore::Context::Context *CTX, FEXCore::Core::CPUState *State) { + memcpy(State, &CTX->ParentThread->State.State, sizeof(FEXCore::Core::CPUState)); + } + + void SetCPUState(FEXCore::Context::Context *CTX, 
FEXCore::Core::CPUState *State) { + memcpy(&CTX->ParentThread->State.State, State, sizeof(FEXCore::Core::CPUState)); + } + + void Pause(FEXCore::Context::Context *CTX) { + CTX->Pause(); + } + + void SetCustomCPUBackendFactory(FEXCore::Context::Context *CTX, CustomCPUFactoryType Factory) { + CTX->CustomCPUFactory = std::move(Factory); + } + + void SetFallbackCPUBackendFactory(FEXCore::Context::Context *CTX, CustomCPUFactoryType Factory) { + CTX->FallbackCPUFactory = std::move(Factory); + } + + uint64_t HandleSyscall(FEXCore::Context::Context *CTX, FEXCore::Core::ThreadState *Thread, FEXCore::HLE::SyscallArguments *Args) { + return CTX->SyscallHandler.HandleSyscall(reinterpret_cast(Thread), Args); + } + + bool AddVirtualMemoryMapping([[maybe_unused]] FEXCore::Context::Context *CTX, [[maybe_unused]] uint64_t VirtualAddress, [[maybe_unused]] uint64_t PhysicalAddress, [[maybe_unused]] uint64_t Size) { + return false; + } + + void RegisterExternalSyscallVisitor(FEXCore::Context::Context *CTX, [[maybe_unused]] uint64_t Syscall, [[maybe_unused]] FEXCore::HLE::SyscallVisitor *Visitor) { + } + +namespace Debug { + void CompileRIP(FEXCore::Context::Context *CTX, uint64_t RIP) { + CTX->CompileRIP(CTX->ParentThread, RIP); + } + uint64_t GetThreadCount(FEXCore::Context::Context *CTX) { + return CTX->GetThreadCount(); + } + + FEXCore::Core::RuntimeStats *GetRuntimeStatsForThread(FEXCore::Context::Context *CTX, uint64_t Thread) { + return CTX->GetRuntimeStatsForThread(Thread); + } + + FEXCore::Core::CPUState GetCPUState(FEXCore::Context::Context *CTX) { + return CTX->GetCPUState(); + } + + void GetMemoryRegions(FEXCore::Context::Context *CTX, std::vector *Regions) { + return CTX->GetMemoryRegions(Regions); + } + + bool GetDebugDataForRIP(FEXCore::Context::Context *CTX, uint64_t RIP, FEXCore::Core::DebugData *Data) { + return CTX->GetDebugDataForRIP(RIP, Data); + } + + bool FindHostCodeForRIP(FEXCore::Context::Context *CTX, uint64_t RIP, uint8_t **Code) { + return 
CTX->FindHostCodeForRIP(RIP, Code); + } + + // XXX: + // bool FindIRForRIP(FEXCore::Context::Context *CTX, uint64_t RIP, FEXCore::IR::IntrusiveIRList **ir) { + // return CTX->FindIRForRIP(RIP, ir); + // } + + // void SetIRForRIP(FEXCore::Context::Context *CTX, uint64_t RIP, FEXCore::IR::IntrusiveIRList *const ir) { + // CTX->SetIRForRIP(RIP, ir); + // } + + FEXCore::Core::ThreadState *GetThreadState(FEXCore::Context::Context *CTX) { + return CTX->GetThreadState(); + } + + +} + +} diff --git a/Source/Interface/Context/Context.h b/Source/Interface/Context/Context.h new file mode 100644 index 000000000..f8c14b3bd --- /dev/null +++ b/Source/Interface/Context/Context.h @@ -0,0 +1,103 @@ +#pragma once +#include "Event.h" +#include "Interface/Core/CPUID.h" +#include "Interface/Core/Frontend.h" +#include "Interface/Core/InternalThreadState.h" +#include "Interface/HLE/Syscalls.h" +#include "Interface/Memory/MemMapper.h" +#include "Interface/IR/PassManager.h" +#include +#include +#include + +#include + +namespace FEXCore { +class SyscallHandler; +} + +namespace FEXCore::Context { + enum CoreRunningMode { + MODE_RUN, + MODE_SINGLESTEP, + }; + + struct Context { + friend class FEXCore::SyscallHandler; + struct { + bool Multiblock {false}; + bool BreakOnFrontendFailure {true}; + int64_t MaxInstPerBlock {-1LL}; + uint64_t VirtualMemSize {1ULL << 36}; + FEXCore::Config::ConfigCore Core {FEXCore::Config::CONFIG_INTERPRETER}; + + // LLVM JIT options + bool LLVM_MemoryValidation {false}; + bool LLVM_IRValidation {false}; + bool LLVM_PrinterPass {false}; + } Config; + + FEXCore::Memory::MemMapper MemoryMapper; + + std::mutex ThreadCreationMutex; + uint64_t ThreadID{}; + FEXCore::Core::InternalThreadState* ParentThread; + std::vector Threads; + std::atomic_bool ShouldStop{}; + Event PauseWait; + bool Running{}; + CoreRunningMode RunningMode {CoreRunningMode::MODE_RUN}; + FEXCore::Frontend::Decoder FrontendDecoder; + FEXCore::IR::PassManager PassManager; + + FEXCore::CPUIDEmu CPUID; + 
FEXCore::SyscallHandler SyscallHandler; + CustomCPUFactoryType CustomCPUFactory; + CustomCPUFactoryType FallbackCPUFactory; + + Context(); + ~Context(); + + bool InitCore(FEXCore::CodeLoader *Loader); + FEXCore::Context::ExitReason RunLoop(bool WaitForIdle); + bool IsPaused() const { return !Running; } + void Pause(); + + // Debugger interface + void CompileRIP(FEXCore::Core::InternalThreadState *Thread, uint64_t RIP); + uint64_t GetThreadCount() const; + FEXCore::Core::RuntimeStats *GetRuntimeStatsForThread(uint64_t Thread); + FEXCore::Core::CPUState GetCPUState(); + void GetMemoryRegions(std::vector *Regions); + bool GetDebugDataForRIP(uint64_t RIP, FEXCore::Core::DebugData *Data); + bool FindHostCodeForRIP(uint64_t RIP, uint8_t **Code); + + // XXX: + // bool FindIRForRIP(uint64_t RIP, FEXCore::IR::IntrusiveIRList **ir); + // void SetIRForRIP(uint64_t RIP, FEXCore::IR::IntrusiveIRList *const ir); + FEXCore::Core::ThreadState *GetThreadState(); + void LoadEntryList(); + + private: + void WaitForIdle(); + FEXCore::Core::InternalThreadState* CreateThread(FEXCore::Core::CPUState *NewThreadState, uint64_t ParentTID, uint64_t ChildTID); + void *MapRegion(FEXCore::Core::InternalThreadState *Thread, uint64_t Offset, uint64_t Size, bool Fixed = false); + void *ShmBase(); + void MirrorRegion(FEXCore::Core::InternalThreadState *Thread, void *HostPtr, uint64_t Offset, uint64_t Size); + void CopyMemoryMapping(FEXCore::Core::InternalThreadState *ParentThread, FEXCore::Core::InternalThreadState *ChildThread); + void InitializeThread(FEXCore::Core::InternalThreadState *Thread); + void ExecutionThread(FEXCore::Core::InternalThreadState *Thread); + void RunThread(FEXCore::Core::InternalThreadState *Thread); + + uintptr_t CompileBlock(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP); + uintptr_t AddBlockMapping(FEXCore::Core::InternalThreadState *Thread, uint64_t Address, void *Ptr); + + FEXCore::CodeLoader *LocalLoader{}; + + // Entry Cache + bool 
GetFilenameHash(std::string const &Filename, std::string &Hash); + void AddThreadRIPsToEntryList(FEXCore::Core::InternalThreadState *Thread); + void SaveEntryList(); + std::set EntryList; + }; +} diff --git a/Source/Interface/Core/BlockCache.cpp b/Source/Interface/Core/BlockCache.cpp new file mode 100644 index 000000000..ce79c66a8 --- /dev/null +++ b/Source/Interface/Core/BlockCache.cpp @@ -0,0 +1,54 @@ +#include "Interface/Context/Context.h" +#include "Interface/Core/Core.h" +#include "Interface/Core/BlockCache.h" +#include + +namespace FEXCore { +BlockCache::BlockCache(FEXCore::Context::Context *CTX) + : ctx {CTX} { + + // Block cache ends up looking like this + // PageMemoryMap[VirtualMemoryRegion >> 12] + // | + // v + // PageMemory[Memory & (VIRTUAL_PAGE_SIZE - 1)] + // | + // v + // Pointer to Code + // + // Allocate a region of memory that we can use to back our block pointers + // We need one pointer per page of virtual memory + // At 64GB of virtual memory this will allocate 128MB of virtual memory space + PagePointer = reinterpret_cast(mmap(nullptr, ctx->Config.VirtualMemSize / 4096 * 8, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + + // Allocate our memory backing our pageso + // We need 32KB per guest page (One pointer per byte) + // XXX: We can drop down to 16KB if we store 4byte offsets from the code base + // We currently limit to 128MB of real memory for caching for the total cache size. 
+ // Can end up being inefficient if we compile a small number of blocks per page + PageMemory = reinterpret_cast(mmap(nullptr, CODE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + LogMan::Throw::A(PageMemory != -1ULL, "Failed to allocate page memory"); +} + +BlockCache::~BlockCache() { + munmap(reinterpret_cast(PagePointer), ctx->Config.VirtualMemSize / 4096 * 8); + munmap(reinterpret_cast(PageMemory), CODE_SIZE); +} + +void BlockCache::HintUsedRange(uint64_t Address, uint64_t Size) { + // Tell the kernel we will definitely need [Address, Address+Size) mapped for the page pointer + // Page Pointer is allocated per page, so shift by page size + Address >>= 12; + Size >>= 12; + madvise(reinterpret_cast(PagePointer + Address), Size, MADV_WILLNEED); +} + +void BlockCache::ClearCache() { + // Clear out the page memory + madvise(reinterpret_cast(PagePointer), ctx->Config.VirtualMemSize / 4096 * 8, MADV_DONTNEED); + madvise(reinterpret_cast(PageMemory), CODE_SIZE, MADV_DONTNEED); + AllocateOffset = 0; +} + +} + diff --git a/Source/Interface/Core/BlockCache.h b/Source/Interface/Core/BlockCache.h new file mode 100644 index 000000000..162f5281a --- /dev/null +++ b/Source/Interface/Core/BlockCache.h @@ -0,0 +1,104 @@ +#pragma once +#include +#include "LogManager.h" + +namespace FEXCore { +class BlockCache { +public: + + BlockCache(FEXCore::Context::Context *CTX); + ~BlockCache(); + + using BlockCacheIter = uintptr_t; + uintptr_t End() { return 0; } + + uintptr_t FindBlock(uint64_t Address) { + return FindCodePointerForAddress(Address); + } + + void Erase(uint64_t Address) { + uint64_t PageOffset = Address & (0x0FFF); + Address >>= 12; + + uintptr_t *Pointers = reinterpret_cast(PagePointer); + uint64_t PagePointer = Pointers[Address]; + if (!PagePointer) { + // Page for this code didn't even exist, nothing to do + return; + } + + // Page exists, just set the offset to zero + uintptr_t *BlockPointers = reinterpret_cast(PagePointer); + 
BlockPointers[PageOffset] = 0; + } + + uintptr_t AddBlockMapping(uint64_t Address, void *Ptr) { + uint64_t PageOffset = Address & (0x0FFF); + Address >>= 12; + uintptr_t *Pointers = reinterpret_cast(PagePointer); + uint64_t LocalPagePointer = Pointers[Address]; + if (!LocalPagePointer) { + // We don't have a page pointer for this address + // Allocate one now if we can + uintptr_t NewPageBacking = AllocateBackingForPage(); + if (!NewPageBacking) { + // Couldn't allocate, return so the frontend can recover from this + return 0; + } + Pointers[Address] = NewPageBacking; + LocalPagePointer = NewPageBacking; + } + + // Add the new pointer to the page block + uintptr_t *BlockPointers = reinterpret_cast(LocalPagePointer); + uintptr_t CastPtr = reinterpret_cast(Ptr); + BlockPointers[PageOffset] = CastPtr; + + return CastPtr; + } + + void ClearCache(); + + void HintUsedRange(uint64_t Address, uint64_t Size); + +private: + uintptr_t AllocateBackingForPage() { + uintptr_t NewBase = AllocateOffset; + uintptr_t NewEnd = AllocateOffset + SIZE_PER_PAGE; + + if (NewEnd >= CODE_SIZE) { + // We ran out of block backing space. Need to clear the block cache and tell the JIT cores to clear their caches as well + // Tell whatever is calling this that it needs to do it. 
+ return 0; + } + + AllocateOffset = NewEnd; + return PageMemory + NewBase; + } + + uintptr_t FindCodePointerForAddress(uint64_t Address) { + uint64_t PageOffset = Address & (0x0FFF); + Address >>= 12; + uintptr_t *Pointers = reinterpret_cast(PagePointer); + uint64_t LocalPagePointer = Pointers[Address]; + if (!LocalPagePointer) { + // We don't have a page pointer for this address + return 0; + } + + // Find there pointer for the address in the blocks + uintptr_t *BlockPointers = reinterpret_cast(LocalPagePointer); + return BlockPointers[PageOffset]; + } + + uintptr_t PagePointer; + uintptr_t PageMemory; + + constexpr static size_t CODE_SIZE = 128 * 1024 * 1024; + constexpr static size_t SIZE_PER_PAGE = 4096 * 8; + size_t AllocateOffset {}; + + FEXCore::Context::Context *ctx; + +}; +} diff --git a/Source/Interface/Core/CPUID.cpp b/Source/Interface/Core/CPUID.cpp new file mode 100644 index 000000000..cdef44713 --- /dev/null +++ b/Source/Interface/Core/CPUID.cpp @@ -0,0 +1,118 @@ +#include "Interface/Core/CPUID.h" + +namespace FEXCore { + +CPUIDEmu::FunctionResults CPUIDEmu::Function_0h() { + CPUIDEmu::FunctionResults Res{}; + + Res.Res[0] = 0x16; // Let's say we are a Skylake + // EBX, EDX, ECX become the manufacturer id string + Res.Res[1] = 0x756E6547; // "Genu" + Res.Res[2] = 0x49656E69; // "ineI" + Res.Res[3] = 0x6C65746E; // "ntel" + return Res; +} + +// Processor Info and Features bits +CPUIDEmu::FunctionResults CPUIDEmu::Function_01h() { + CPUIDEmu::FunctionResults Res{}; + + Res.Res[0] = 0 | // Stepping + (0 << 4) | // Model + (0 << 8) | // Family ID + (0 << 12) | // Processor type + (0 << 16) | // Extended model ID + (0 << 20); // Extended family ID + Res.Res[1] = 0 | // Brand index + (8 << 8) | // Cache line size in bytes + (8 << 16) | // Number of addressable IDs for the logical cores in the physical CPU + (0 << 24); // Local APIC ID + Res.Res[2] = ~0U; // Let's say we support every feature for fun + Res.Res[3] = ~0U; // Let's say we support every feature 
for fun + + Res.Res[3] &= ~(3 << 26); // Let's say that XSAVE isn't enabled by the OS. Prevents glibc from using XSAVE/XGETBV + return Res; +} + +// Cache and TLB description +CPUIDEmu::FunctionResults CPUIDEmu::Function_02h() { + CPUIDEmu::FunctionResults Res{}; + return Res; +} + +// Deterministic cache parameters for each level +CPUIDEmu::FunctionResults CPUIDEmu::Function_04h() { + CPUIDEmu::FunctionResults Res{}; + return Res; +} + +CPUIDEmu::FunctionResults CPUIDEmu::Function_07h() { + CPUIDEmu::FunctionResults Res{}; + + // Number of subfunctions + Res.Res[0] = 0x0; + Res.Res[1] = + (1 << 0) | // FS/GS support + (1 << 3) | // BMI 1 support + (1 << 5) | // AVX2 support + (1 << 7) | // SMEP support + (1 << 8) // BMI2 support + ; + Res.Res[2] = ~0U; + Res.Res[3] = ~0U; + return Res; +} + +CPUIDEmu::FunctionResults CPUIDEmu::Function_0Dh() { + CPUIDEmu::FunctionResults Res{}; + return Res; +} +// Highest extended function implemented +CPUIDEmu::FunctionResults CPUIDEmu::Function_8000_0000h() { + CPUIDEmu::FunctionResults Res{}; + Res.Res[0] = 0x8000001F; + return Res; +} + +// Extended processor and feature bits +CPUIDEmu::FunctionResults CPUIDEmu::Function_8000_0001h() { + CPUIDEmu::FunctionResults Res{}; + Res.Res[2] = ~0U; // Let's say we support every feature for fun + Res.Res[3] = ~0U; // Let's say we support every feature for fun + return Res; +} + +// Advanced power management +CPUIDEmu::FunctionResults CPUIDEmu::Function_8000_0006h() { + CPUIDEmu::FunctionResults Res{}; + Res.Res[0] = (1 << 2); // APIC timer not affected by p-state + return Res; +} + +CPUIDEmu::FunctionResults CPUIDEmu::Function_8000_0007h() { + CPUIDEmu::FunctionResults Res{}; + return Res; +} + +// Virtual and physical address sizes +CPUIDEmu::FunctionResults CPUIDEmu::Function_8000_0008h() { + CPUIDEmu::FunctionResults Res{}; + return Res; +} + +void CPUIDEmu::Init() { + RegisterFunction(0, std::bind(&CPUIDEmu::Function_0h, this)); + RegisterFunction(1, 
std::bind(&CPUIDEmu::Function_01h, this)); + RegisterFunction(2, std::bind(&CPUIDEmu::Function_02h, this)); + RegisterFunction(4, std::bind(&CPUIDEmu::Function_04h, this)); + RegisterFunction(7, std::bind(&CPUIDEmu::Function_07h, this)); + RegisterFunction(0xD, std::bind(&CPUIDEmu::Function_0Dh, this)); + + RegisterFunction(0x8000'0000, std::bind(&CPUIDEmu::Function_8000_0000h, this)); + RegisterFunction(0x8000'0001, std::bind(&CPUIDEmu::Function_8000_0001h, this)); + RegisterFunction(0x8000'0006, std::bind(&CPUIDEmu::Function_8000_0006h, this)); + RegisterFunction(0x8000'0007, std::bind(&CPUIDEmu::Function_8000_0007h, this)); + RegisterFunction(0x8000'0008, std::bind(&CPUIDEmu::Function_8000_0008h, this)); +} +} + diff --git a/Source/Interface/Core/CPUID.h b/Source/Interface/Core/CPUID.h new file mode 100644 index 000000000..0418003ed --- /dev/null +++ b/Source/Interface/Core/CPUID.h @@ -0,0 +1,44 @@ +#pragma once +#include +#include + +#include "LogManager.h" + +namespace FEXCore { + +class CPUIDEmu final { +public: + void Init(); + + struct FunctionResults { + // Results in registers EAX, EBX, EDX, ECX respectively + uint32_t Res[4]; + }; + + FunctionResults RunFunction(uint32_t Function) { + LogMan::Throw::A(FunctionHandlers.find(Function) != FunctionHandlers.end(), "Don't have a CPUID handler for 0x%08x", Function); + return FunctionHandlers[Function](); + } +private: + + using FunctionHandler = std::function; + void RegisterFunction(uint32_t Function, FunctionHandler Handler) { + FunctionHandlers[Function] = Handler; + } + + std::unordered_map FunctionHandlers; + + // Functions + FunctionResults Function_0h(); + FunctionResults Function_01h(); + FunctionResults Function_02h(); + FunctionResults Function_04h(); + FunctionResults Function_07h(); + FunctionResults Function_0Dh(); + FunctionResults Function_8000_0000h(); + FunctionResults Function_8000_0001h(); + FunctionResults Function_8000_0006h(); + FunctionResults Function_8000_0007h(); + FunctionResults 
Function_8000_0008h(); +}; +} diff --git a/Source/Interface/Core/Core.cpp b/Source/Interface/Core/Core.cpp new file mode 100644 index 000000000..d348f2ccd --- /dev/null +++ b/Source/Interface/Core/Core.cpp @@ -0,0 +1,800 @@ +#include "Common/MathUtils.h" +#include "Common/Paths.h" +#include "Interface/Context/Context.h" +#include "Interface/Core/BlockCache.h" +#include "Interface/Core/Core.h" +#include "Interface/Core/DebugData.h" +#include "Interface/Core/OpcodeDispatcher.h" +#include "Interface/Core/Interpreter/InterpreterCore.h" +#include "Interface/Core/JIT/JITCore.h" +#include "Interface/Core/LLVMJIT/LLVMCore.h" + +#include +#include +#include +#include +#include + +#include + +constexpr uint64_t STACK_OFFSET = 0xc000'0000; + +constexpr uint64_t FS_OFFSET = 0xb000'0000; +constexpr uint64_t FS_SIZE = 0x1000; + +namespace FEXCore::CPU { + bool CreateCPUCore(FEXCore::Context::Context *CTX) { + // This should be used for generating things that are shared between threads + CTX->CPUID.Init(); + return true; + } +} + +namespace FEXCore::Core { +constexpr std::array FlagNames = { + "CF", + "", + "PF", + "", + "AF", + "", + "ZF", + "SF", + "TF", + "IF", + "DF", + "OF", + "IOPL", + "", + "NT", + "", + "RF", + "VM", + "AC", + "VIF", + "VIP", + "ID", +}; + +std::string_view const& GetFlagName(unsigned Flag) { + return FlagNames[Flag]; +} + +namespace DefaultFallbackCore { + class DefaultFallbackCore final : public FEXCore::CPU::CPUBackend { + public: + explicit DefaultFallbackCore(FEXCore::Core::ThreadState *Thread) + : ThreadState {reinterpret_cast(Thread)} { + } + ~DefaultFallbackCore() override = default; + + std::string GetName() override { return "Default Fallback"; } + + void *MapRegion(void *HostPtr, uint64_t VirtualGuestPtr, uint64_t Size) override { + return HostPtr; + } + + void Initialize() override {} + bool NeedsOpDispatch() override { return false; } + + void *CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData) override { + 
LogMan::Msg::E("Fell back to default code handler at RIP: 0x%lx", ThreadState->State.State.rip); + return nullptr; + } + + private: + FEXCore::Core::InternalThreadState *ThreadState; + }; + + FEXCore::CPU::CPUBackend *CPUCreationFactory(FEXCore::Context::Context* CTX, FEXCore::Core::ThreadState *Thread) { + return new DefaultFallbackCore(Thread); + } +} + +} + +namespace FEXCore::Context { + Context::Context() + : FrontendDecoder {this} + , SyscallHandler {this} { + FallbackCPUFactory = FEXCore::Core::DefaultFallbackCore::CPUCreationFactory; + PassManager.AddDefaultPasses(); + // PassManager.AddDefaultValidationPasses(); + } + + bool Context::GetFilenameHash(std::string const &Filename, std::string &Hash) { + // Calculate a hash for the input file + std::ifstream Input (Filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); + if (Input.is_open()) { + std::streampos Size; + Size = Input.tellg(); + Input.seekg(0, std::ios::beg); + std::string Data; + Data.resize(Size); + Input.read(&Data.at(0), Size); + Input.close(); + + std::hash string_hash; + Hash = std::to_string(string_hash(Data)); + return true; + } + return false; + } + + void Context::AddThreadRIPsToEntryList(FEXCore::Core::InternalThreadState *Thread) { + for (auto &IR : Thread->IRLists) { + EntryList.insert(IR.first); + } + } + + void Context::SaveEntryList() { + std::string const &Filename = SyscallHandler.GetFilename(); + std::string hash_string; + + if (GetFilenameHash(Filename, hash_string)) { + auto DataPath = FEXCore::Paths::GetDataPath(); + DataPath += "/EntryCache/Entries_" + hash_string; + + std::ofstream Output (DataPath.c_str(), std::ios::out | std::ios::binary); + if (Output.is_open()) { + for (auto Entry : EntryList) { + Output.write(reinterpret_cast(&Entry), sizeof(Entry)); + } + Output.close(); + } + } + } + + void Context::LoadEntryList() { + std::string const &Filename = SyscallHandler.GetFilename(); + std::string hash_string; + + if (GetFilenameHash(Filename, hash_string)) { 
+ auto DataPath = FEXCore::Paths::GetDataPath(); + DataPath += "/EntryCache/Entries_" + hash_string; + + std::ifstream Input (DataPath.c_str(), std::ios::in | std::ios::binary | std::ios::ate); + if (Input.is_open()) { + std::streampos Size; + Size = Input.tellg(); + Input.seekg(0, std::ios::beg); + std::string Data; + Data.resize(Size); + Input.read(&Data.at(0), Size); + Input.close(); + size_t EntryCount = Size / sizeof(uint64_t); + uint64_t *Entries = reinterpret_cast(&Data.at(0)); + + for (size_t i = 0; i < EntryCount; ++i) { + EntryList.insert(Entries[i]); + } + } + } + } + + Context::~Context() { + ShouldStop.store(true); + + Pause(); + { + std::lock_guard lk(ThreadCreationMutex); + for (auto &Thread : Threads) { + Thread->ExecutionThread.join(); + } + + for (auto &Thread : Threads) { + AddThreadRIPsToEntryList(Thread); + } + + for (auto &Thread : Threads) { + delete Thread; + } + Threads.clear(); + } + + SaveEntryList(); + } + + bool Context::InitCore(FEXCore::CodeLoader *Loader) { + LocalLoader = Loader; + using namespace FEXCore::Core; + FEXCore::Core::CPUState NewThreadState{}; + + // Initialize default CPU state + NewThreadState.rip = ~0ULL; + for (int i = 0; i < 16; ++i) { + NewThreadState.gregs[i] = 0; + } + + for (int i = 0; i < 16; ++i) { + NewThreadState.xmm[i][0] = 0xDEADBEEFULL; + NewThreadState.xmm[i][1] = 0xBAD0DAD1ULL; + } + memset(NewThreadState.flags, 0, 32); + NewThreadState.gs = 0; + NewThreadState.fs = FS_OFFSET + FS_SIZE / 2; + NewThreadState.flags[1] = 1; + + FEXCore::Core::InternalThreadState *Thread = CreateThread(&NewThreadState, 0, 0); + + // We are the parent thread + ParentThread = Thread; + + auto MemLayout = Loader->GetLayout(); + + uint64_t BasePtr = AlignDown(std::get<0>(MemLayout), PAGE_SIZE); + uint64_t BaseSize = AlignUp(std::get<2>(MemLayout), PAGE_SIZE); + + Thread->BlockCache->HintUsedRange(BasePtr, BaseSize); + + uintptr_t BaseRegion = reinterpret_cast(MapRegion(Thread, BasePtr, BaseSize, true)); + + auto 
MemoryMapperFunction = [&](uint64_t Base, uint64_t Size) -> void* { + return MapRegion(Thread, Base, Size); + }; + + Loader->MapMemoryRegion(MemoryMapperFunction); + + // Set up all of our memory mappings + MapRegion(Thread, FS_OFFSET, FS_SIZE, true); + + void *StackPointer = MapRegion(Thread, STACK_OFFSET, Loader->StackSize(), true); + Thread->State.State.gregs[X86State::REG_RSP] = Loader->SetupStack(StackPointer, STACK_OFFSET); + + // Now let the code loader setup memory + auto MemoryWriterFunction = [&](void const *Data, uint64_t Addr, uint64_t Size) -> void { + // Writes the machine code to be emulated in to memory + memcpy(reinterpret_cast(BaseRegion + Addr), Data, Size); + }; + + Loader->LoadMemory(MemoryWriterFunction); + + // Set the RIP to what the code loader wants + Thread->State.State.rip = Loader->DefaultRIP(); + + LogMan::Msg::D("Memory Base: 0x%016lx", MemoryMapper.GetBaseOffset(0)); + + InitializeThread(Thread); + + return true; + } + + void Context::WaitForIdle() { + do { + bool AllPaused = true; + { + // Grab the mutex lock so a thread doesn't try and spin up while we are waiting + for (size_t i = 0; i < Threads.size(); ++i) { + if (Threads[i]->State.RunningEvents.Running.load() || Threads[i]->State.RunningEvents.WaitingToStart.load()) { + AllPaused = false; + break; + } + } + } + + if (AllPaused) + break; + + PauseWait.WaitFor(std::chrono::seconds(1)); + } while (true); + } + + void Context::Pause() { + // Tell all the threads that they should pause + { + std::lock_guard lk(ThreadCreationMutex); + for (auto &Thread : Threads) { + Thread->State.RunningEvents.ShouldPause.store(true); + } + + for (auto &Thread : Threads) { + Thread->StartRunning.NotifyAll(); + } + Running = true; + } + + WaitForIdle(); + Running = false; + } + + FEXCore::Context::ExitReason Context::RunLoop(bool WaitForIdle) { + { + // Spin up all the threads + std::lock_guard lk(ThreadCreationMutex); + for (auto &Thread : Threads) { + 
Thread->State.RunningEvents.ShouldPause.store(false); + Thread->State.RunningEvents.WaitingToStart.store(true); + } + + for (auto &Thread : Threads) { + Thread->StartRunning.NotifyAll(); + } + Running = true; + } + + if (WaitForIdle) { + this->WaitForIdle(); + return ParentThread->ExitReason; + } + + return FEXCore::Context::ExitReason::EXIT_ASYNC_RUN; + } + + void Context::InitializeThread(FEXCore::Core::InternalThreadState *Thread) { + Thread->CPUBackend->Initialize(); + Thread->FallbackBackend->Initialize(); + + // Compile all of our cached entries + LogMan::Msg::D("Precompiling: %ld blocks", EntryList.size()); + for (auto Entry : EntryList) { + CompileRIP(Thread, Entry); + } + + // This will create the execution thread but it won't actually start executing + Thread->ExecutionThread = std::thread(&Context::ExecutionThread, this, Thread); + + // Wait for the thread to have started + Thread->ThreadWaiting.Wait(); + } + + void Context::RunThread(FEXCore::Core::InternalThreadState *Thread) { + // Tell the thread to start executing + Thread->StartRunning.NotifyAll(); + } + + FEXCore::Core::InternalThreadState* Context::CreateThread(FEXCore::Core::CPUState *NewThreadState, uint64_t ParentTID, uint64_t ChildTID) { + FEXCore::Core::InternalThreadState *Thread{}; + + // Grab the new thread object + { + std::lock_guard lk(ThreadCreationMutex); + Thread = Threads.emplace_back(new FEXCore::Core::InternalThreadState{}); + Thread->State.ThreadManager.TID = ++ThreadID; + } + + Thread->OpDispatcher = std::make_unique(); + Thread->BlockCache = std::make_unique(this); + Thread->CTX = this; + + // Copy over the new thread state to the new object + memcpy(&Thread->State.State, NewThreadState, sizeof(FEXCore::Core::CPUState)); + + // Set up the thread manager state + Thread->State.ThreadManager.parent_tid = ParentTID; + Thread->State.ThreadManager.child_tid = ChildTID; + + // Create CPU backend + switch (Config.Core) { + case FEXCore::Config::CONFIG_INTERPRETER: 
Thread->CPUBackend.reset(FEXCore::CPU::CreateInterpreterCore(this)); break; + case FEXCore::Config::CONFIG_IRJIT: Thread->CPUBackend.reset(FEXCore::CPU::CreateJITCore(this, Thread)); break; + case FEXCore::Config::CONFIG_LLVMJIT: Thread->CPUBackend.reset(FEXCore::CPU::CreateLLVMCore(Thread)); break; + case FEXCore::Config::CONFIG_CUSTOM: Thread->CPUBackend.reset(CustomCPUFactory(this, &Thread->State)); break; + default: LogMan::Msg::A("Unknown core configuration"); + } + + Thread->FallbackBackend.reset(FallbackCPUFactory(this, &Thread->State)); + + LogMan::Throw::A(!Thread->FallbackBackend->NeedsOpDispatch(), "Fallback CPU backend must not require OpDispatch"); + + return Thread; + } + + uintptr_t Context::AddBlockMapping(FEXCore::Core::InternalThreadState *Thread, uint64_t Address, void *Ptr) { + auto BlockMapPtr = Thread->BlockCache->AddBlockMapping(Address, Ptr); + if (BlockMapPtr == 0) { + Thread->BlockCache->ClearCache(); + BlockMapPtr = Thread->BlockCache->AddBlockMapping(Address, Ptr); + LogMan::Throw::A(BlockMapPtr, "Couldn't add mapping after clearing mapping cache"); + } + + return BlockMapPtr; + } + + uintptr_t Context::CompileBlock(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP) { + void *CodePtr {nullptr}; + uint8_t const *GuestCode = MemoryMapper.GetPointer(GuestRIP); + + uint64_t TotalInstructions {0}; + uint64_t TotalInstructionsLength {0}; + + // Do we already have this in the IR cache? 
+ auto IR = Thread->IRLists.find(GuestRIP); + FEXCore::IR::IRListView *IRList {}; + FEXCore::Core::DebugData *DebugData {}; + + if (IR == Thread->IRLists.end()) { + bool HadDispatchError {false}; + [[maybe_unused]] bool HadRIPSetter {false}; + + Thread->OpDispatcher->BeginBlock(); + if (!FrontendDecoder.DecodeInstructionsInBlock(&GuestCode[TotalInstructionsLength], GuestRIP + TotalInstructionsLength)) { + if (Config.BreakOnFrontendFailure) { + LogMan::Msg::E("Had Frontend decoder error"); + ShouldStop = true; + } + return 0; + } + + auto DecodedOps = FrontendDecoder.GetDecodedInsts(); + for (size_t i = 0; i < DecodedOps.second; ++i) { + FEXCore::X86Tables::X86InstInfo const* TableInfo {nullptr}; + FEXCore::X86Tables::DecodedInst const* DecodedInfo {nullptr}; + + TableInfo = DecodedOps.first->at(i).TableInfo; + DecodedInfo = &DecodedOps.first->at(i); + // if (FrontendDecoder.JumpTargets.find(DecodedInfo->PC) != FrontendDecoder.JumpTargets.end()) { + // Thread->OpDispatcher->_EndBlock(0); + // auto JumpTarget = Thread->OpDispatcher->_BeginBlock(); + // Thread->OpDispatcher->Arguments.JumpTargets.emplace(DecodedInfo->PC, JumpTarget.location); + // } + + // // Check our fixups to see if they still are necessary + // auto fixup = Thread->OpDispatcher->Arguments.Fixups.find(DecodedInfo->PC); + // if (fixup != Thread->OpDispatcher->Arguments.Fixups.end()) { + // IR::AlignmentType JumpTarget; + // auto it = Thread->OpDispatcher->Arguments.JumpTargets.find(DecodedInfo->PC); + // if (it != Thread->OpDispatcher->Arguments.JumpTargets.end()) { + // JumpTarget = it->second; + // } + + // for (auto it : fixup->second) { + // switch (it.SourceCondJump->Op) { + // case FEXCore::IR::OP_CONDJUMP: { + // if (JumpTarget.IsInvalid()) { + // Thread->OpDispatcher->_EndBlock(0); + // JumpTarget = Thread->OpDispatcher->_BeginBlock().location; + // } + // auto CondJumpOp = it.SourceCondJump->CW(); + // CondJumpOp->Location = JumpTarget; + // break; + // } + // case FEXCore::IR::OP_JUMP: { + 
// if (JumpTarget.IsInvalid()) { + // Thread->OpDispatcher->_EndBlock(0); + // JumpTarget = Thread->OpDispatcher->_BeginBlock().location; + // } + // auto JumpOp = it.SourceCondJump->CW(); + // JumpOp->Location = JumpTarget; + // break; + // } + + // default: + // LogMan::Msg::A("Unknown fixup kind"); + // break; + // } + // } + + // // No longer need this fixup + // Thread->OpDispatcher->Arguments.Fixups.erase(fixup); + // } + + if (TableInfo->OpcodeDispatcher) { + auto Fn = TableInfo->OpcodeDispatcher; + std::invoke(Fn, Thread->OpDispatcher, DecodedInfo); + if (Thread->OpDispatcher->HadDecodeFailure()) { + if (Config.BreakOnFrontendFailure) { + LogMan::Msg::E("Had OpDispatcher error at 0x%lx", GuestRIP); + ShouldStop = true; + } + HadDispatchError = true; + } + else { + TotalInstructionsLength += DecodedInfo->InstSize; + TotalInstructions++; + } + } + else { + // LogMan::Msg::E("Missing OpDispatcher at 0x%lx", GuestRIP); + HadDispatchError = true; + } + + // If we had a dispatch error then leave early + if (HadDispatchError) { + if (TotalInstructions == 0) { + // Couldn't handle any instruction in op dispatcher + Thread->OpDispatcher->ResetWorkingList(); + return 0; + } + else { + // We had some instructions. 
Early exit + break; + } + } + + // This is to make sure if we are stepping or make a block that is too large that we will still set the block + if (!HadDispatchError && (TableInfo->Flags & X86Tables::InstFlags::FLAGS_SETS_RIP)) { + HadRIPSetter = true; + } + + if (TotalInstructions >= Config.MaxInstPerBlock) { + break; + } + } + + //LogMan::Throw::A(Thread->OpDispatcher->Arguments.Fixups.empty(), "Still had fixups that weren't fixed!"); + + if (!Thread->OpDispatcher->Information.HadUnconditionalExit) + { + Thread->OpDispatcher->EndBlock(TotalInstructionsLength); + Thread->OpDispatcher->ExitFunction(); + } + + + // Run the passmanager over the IR from the dispatcher + PassManager.Run(Thread->OpDispatcher.get()); + + if (Thread->OpDispatcher->ShouldDump) +// if (GuestRIP == 0x48b680) + { + std::stringstream out; + auto NewIR = Thread->OpDispatcher->ViewIR(); + FEXCore::IR::Dump(&out, &NewIR); + printf("IR 0x%lx:\n%s\n@@@@@\n", GuestRIP, out.str().c_str()); + } + + // Do RA on the IR right now? + + // Create a copy of the IR and place it in this thread's IR cache + auto IR = Thread->IRLists.try_emplace(GuestRIP, Thread->OpDispatcher->CreateIRCopy()); + Thread->OpDispatcher->ResetWorkingList(); + + auto Debugit = Thread->DebugData.try_emplace(GuestRIP, FEXCore::Core::DebugData{}); + Debugit.first->second.GuestCodeSize = TotalInstructionsLength; + Debugit.first->second.GuestInstructionCount = TotalInstructions; + + IRList = IR.first->second.get(); + DebugData = &Debugit.first->second; + Thread->Stats.BlocksCompiled.fetch_add(1); + } + else { + IRList = IR->second.get(); + } + + // Attempt to get the CPU backend to compile this code + CodePtr = Thread->CPUBackend->CompileCode(IRList, DebugData); + + if (CodePtr != nullptr) { + // The core managed to compile the code. 
+ return AddBlockMapping(Thread, GuestRIP, CodePtr); + } + + return 0; + } + + void Context::ExecutionThread(FEXCore::Core::InternalThreadState *Thread) { + Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_WAITING; + + Thread->ThreadWaiting.NotifyAll(); + Thread->StartRunning.Wait(); + + if (ShouldStop.load() || Thread->State.RunningEvents.ShouldStop.load()) { + ShouldStop = true; + Thread->State.RunningEvents.ShouldStop.store(true); + Thread->State.RunningEvents.Running.store(false); + Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_SHUTDOWN; + return; + } + + Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_NONE; + + Thread->State.RunningEvents.Running = true; + Thread->State.RunningEvents.ShouldPause = false; + constexpr uint32_t CoreDebugLevel = 0; + while (!ShouldStop.load() && !Thread->State.RunningEvents.ShouldStop.load()) { + uint64_t GuestRIP = Thread->State.State.rip; + + if (CoreDebugLevel >= 1) { + char const *Name = LocalLoader->FindSymbolNameInRange(GuestRIP); + LogMan::Msg::D(">>>>RIP: 0x%lx: '%s'", GuestRIP, Name ? Name : ""); + } + + using BlockFn = void (*)(FEXCore::Core::InternalThreadState *Thread); + + if (!Thread->CPUBackend->NeedsOpDispatch()) { + BlockFn Ptr = reinterpret_cast(Thread->CPUBackend->CompileCode(nullptr, nullptr)); + Ptr(Thread); + } + else { + // Do have have this block compiled? + auto it = Thread->BlockCache->FindBlock(GuestRIP); + if (it == 0) { + // If not compile it + it = CompileBlock(Thread, GuestRIP); + } + + // Did we successfully compile this block? + if (it != 0) { + // Block is compiled, run it + BlockFn Ptr = reinterpret_cast(it); + Ptr(Thread); + } + else { + // We have ONE more chance to try and fallback to the fallback CPU backend + // This will most likely fail since regular code use won't be using a fallback core. 
+ // It's mainly for testing new instruction encodings + void *CodePtr = Thread->FallbackBackend->CompileCode(nullptr, nullptr); + if (CodePtr) { + BlockFn Ptr = reinterpret_cast(AddBlockMapping(Thread, GuestRIP, CodePtr)); + Ptr(Thread); + } + else { + // Let the frontend know that something has happened that is unhandled + Thread->State.RunningEvents.ShouldPause = true; + Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_UNKNOWNERROR; + } + } + } +// if (GuestRIP == 0x48c8dd) { +// fflush(stdout); +// __builtin_trap(); +// } + + if (CoreDebugLevel >= 2) { + int i = 0; + LogMan::Msg::D("\tGPR[%d]: %016lx %016lx %016lx %016lx", i, Thread->State.State.gregs[i + 0], Thread->State.State.gregs[i + 1], Thread->State.State.gregs[i + 2], Thread->State.State.gregs[i + 3]); + i += 4; + LogMan::Msg::D("\tGPR[%d]: %016lx %016lx %016lx %016lx", i, Thread->State.State.gregs[i + 0], Thread->State.State.gregs[i + 1], Thread->State.State.gregs[i + 2], Thread->State.State.gregs[i + 3]); + i += 4; + LogMan::Msg::D("\tGPR[%d]: %016lx %016lx %016lx %016lx", i, Thread->State.State.gregs[i + 0], Thread->State.State.gregs[i + 1], Thread->State.State.gregs[i + 2], Thread->State.State.gregs[i + 3]); + i += 4; + LogMan::Msg::D("\tGPR[%d]: %016lx %016lx %016lx %016lx", i, Thread->State.State.gregs[i + 0], Thread->State.State.gregs[i + 1], Thread->State.State.gregs[i + 2], Thread->State.State.gregs[i + 3]); + uint64_t PackedFlags{}; + for (unsigned i = 0; i < 32; ++i) { + PackedFlags |= static_cast(Thread->State.State.flags[i]) << i; + } + LogMan::Msg::D("\tFlags: %016lx", PackedFlags); + } + + if (CoreDebugLevel >= 3) { + int i = 0; + LogMan::Msg::D("\tXMM[%d][0]: %016lx %016lx %016lx %016lx", i, Thread->State.State.xmm[i + 0][0], Thread->State.State.xmm[i + 1][0], Thread->State.State.xmm[i + 2][0], Thread->State.State.xmm[i + 3][0]); + LogMan::Msg::D("\tXMM[%d][1]: %016lx %016lx %016lx %016lx", i, Thread->State.State.xmm[i + 0][1], Thread->State.State.xmm[i + 1][1], 
Thread->State.State.xmm[i + 2][1], Thread->State.State.xmm[i + 3][1]); + + i += 4; + LogMan::Msg::D("\tXMM[%d][0]: %016lx %016lx %016lx %016lx", i, Thread->State.State.xmm[i + 0][0], Thread->State.State.xmm[i + 1][0], Thread->State.State.xmm[i + 2][0], Thread->State.State.xmm[i + 3][0]); + LogMan::Msg::D("\tXMM[%d][1]: %016lx %016lx %016lx %016lx", i, Thread->State.State.xmm[i + 0][1], Thread->State.State.xmm[i + 1][1], Thread->State.State.xmm[i + 2][1], Thread->State.State.xmm[i + 3][1]); + i += 4; + LogMan::Msg::D("\tXMM[%d][0]: %016lx %016lx %016lx %016lx", i, Thread->State.State.xmm[i + 0][0], Thread->State.State.xmm[i + 1][0], Thread->State.State.xmm[i + 2][0], Thread->State.State.xmm[i + 3][0]); + LogMan::Msg::D("\tXMM[%d][1]: %016lx %016lx %016lx %016lx", i, Thread->State.State.xmm[i + 0][1], Thread->State.State.xmm[i + 1][1], Thread->State.State.xmm[i + 2][1], Thread->State.State.xmm[i + 3][1]); + i += 4; + LogMan::Msg::D("\tXMM[%d][0]: %016lx %016lx %016lx %016lx", i, Thread->State.State.xmm[i + 0][0], Thread->State.State.xmm[i + 1][0], Thread->State.State.xmm[i + 2][0], Thread->State.State.xmm[i + 3][0]); + LogMan::Msg::D("\tXMM[%d][1]: %016lx %016lx %016lx %016lx", i, Thread->State.State.xmm[i + 0][1], Thread->State.State.xmm[i + 1][1], Thread->State.State.xmm[i + 2][1], Thread->State.State.xmm[i + 3][1]); + uint64_t PackedFlags{}; + for (unsigned i = 0; i < 32; ++i) { + PackedFlags |= static_cast(Thread->State.State.flags[i]) << i; + } + LogMan::Msg::D("\tFlags: %016lx", PackedFlags); + } + + if (Thread->State.RunningEvents.ShouldStop.load()) { + // If it is the parent thread that died then just leave + // XXX: This doesn't make sense when the parent thread doesn't outlive its children + if (Thread->State.ThreadManager.GetTID() == 1) { + ShouldStop = true; + Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_SHUTDOWN; + } + break; + } + + if (RunningMode == FEXCore::Context::CoreRunningMode::MODE_SINGLESTEP || 
Thread->State.RunningEvents.ShouldPause) { + Thread->State.RunningEvents.Running = false; + Thread->State.RunningEvents.WaitingToStart = false; + + // If something previously hasn't set the exit state then set it now + if (Thread->ExitReason == FEXCore::Context::ExitReason::EXIT_NONE) + Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_DEBUG; + + PauseWait.NotifyAll(); + Thread->StartRunning.Wait(); + + // If we set it to debug then set it back to none after this + // We want to retain the state if the frontend decides to leave + if (Thread->ExitReason == FEXCore::Context::ExitReason::EXIT_DEBUG) + Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_NONE; + + Thread->State.RunningEvents.Running = true; + } + } + + Thread->State.RunningEvents.WaitingToStart = false; + Thread->State.RunningEvents.Running = false; + } + + // Debug interface + void Context::CompileRIP(FEXCore::Core::InternalThreadState *Thread, uint64_t RIP) { + uint64_t RIPBackup = Thread->State.State.rip; + Thread->State.State.rip = RIP; + + // Erase the RIP from all the storage backings if it exists + Thread->IRLists.erase(RIP); + Thread->DebugData.erase(RIP); + Thread->BlockCache->Erase(RIP); + + // We don't care if compilation passes or not + CompileBlock(Thread, RIP); + + Thread->State.State.rip = RIPBackup; + } + + void *Context::MapRegion(FEXCore::Core::InternalThreadState *Thread, uint64_t Offset, uint64_t Size, bool Fixed) { + void *Ptr = MemoryMapper.MapRegion(Offset, Size, Fixed); + Thread->CPUBackend->MapRegion(Ptr, Offset, Size); + Thread->FallbackBackend->MapRegion(Ptr, Offset, Size); + return Ptr; + } + + void Context::MirrorRegion(FEXCore::Core::InternalThreadState *Thread, void *HostPtr, uint64_t Offset, uint64_t Size) { + Thread->CPUBackend->MapRegion(HostPtr, Offset, Size); + Thread->FallbackBackend->MapRegion(HostPtr, Offset, Size); + } + + void *Context::ShmBase() { + return MemoryMapper.GetMemoryBase(); + } + + void Context::CopyMemoryMapping([[maybe_unused]] 
FEXCore::Core::InternalThreadState *ParentThread, FEXCore::Core::InternalThreadState *ChildThread) { + auto Regions = MemoryMapper.MappedRegions; + for (auto const& Region : Regions) { + ChildThread->CPUBackend->MapRegion(Region.Ptr, Region.Offset, Region.Size); + ChildThread->FallbackBackend->MapRegion(Region.Ptr, Region.Offset, Region.Size); + } + } + + uint64_t Context::GetThreadCount() const { + return Threads.size(); + } + + FEXCore::Core::RuntimeStats *Context::GetRuntimeStatsForThread(uint64_t Thread) { + return &Threads[Thread]->Stats; + } + + FEXCore::Core::CPUState Context::GetCPUState() { + return ParentThread->State.State; + } + + void Context::GetMemoryRegions(std::vector *Regions) { + Regions->clear(); + Regions->resize(MemoryMapper.MappedRegions.size()); + memcpy(&Regions->at(0), &MemoryMapper.MappedRegions.at(0), sizeof(FEXCore::Memory::MemRegion) * MemoryMapper.MappedRegions.size()); + } + + bool Context::GetDebugDataForRIP(uint64_t RIP, FEXCore::Core::DebugData *Data) { + auto it = ParentThread->DebugData.find(RIP); + if (it == ParentThread->DebugData.end()) { + return false; + } + + memcpy(Data, &it->second, sizeof(FEXCore::Core::DebugData)); + return true; + } + + bool Context::FindHostCodeForRIP(uint64_t RIP, uint8_t **Code) { + uintptr_t HostCode = ParentThread->BlockCache->FindBlock(RIP); + if (!HostCode) { + return false; + } + + *Code = reinterpret_cast(HostCode); + return true; + } + + // XXX: + // bool Context::FindIRForRIP(uint64_t RIP, FEXCore::IR::IntrusiveIRList **ir) { + // auto IR = ParentThread->IRLists.find(RIP); + // if (IR == ParentThread->IRLists.end()) { + // return false; + // } + + // //*ir = &IR->second; + // return true; + // } + + // void Context::SetIRForRIP(uint64_t RIP, FEXCore::IR::IntrusiveIRList *const ir) { + // //ParentThread->IRLists.try_emplace(RIP, *ir); + // } + + FEXCore::Core::ThreadState *Context::GetThreadState() { + return &ParentThread->State; + } + +} diff --git a/Source/Interface/Core/Core.h 
b/Source/Interface/Core/Core.h new file mode 100644 index 000000000..3279042b8 --- /dev/null +++ b/Source/Interface/Core/Core.h @@ -0,0 +1,23 @@ +#pragma once + +namespace FEXCore { + class CodeLoader; +} + +namespace FEXCore::Context { + struct Context; +} + +namespace FEXCore::CPU { + + /** + * @brief Create the CPU core backend for the context passed in + * + * @param CTX + * + * @return true if core was able to be create + */ + bool CreateCPUCore(FEXCore::Context::Context *CTX); + + bool LoadCode(FEXCore::Context::Context *CTX, FEXCore::CodeLoader *Loader); +} diff --git a/Source/Interface/Core/DebugData.h b/Source/Interface/Core/DebugData.h new file mode 100644 index 000000000..d5f80cd80 --- /dev/null +++ b/Source/Interface/Core/DebugData.h @@ -0,0 +1,5 @@ +#pragma once +#include + +namespace FEXCore::CPU { +} diff --git a/Source/Interface/Core/Frontend.cpp b/Source/Interface/Core/Frontend.cpp new file mode 100644 index 000000000..d891455bf --- /dev/null +++ b/Source/Interface/Core/Frontend.cpp @@ -0,0 +1,927 @@ +#include "Interface/Context/Context.h" +#include "Interface/Core/Frontend.h" +#include "Interface/Core/InternalThreadState.h" +#include "LogManager.h" + +#include +#include +#include +#include + +namespace FEXCore::Frontend { +using namespace FEXCore::X86Tables; +constexpr size_t MAX_INST_SIZE = 15; + +static uint32_t MapModRMToReg(uint8_t REX, uint8_t bits, bool HighBits, bool HasREX, bool HasXMM, uint8_t InvalidOffset = 16) { + constexpr std::array GPRIndexes = { + // Classical ordering? 
+ FEXCore::X86State::REG_RAX, + FEXCore::X86State::REG_RCX, + FEXCore::X86State::REG_RDX, + FEXCore::X86State::REG_RBX, + FEXCore::X86State::REG_RSP, + FEXCore::X86State::REG_RBP, + FEXCore::X86State::REG_RSI, + FEXCore::X86State::REG_RDI, + FEXCore::X86State::REG_R8, + FEXCore::X86State::REG_R9, + FEXCore::X86State::REG_R10, + FEXCore::X86State::REG_R11, + FEXCore::X86State::REG_R12, + FEXCore::X86State::REG_R13, + FEXCore::X86State::REG_R14, + FEXCore::X86State::REG_R15, + }; + + constexpr std::array GPR8BitHighIndexes = { + // Classical ordering? + FEXCore::X86State::REG_RAX, + FEXCore::X86State::REG_RCX, + FEXCore::X86State::REG_RDX, + FEXCore::X86State::REG_RBX, + FEXCore::X86State::REG_RAX, + FEXCore::X86State::REG_RCX, + FEXCore::X86State::REG_RDX, + FEXCore::X86State::REG_RBX, + FEXCore::X86State::REG_R8, + FEXCore::X86State::REG_R9, + FEXCore::X86State::REG_R10, + FEXCore::X86State::REG_R11, + FEXCore::X86State::REG_R12, + FEXCore::X86State::REG_R13, + FEXCore::X86State::REG_R14, + FEXCore::X86State::REG_R15, + }; + + constexpr std::array XMMIndexes = { + FEXCore::X86State::REG_XMM_0, + FEXCore::X86State::REG_XMM_1, + FEXCore::X86State::REG_XMM_2, + FEXCore::X86State::REG_XMM_3, + FEXCore::X86State::REG_XMM_4, + FEXCore::X86State::REG_XMM_5, + FEXCore::X86State::REG_XMM_6, + FEXCore::X86State::REG_XMM_7, + FEXCore::X86State::REG_XMM_8, + FEXCore::X86State::REG_XMM_9, + FEXCore::X86State::REG_XMM_10, + FEXCore::X86State::REG_XMM_11, + FEXCore::X86State::REG_XMM_12, + FEXCore::X86State::REG_XMM_13, + FEXCore::X86State::REG_XMM_14, + FEXCore::X86State::REG_XMM_15, + }; + + const std::array *GPRs = &GPRIndexes; + if (HasXMM) { + GPRs = &XMMIndexes; + } + else if (HighBits && !HasREX) { + GPRs = &GPR8BitHighIndexes; + } + + uint8_t Offset = (REX << 3) | bits; + + if (Offset == InvalidOffset) { + return FEXCore::X86State::REG_INVALID; + } + return (*GPRs)[(REX << 3) | bits]; +} + +Decoder::Decoder(FEXCore::Context::Context *ctx) + : CTX {ctx} { + 
DecodedBuffer.resize(DefaultDecodedBufferSize); +} + +bool Decoder::DecodeInstruction(uint8_t const* InstStream, uint64_t PC) { + uint8_t InstructionSize = 0; + std::array Instruction; + bool InstructionDecoded = false; + bool ErrorDuringDecoding = false; + auto ReadByte = [InstStream, &Instruction, &InstructionSize]() -> uint8_t { + uint8_t Byte = InstStream[InstructionSize]; + InstructionSize++; + LogMan::Throw::A(InstructionSize < MAX_INST_SIZE, "Max instruction size exceeded!"); + Instruction[InstructionSize] = Byte; + return Byte; + }; + + auto PeekByte = [InstStream, &InstructionSize](uint8_t Offset) -> uint8_t { + uint8_t Byte = InstStream[InstructionSize + Offset]; + return Byte; + }; + + auto ReadData = [&ReadByte, InstStream, &InstructionSize](size_t Size) -> uint64_t { + uint64_t Res; +#define READ_DATA(x, y) \ + case x: { \ + y const *Data = reinterpret_cast(&InstStream[InstructionSize]); \ + Res = *Data; \ + } \ + break + + switch (Size) { + case 0: return 0; + READ_DATA(1, uint8_t); + READ_DATA(2, uint16_t); + READ_DATA(4, uint32_t); + READ_DATA(8, uint64_t); + default: + LogMan::Msg::A("Unknown data size to read"); + } +#undef READ_DATA + + for(size_t i = 0; i < Size; ++i) { + ReadByte(); + } + return Res; + }; + + auto &DecodeInst = DecodedBuffer[DecodedSize]; + memset(&DecodeInst, 0, sizeof(DecodedInst)); + + auto DecodeModRM = [&DecodeInst](FEXCore::X86Tables::ModRMDecoded ModRM, uint8_t *Displacement) { + // Do we have an offset? 
+ if (ModRM.mod == 0b01) { + *Displacement = 1; + } + else if (ModRM.mod == 0b10) { + *Displacement = 4; + } + else if (ModRM.mod == 0 && ModRM.rm == 0b101) + *Displacement = 4; + + // Ensure this flag is set + DecodeInst.Flags |= DecodeFlags::FLAG_MODRM_PRESENT; + }; + + auto DecodeSIB = [&ReadByte, &DecodeInst](FEXCore::X86Tables::ModRMDecoded ModRM, uint8_t *Displacement) -> bool { + bool HasSIB = ((ModRM.mod != 0b11) && + (ModRM.rm == 0b100)); + + if (HasSIB) { + FEXCore::X86Tables::SIBDecoded SIB; + if (DecodeInst.DecodedSIB) { + SIB.Hex = DecodeInst.SIB; + } + else { + // Haven't yet grabbed SIB, pull it now + DecodeInst.SIB = ReadByte(); + SIB.Hex = DecodeInst.SIB; + DecodeInst.DecodedSIB = true; + } + + // Ensure this flag is set + DecodeInst.Flags |= DecodeFlags::FLAG_SIB_PRESENT; + + // If the SIB base is 0b101, aka BP or R13 then we have a 32bit displacement + if (ModRM.mod == 0b01) { + *Displacement = 1; + } + else if (ModRM.mod == 0b10) { + *Displacement = 4; + } + else if (ModRM.mod == 0b00 && ModRM.rm == 0b101) { + *Displacement = 4; + } + else if (ModRM.mod == 0b00 && ModRM.rm == 0b100 && SIB.base == 0b101) { + *Displacement = 4; + } + } + + return HasSIB; + }; + + auto NormalOp = [&DecodeModRM, &DecodeSIB, &ReadByte, &ReadData, &DecodeInst, &InstructionSize](auto &Table, auto Op) -> bool { + FEXCore::X86Tables::X86InstInfo *Info = &Table[Op]; + + DecodeInst.OP = Op; + DecodeInst.TableInfo = Info; + + // XXX: Once we support 32bit x86 then this will be necessary to support + if (Info->Type == FEXCore::X86Tables::TYPE_LEGACY_PREFIX) { + DecodeInst.Flags |= DecodeFlags::FLAG_LEGACY_PREFIX; + LogMan::Msg::A("Legacy Prefix"); + return false; + } + if (Info->Type == FEXCore::X86Tables::TYPE_UNKNOWN || + Info->Type == FEXCore::X86Tables::TYPE_INVALID) { + LogMan::Msg::A("Invalid or Unknown instruction: %s 0x%04x 0x%lx", Info->Name, Op, DecodeInst.PC); + return false; + } + if (Info->Type >= FEXCore::X86Tables::TYPE_GROUP_1 && + Info->Type <= 
FEXCore::X86Tables::TYPE_GROUP_P) { + LogMan::Msg::A("Group Ops should have been decoded before this!"); + return false; + } + + // New instruction size decoding + { + // Decode destinations first + uint32_t DstSizeFlag = FEXCore::X86Tables::InstFlags::GetSizeDstFlags(Info->Flags); + uint32_t SrcSizeFlag = FEXCore::X86Tables::InstFlags::GetSizeSrcFlags(Info->Flags); + + if (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_8BIT) { + DecodeInst.Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_8BIT); + } + else if (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_16BIT) { + DecodeInst.Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_16BIT); + } + else if (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_128BIT) { + DecodeInst.Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_128BIT); + } + else if (DecodeInst.Flags & DecodeFlags::FLAG_OPERAND_SIZE && + DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_DEF) { + // See table 1-2. Operand-Size Overrides for this decoding + // If the default operating mode is 32bit and we have the operand size flag then the operating size drops to 16bit + DecodeInst.Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_16BIT); + } + else if (DecodeInst.Flags & DecodeFlags::FLAG_REX_WIDENING || + DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BIT) { + DecodeInst.Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_64BIT); + } + else { + DecodeInst.Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_32BIT); + } + + // Decode sources + if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_8BIT) { + DecodeInst.Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_8BIT); + } + else if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_16BIT) { + DecodeInst.Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_16BIT); + } + else if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_128BIT) { + DecodeInst.Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_128BIT); + } + else if (DecodeInst.Flags & 
DecodeFlags::FLAG_OPERAND_SIZE && + SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_DEF) { + // See table 1-2. Operand-Size Overrides for this decoding + // If the default operating mode is 32bit and we have the operand size flag then the operating size drops to 16bit + DecodeInst.Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_16BIT); + } + else if (DecodeInst.Flags & DecodeFlags::FLAG_REX_WIDENING || + SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BIT) { + DecodeInst.Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_64BIT); + } + else { + DecodeInst.Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_32BIT); + } + } + + // Is ModRM present via explicit instruction encoded or REX? + bool HasMODRM = !!(DecodeInst.Flags & DecodeFlags::FLAG_MODRM_PRESENT); + HasMODRM |= !!(Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_MODRM); + + bool HasSIB = false; + bool HasWideningDisplacement = DecodeInst.Flags & DecodeFlags::FLAG_REX_WIDENING; + bool HasNarrowingDisplacement = DecodeInst.Flags & DecodeFlags::FLAG_OPERAND_SIZE; + // This is used for ModRM register modification + // For both modrm.reg and modrm.rm(when mod == 0b11) when value is >= 0b100 + // then it changes from expected registers to the high 8bits of the lower registers + // Bit annoying to support + // In the case of no modrm (REX in byte situation) then it is unaffected + bool Is8BitSrc = (DecodeFlags::GetSizeSrcFlags(DecodeInst.Flags) == DecodeFlags::SIZE_8BIT); + bool Is8BitDest = (DecodeFlags::GetSizeDstFlags(DecodeInst.Flags) == DecodeFlags::SIZE_8BIT); + bool HasREX = !!(DecodeInst.Flags & DecodeFlags::FLAG_REX_PREFIX); + bool HasXMMSrc = !!(Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_XMM_FLAGS) && !HAS_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_SRC_GPR); + bool HasXMMDst = !!(Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_XMM_FLAGS) && !HAS_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_DST_GPR); + bool HasHighXMM = 
HAS_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_HIGH_XMM_REG); + uint8_t Displacement = 0; + + auto *CurrentDest = &DecodeInst.Dest; + + if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_DST_RAX) || + HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_DST_RDX)) { + // Some instructions hardcode their destination as RAX + CurrentDest->TypeGPR.Type = DecodedOperand::TYPE_GPR; + CurrentDest->TypeGPR.HighBits = false; + CurrentDest->TypeGPR.GPR = HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_DST_RAX) ? FEXCore::X86State::REG_RAX : FEXCore::X86State::REG_RDX; + CurrentDest = &DecodeInst.Src1; + } + + if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_REX_IN_BYTE)) { + LogMan::Throw::A(!HasMODRM, "This instruction shouldn't have ModRM!"); + + // If the REX is in the byte that means the lower nibble of the OP contains the destination GPR + // This also means that the destination is always a GPR on these ones + // ADDITIONALLY: + // If there is a REX prefix then that allows extended GPR usage + CurrentDest->TypeGPR.Type = DecodedOperand::TYPE_GPR; + DecodeInst.Dest.TypeGPR.HighBits = (Is8BitDest && !HasREX && (Op & 0b111) >= 0b100) || HasHighXMM; + CurrentDest->TypeGPR.GPR = MapModRMToReg(DecodeInst.Flags & DecodeFlags::FLAG_REX_XGPR_B ? 
1 : 0, Op & 0b111, Is8BitDest, HasREX, false); + } + + if (HasMODRM) { + if (!DecodeInst.DecodedModRM) { + DecodeInst.ModRM = ReadByte(); + DecodeInst.DecodedModRM = true; + } + + FEXCore::X86Tables::ModRMDecoded ModRM; + ModRM.Hex = DecodeInst.ModRM; + + DecodeModRM(ModRM, &Displacement); + HasSIB = DecodeSIB(ModRM, &Displacement); + } + + uint8_t Bytes = Info->MoreBytes + Displacement; + + if ((Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_DISPLACE_SIZE_MUL_2) && HasWideningDisplacement) { + Bytes <<= 1; + } + if ((Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_DISPLACE_SIZE_DIV_2) && HasNarrowingDisplacement) { + Bytes >>= 1; + } + + auto ModRMOperand = [&](FEXCore::X86Tables::DecodedOperand &GPR, FEXCore::X86Tables::DecodedOperand &NonGPR, bool HasXMMGPR, bool HasXMMNonGPR, bool GPR8Bit, bool NonGPR8Bit) { + FEXCore::X86Tables::ModRMDecoded ModRM; + ModRM.Hex = DecodeInst.ModRM; + + // Decode the GPR source first + GPR.TypeGPR.Type = DecodedOperand::TYPE_GPR; + GPR.TypeGPR.HighBits = (GPR8Bit && ModRM.reg >= 0b100 && !HasREX) || HasHighXMM; + GPR.TypeGPR.GPR = MapModRMToReg(DecodeInst.Flags & DecodeFlags::FLAG_REX_XGPR_R ? 1 : 0, ModRM.reg, GPR8Bit, HasREX, HasXMMGPR); + + // ModRM.mod == 0b11 == Register + // ModRM.Mod != 0b11 == Register-direct addressing + if (ModRM.mod == 0b11) { + NonGPR.TypeGPR.Type = DecodedOperand::TYPE_GPR; + NonGPR.TypeGPR.HighBits = (NonGPR8Bit && ModRM.rm >= 0b100 && !HasREX) || HasHighXMM; + NonGPR.TypeGPR.GPR = MapModRMToReg(DecodeInst.Flags & DecodeFlags::FLAG_REX_XGPR_B ? 1 : 0, ModRM.rm, NonGPR8Bit, HasREX, HasXMMNonGPR); + } + else { + if (HasSIB) { + // SIB + FEXCore::X86Tables::SIBDecoded SIB; + SIB.Hex = DecodeInst.SIB; + NonGPR.TypeSIB.Type = DecodedOperand::TYPE_SIB; + NonGPR.TypeSIB.Scale = 1 << SIB.scale; + + // The invalid encoding types are described at Table 1-12. "promoted nsigned is always non-zero" + NonGPR.TypeSIB.Index = MapModRMToReg(DecodeInst.Flags & DecodeFlags::FLAG_REX_XGPR_X ? 
1 : 0, SIB.index, false, false, false, 0b100); + NonGPR.TypeSIB.Base = MapModRMToReg(DecodeInst.Flags & DecodeFlags::FLAG_REX_XGPR_B ? 1 : 0, SIB.base, false, false, false, ModRM.mod == 0 ? 0b101 : 16); + + uint64_t Literal {0}; + LogMan::Throw::A(Displacement <= 4, "Number of bytes should be <= 4 for literal src"); + + Literal = ReadData(Displacement); + if (Displacement == 1) { + Literal = static_cast(Literal); + } + Bytes -= Displacement; + NonGPR.TypeSIB.Offset = Literal; + } + else if (ModRM.mod == 0) { + // Explained in Table 1-14. "Operand Addressing Using ModRM and SIB Bytes" + LogMan::Throw::A(ModRM.rm != 0b100, "Shouldn't have hit this here"); + if (ModRM.rm == 0b101) { + // 32bit Displacement + uint32_t Literal; + Literal = ReadData(4); + Bytes -= 4; + + NonGPR.TypeRIPLiteral.Type = DecodedOperand::TYPE_RIP_RELATIVE; + NonGPR.TypeRIPLiteral.Literal = Literal; + } + else { + // Register-direct addressing + NonGPR.TypeGPR.Type = DecodedOperand::TYPE_GPR_DIRECT; + NonGPR.TypeGPR.GPR = MapModRMToReg(DecodeInst.Flags & DecodeFlags::FLAG_REX_XGPR_B ? 1 : 0, ModRM.rm, false, false, false); + } + } + else { + uint8_t DisplacementSize = ModRM.mod == 1 ? 1 : 4; + uint32_t Literal; + Literal = ReadData(DisplacementSize); + if (DisplacementSize == 1) { + Literal = static_cast(Literal); + } + Bytes -= DisplacementSize; + + NonGPR.TypeGPRIndirect.Type = DecodedOperand::TYPE_GPR_INDIRECT; + NonGPR.TypeGPRIndirect.GPR = MapModRMToReg(DecodeInst.Flags & DecodeFlags::FLAG_REX_XGPR_B ? 
1 : 0, ModRM.rm, false, false, false); + NonGPR.TypeGPRIndirect.Displacement = Literal; + } + } + }; + + if (Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_MODRM && + Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SF_MOD_DST) { + ModRMOperand(DecodeInst.Src1, DecodeInst.Dest, HasXMMSrc, HasXMMDst, Is8BitSrc, Is8BitDest); + } + + // This is almost the same as when the ModRM is the destination type + // The main different being that Dst and Src flip which bits that use (reg<->rm) + auto *CurrentSrc = &DecodeInst.Src1; + if (Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_MODRM && + !(Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SF_MOD_DST)) { + ModRMOperand(DecodeInst.Dest, DecodeInst.Src1, HasXMMDst, HasXMMSrc, Is8BitDest, Is8BitSrc); + CurrentSrc = &DecodeInst.Src2; + } + else if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_SRC_RAX)) { + CurrentSrc->TypeGPR.Type = DecodedOperand::TYPE_GPR; + CurrentSrc->TypeGPR.HighBits = false; + CurrentSrc->TypeGPR.GPR = FEXCore::X86State::REG_RAX; + CurrentSrc = &DecodeInst.Src2; + } + else if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_SRC_RCX)) { + CurrentSrc->TypeGPR.Type = DecodedOperand::TYPE_GPR; + CurrentSrc->TypeGPR.HighBits = false; + CurrentSrc->TypeGPR.GPR = FEXCore::X86State::REG_RCX; + CurrentSrc = &DecodeInst.Src2; + } + + if (Bytes != 0) { + LogMan::Throw::A(Bytes <= 8, "Number of bytes should be <= 8 for literal src"); + + CurrentSrc->TypeLiteral.Size = Bytes; + + uint64_t Literal {0}; + Literal = ReadData(Bytes); + + if (Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SRC_SEXT) { + if (Bytes == 1) { + Literal = static_cast(Literal); + } + else if (Bytes == 2) { + Literal = static_cast(Literal); + } + else { + Literal = static_cast(Literal); + } + } + + Bytes = 0; + CurrentSrc->TypeLiteral.Type = DecodedOperand::TYPE_LITERAL; + CurrentSrc->TypeLiteral.Literal = Literal; + } + + if (Bytes != 0) { + LogMan::Msg::A("Inst at 0x%lx: 0x%04x 
'%s' Had an instruction of size %d with %d remaining", DecodeInst.PC, DecodeInst.OP, DecodeInst.TableInfo->Name, InstructionSize, Bytes); + } + LogMan::Throw::A(Bytes == 0, "Had undecoded bytes left in the instruction encoding"); + DecodeInst.InstSize = InstructionSize; + return true; + }; + + auto NormalOpHeader = [&ReadByte, &DecodeInst, &NormalOp](auto &Table, auto Op) -> bool { + FEXCore::X86Tables::X86InstInfo *Info = &Table[Op]; + + DecodeInst.OP = Op; + DecodeInst.TableInfo = Info; + + // XXX: Once we support 32bit x86 then this will be necessary to support + if (Info->Type == FEXCore::X86Tables::TYPE_LEGACY_PREFIX) { + DecodeInst.Flags |= DecodeFlags::FLAG_LEGACY_PREFIX; + LogMan::Msg::A("Legacy Prefix"); + return false; + } + + if (Info->Type == FEXCore::X86Tables::TYPE_UNKNOWN || + Info->Type == FEXCore::X86Tables::TYPE_INVALID) { + LogMan::Msg::A("Invalid or Unknown instruction: %s 0x%04x 0x%lx", Info->Name, Op, DecodeInst.PC); + return false; + } + + if (Info->Type >= FEXCore::X86Tables::TYPE_GROUP_6 && + Info->Type <= FEXCore::X86Tables::TYPE_GROUP_P) { +#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_6) << 5) | (prefix) << 3 | (Reg)) + constexpr uint16_t PF_NONE = 0; + constexpr uint16_t PF_F3 = 1; + constexpr uint16_t PF_66 = 2; + constexpr uint16_t PF_F2 = 3; + + uint16_t PrefixType = PF_NONE; + if (DecodeInst.LastEscapePrefix == 0xF3) + PrefixType = PF_F3; + else if (DecodeInst.LastEscapePrefix == 0xF2) + PrefixType = PF_F2; + else if (DecodeInst.LastEscapePrefix == 0x66) + PrefixType = PF_66; + + // We have ModRM + uint8_t ModRMByte = ReadByte(); + DecodeInst.ModRM = ModRMByte; + DecodeInst.DecodedModRM = true; + DecodeInst.Flags |= DecodeFlags::FLAG_MODRM_PRESENT; + + FEXCore::X86Tables::ModRMDecoded ModRM; + ModRM.Hex = DecodeInst.ModRM; + + uint16_t Op = OPD(Info->Type, PrefixType, ModRM.reg); + FEXCore::X86Tables::X86InstInfo *Info = &SecondInstGroupOps[Op]; +#undef OPD + if (Info->Type == 
FEXCore::X86Tables::TYPE_SECOND_GROUP_MODRM) { + // Everything in this group is privileged instructions aside from XGETBV + constexpr std::array RegToField = { + 255, + 0, + 1, + 2, + 255, + 255, + 255, + 3, + }; + uint8_t Field = RegToField[ModRM.reg]; + LogMan::Throw::A(Field != 255, "Invalid field selected!"); + + uint8_t Op = (Field << 3) | ModRM.rm; + return NormalOp(SecondModRMTableOps, Op); + } + else { + return NormalOp(SecondInstGroupOps, Op); + } + } + else if (Info->Type == FEXCore::X86Tables::TYPE_X87_TABLE_PREFIX) { + // We have ModRM + uint8_t ModRMByte = ReadByte(); + DecodeInst.ModRM = ModRMByte; + DecodeInst.DecodedModRM = true; + DecodeInst.Flags |= DecodeFlags::FLAG_MODRM_PRESENT; + + FEXCore::X86Tables::ModRMDecoded ModRM; + ModRM.Hex = DecodeInst.ModRM; + + uint16_t X87Op = ((Op - 0xD8) << 8) | ModRMByte; + return NormalOp(X87Ops, X87Op); + } + else if (Info->Type >= FEXCore::X86Tables::TYPE_GROUP_1 && + Info->Type <= FEXCore::X86Tables::TYPE_GROUP_11) { + uint8_t ModRMByte = ReadByte(); + DecodeInst.ModRM = ModRMByte; + DecodeInst.DecodedModRM = true; + DecodeInst.Flags |= DecodeFlags::FLAG_MODRM_PRESENT; + + FEXCore::X86Tables::ModRMDecoded ModRM; + ModRM.Hex = DecodeInst.ModRM; + +#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg)) + return NormalOp(PrimaryInstGroupOps, OPD(Info->Type, Info->MoreBytes, ModRM.reg)); +#undef OPD + } + else if (Info->Type == FEXCore::X86Tables::TYPE_MODRM_TABLE_PREFIX) { + } + else if (Info->Type == FEXCore::X86Tables::TYPE_VEX_TABLE_PREFIX) { + uint16_t map_select = 1; + uint16_t pp = 0; + + uint8_t Byte1 = ReadByte(); + + if (Op == 0xC5) { // Two byte VEX + pp = Byte1 & 0b11; + } + else { // 0xC4 = Three byte VEX + uint8_t Byte2 = ReadByte(); + pp = Byte2 & 0b11; + map_select = Byte1 & 0b11111; + LogMan::Throw::A(map_select >= 1 && map_select <= 3, "We don't understand a map_select of: %d", map_select); + } + + uint16_t VEXOp = ReadByte(); +#define 
OPD(map_select, pp, opcode) (((map_select - 1) << 10) | (pp << 8) | (opcode)) + return NormalOp(VEXTableOps, OPD(map_select, pp, VEXOp)); +#undef OPD + } + else if (Info->Type == FEXCore::X86Tables::TYPE_XOP_TABLE_PREFIX) { + LogMan::Msg::A("XOP and POP aren't handled!"); + uint16_t Byte1 = ReadByte(); + uint16_t Byte2 = ReadByte(); + uint16_t XOPOp = ReadByte(); + uint16_t map_select = Byte1 & 0b11111; + LogMan::Throw::A(map_select >= 8 && map_select <= 0xA, "We don't understand a map_select of: %d", map_select); + uint16_t pp = Byte2 & 0b11; + map_select -= 8; + +#define OPD(group, pp, opcode) ( (group << 10) | (pp << 8) | (opcode)) + return NormalOp(XOPTableOps, OPD(map_select, pp, XOPOp)); +#undef OPD + } + + return NormalOp(Table, Op); + }; + + DecodeInst.PC = PC; + while (!InstructionDecoded && !ErrorDuringDecoding) { + uint8_t Op = ReadByte(); + switch (Op) { + case 0x0F: {// Escape Op + uint8_t EscapeOp = ReadByte(); + switch (EscapeOp) { + case 0x0F: { // 3DNow! + // 3DNow! Instruction Encoding: 0F 0F [ModRM] [SIB] [Displacement] [Opcode] + // Decode ModRM + uint8_t ModRMByte = ReadByte(); + DecodeInst.ModRM = ModRMByte; + DecodeInst.DecodedModRM = true; + DecodeInst.Flags |= DecodeFlags::FLAG_MODRM_PRESENT; + + FEXCore::X86Tables::ModRMDecoded ModRM; + ModRM.Hex = DecodeInst.ModRM; + + uint8_t Displacement = 0; + DecodeModRM(ModRM, &Displacement); + DecodeSIB(ModRM, &Displacement); + + // Take a peek at the op just past the displacement + uint8_t Op = PeekByte(Displacement); + if (NormalOpHeader(FEXCore::X86Tables::DDDNowOps, Op)) { + InstructionDecoded = true; + } + + // Make sure to read the opcode in to our internal structure + ReadByte(); + break; + } + case 0x38: { // F38 Table! 
+ constexpr uint16_t PF_38_NONE = 0; + constexpr uint16_t PF_38_66 = 1; + constexpr uint16_t PF_38_F2 = 2; + + uint16_t Prefix = PF_38_NONE; + if (DecodeInst.LastEscapePrefix == 0xF2) // REPNE + Prefix = PF_38_F2; + else if (DecodeInst.LastEscapePrefix == 0x66) // Operand Size + Prefix = PF_38_66; + + uint16_t Op = (Prefix << 8) | ReadByte(); + if (NormalOpHeader(FEXCore::X86Tables::H0F38TableOps, Op)) { + InstructionDecoded = true; + } + break; + } + case 0x3A: { // F3A Table! + constexpr uint16_t PF_3A_NONE = 0; + constexpr uint16_t PF_3A_66 = 1; + constexpr uint16_t PF_3A_REX = (1 << 1); + + uint16_t Prefix = PF_3A_NONE; + if (DecodeInst.LastEscapePrefix == 0x66) // Operand Size + Prefix = PF_3A_66; + + if (DecodeInst.Flags & DecodeFlags::FLAG_REX_PREFIX) + Prefix |= PF_3A_REX; + + uint16_t Op = (Prefix << 8) | ReadByte(); + if (NormalOpHeader(FEXCore::X86Tables::H0F3ATableOps, Op)) { + InstructionDecoded = true; + } + break; + } + default: // Two byte table! + // x86-64 abuses three legacy prefixes to extend the table encodings + // 0x66 - Operand Size prefix + // 0xF2 - REPNE prefix + // 0xF3 - REP prefix + // If any of these three prefixes are used then it falls down the subtable + // Additionally: If you hit repeat of differnt prefixes then only the LAST one before this one works for subtable selection + if (DecodeInst.LastEscapePrefix == 0xF3) { // REP + // Remove prefix so it doesn't effect calculations. + // This is only an escape prefix rather tan modifier now + DecodeInst.Flags &= ~DecodeFlags::FLAG_REP_PREFIX; + if (NormalOpHeader(FEXCore::X86Tables::RepModOps, EscapeOp)) { + InstructionDecoded = true; + } + } + else if (DecodeInst.LastEscapePrefix == 0xF2) { // REPNE + // Remove prefix so it doesn't effect calculations. 
+ // This is only an escape prefix rather tan modifier now + DecodeInst.Flags &= ~DecodeFlags::FLAG_REPNE_PREFIX; + if (NormalOpHeader(FEXCore::X86Tables::RepNEModOps, EscapeOp)) { + InstructionDecoded = true; + } + } + else if (DecodeInst.LastEscapePrefix == 0x66) { // Operand Size + // Remove prefix so it doesn't effect calculations. + // This is only an escape prefix rather tan modifier now + DecodeInst.Flags &= ~DecodeFlags::FLAG_OPERAND_SIZE; + if (NormalOpHeader(FEXCore::X86Tables::OpSizeModOps, EscapeOp)) { + InstructionDecoded = true; + } + } + else if (NormalOpHeader(FEXCore::X86Tables::SecondBaseOps, EscapeOp)) { + InstructionDecoded = true; + } + break; + } + break; + } + case 0x66: // Operand Size prefix + DecodeInst.Flags |= DecodeFlags::FLAG_OPERAND_SIZE; + DecodeInst.LastEscapePrefix = Op; + break; + case 0x67: // Address Size override prefix + DecodeInst.Flags |= DecodeFlags::FLAG_ADDRESS_SIZE; + break; + case 0x26: // ES legacy prefix + case 0x2E: // CS legacy prefix + case 0x3E: // DS legacy prefix + // Annoyingly GCC generates NOP ops with these prefixes + // Just ignore them for now + // eg. 
66 2e 0f 1f 84 00 00 00 00 00 nop WORD PTR cs:[rax+rax*1+0x0] + break; + case 0x40: // REX - 0x40-0x4F + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: { + DecodeInst.Flags |= DecodeFlags::FLAG_REX_PREFIX; + + // Widening displacement + if (Op & 0b1000) + DecodeInst.Flags |= DecodeFlags::FLAG_REX_WIDENING; + + // XGPR_B bit set + if (Op & 0b0001) + DecodeInst.Flags |= DecodeFlags::FLAG_REX_XGPR_B; + + // XGPR_X bit set + if (Op & 0b0010) + DecodeInst.Flags |= DecodeFlags::FLAG_REX_XGPR_X; + + // XGPR_R bit set + if (Op & 0b0100) + DecodeInst.Flags |= DecodeFlags::FLAG_REX_XGPR_R; + + break; + } + case 0xF0: // LOCK prefix + DecodeInst.Flags |= DecodeFlags::FLAG_LOCK; + break; + case 0xF2: // REPNE prefix + DecodeInst.Flags |= DecodeFlags::FLAG_REPNE_PREFIX; + DecodeInst.LastEscapePrefix = Op; + break; + case 0xF3: // REP prefix + DecodeInst.Flags |= DecodeFlags::FLAG_REP_PREFIX; + DecodeInst.LastEscapePrefix = Op; + break; + case 0x64: // FS prefix + DecodeInst.Flags |= DecodeFlags::FLAG_FS_PREFIX; + break; + case 0x65: // GS prefix + DecodeInst.Flags |= DecodeFlags::FLAG_FS_PREFIX; + break; + default: { // Default base table + if (NormalOpHeader(FEXCore::X86Tables::BaseOps, Op)) { + InstructionDecoded = true; + } + else { + LogMan::Msg::E("Error during instruction decoding"); + ErrorDuringDecoding = true; + } + break; + } + } + + } + return !ErrorDuringDecoding; +} + +bool Decoder::BlockEndCanContinuePast(FEXCore::X86Tables::DecodedInst const &Inst) { + if (!CTX->Config.Multiblock) + return false; + + // Have we had a conditional branch past this PC previously? + // We can continue in this case + // + // ex. + // test eax, eax + // jne .Continue + // ud2 <--- We can continue past this instruction, which is a block ender + // .Continue: + // ... 
+ + return Inst.PC <= MaxCondBranchForward; +} + +bool Decoder::BranchTargetInMultiblockRange(FEXCore::X86Tables::DecodedInst const &Inst) { + if (!CTX->Config.Multiblock) + return false; + + // If the RIP setting is conditional AND within our symbol range then it can be considered for multiblock + uint64_t TargetRIP = 0; + bool Conditional = true; + switch (Inst.OP) { + case 0x70 ... 0x7F: { // Conditional JUMP + // Source is a literal + // auto RIPOffset = LoadSource(Op, Op->Src1, Op->Flags); + // auto RIPTargetConst = _Constant(Op->PC + Op->InstSize); + // Target offset is PC + InstSize + Literal + TargetRIP = Inst.PC + Inst.InstSize + Inst.Src1.TypeLiteral.Literal; + break; + } + case 0xE9: + case 0xEB: // Both are unconditional JMP instructions + TargetRIP = Inst.PC + Inst.InstSize + Inst.Src1.TypeLiteral.Literal; + Conditional = false; + break; + case 0xC2: // RET imm + case 0xC3: // RET + Conditional = false; + break; + default: + return false; + break; + } + + // If the target RIP is within the symbol ranges then we are golden + if (TargetRIP > SymbolMinAddress && TargetRIP <= SymbolMaxAddress) { + // Update our conditional branch ranges before we return + if (Conditional) { + MaxCondBranchForward = std::max(MaxCondBranchForward, TargetRIP); + MaxCondBranchBackwards = std::min(MaxCondBranchBackwards, TargetRIP); + } + //JumpTargets.emplace(TargetRIP); + return true; + } + return false; +} + +bool Decoder::DecodeInstructionsInBlock(uint8_t const* InstStream, uint64_t PC) { + // Reset internal state management + DecodedSize = 0; + MaxCondBranchForward = 0; + MaxCondBranchBackwards = ~0ULL; + + // XXX: Load symbol data + SymbolAvailable = false; + EntryPoint = PC; + JumpTargets.clear(); + + bool ErrorDuringDecoding = false; + bool Done = false; + uint64_t PCOffset = 0; + + // If we don't have symbols available then we become a bit optimistic about multiblock ranges + if (!SymbolAvailable) { + // If we don't have a symbol available then assume all branches are 
valid for multiblock + SymbolMaxAddress = ~0ULL; + SymbolMinAddress = 0; + } + +// LogMan::Msg::I("============================"); +// LogMan::Msg::I(">>> Started decoding at 0x%lx", PC); +// LogMan::Msg::I("============================"); + + while(!Done) { + ErrorDuringDecoding = !DecodeInstruction(InstStream, PC + PCOffset); + if (ErrorDuringDecoding) { + LogMan::Msg::D("Couldn't Decode something at 0x%lx, Started at 0x%lx", PC + PCOffset, PC); + break; + } + auto &DecodeInst = DecodedBuffer[DecodedSize]; + ++DecodedSize; + + bool CanContinue = false; + if (!(DecodeInst.TableInfo->Flags & + (FEXCore::X86Tables::InstFlags::FLAGS_BLOCK_END | FEXCore::X86Tables::InstFlags::FLAGS_SETS_RIP))) { + // If this isn't a block ender then we can keep going regardless + CanContinue = true; + } + + // If this is an instruction that just completely kills a block then just end currently + // XXX: If we've had a conditional branch past this then keep going + if (DecodeInst.TableInfo->Flags & FEXCore::X86Tables::InstFlags::FLAGS_BLOCK_END) { + CanContinue = BlockEndCanContinuePast(DecodeInst); + } + + if (DecodeInst.TableInfo->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SETS_RIP) { + // If we have multiblock enabled + // If the branch target is within our multiblock range then we can keep going on + // We don't want to short circuit this since we want to calculate our ranges still + CanContinue = CanContinue | BranchTargetInMultiblockRange(DecodeInst); + } + + if (!CanContinue) { + break; + } + + if (DecodedSize >= CTX->Config.MaxInstPerBlock) { + break; + } + + if (DecodedSize >= DecodedBuffer.size()) { + break; + } + PCOffset += DecodeInst.InstSize; + InstStream += DecodeInst.InstSize; + } + + return !ErrorDuringDecoding; +} + +} + diff --git a/Source/Interface/Core/Frontend.h b/Source/Interface/Core/Frontend.h new file mode 100644 index 000000000..81017f1d4 --- /dev/null +++ b/Source/Interface/Core/Frontend.h @@ -0,0 +1,44 @@ +#pragma once + +#include +#include +#include 
+#include +#include + +namespace FEXCore::Context { +struct Context; +} + +namespace FEXCore::Frontend { +class Decoder { +public: + Decoder(FEXCore::Context::Context *ctx); + bool DecodeInstructionsInBlock(uint8_t const* InstStream, uint64_t PC); + + std::pair*, size_t> const GetDecodedInsts() { + return std::make_pair(&DecodedBuffer, DecodedSize); + } + std::set JumpTargets; + +private: + FEXCore::Context::Context *CTX; + + bool DecodeInstruction(uint8_t const *InstStream, uint64_t PC); + + bool BlockEndCanContinuePast(FEXCore::X86Tables::DecodedInst const &Inst); + bool BranchTargetInMultiblockRange(FEXCore::X86Tables::DecodedInst const &Inst); + static constexpr size_t DefaultDecodedBufferSize = 0x10000; + std::vector DecodedBuffer; + size_t DecodedSize {}; + + // This is for multiblock data tracking + bool SymbolAvailable {false}; + uint64_t EntryPoint {}; + uint64_t MaxCondBranchForward {}; + uint64_t MaxCondBranchBackwards {~0ULL}; + uint64_t SymbolMaxAddress {}; + uint64_t SymbolMinAddress {~0ULL}; + +}; +} diff --git a/Source/Interface/Core/InternalThreadState.h b/Source/Interface/Core/InternalThreadState.h new file mode 100644 index 000000000..d77d97e6c --- /dev/null +++ b/Source/Interface/Core/InternalThreadState.h @@ -0,0 +1 @@ +#include diff --git a/Source/Interface/Core/Interpreter/InterpreterCore.cpp b/Source/Interface/Core/Interpreter/InterpreterCore.cpp new file mode 100644 index 000000000..a4eab635e --- /dev/null +++ b/Source/Interface/Core/Interpreter/InterpreterCore.cpp @@ -0,0 +1,1468 @@ +#include "LogManager.h" +#include "Common/MathUtils.h" +#include "Interface/Context/Context.h" +#include "Interface/Core/DebugData.h" +#include "Interface/Core/InternalThreadState.h" +#include "Interface/HLE/Syscalls.h" +#include "LogManager.h" + +#include +#include +#include + +#include +#include + +namespace FEXCore::CPU { + +#define DESTMAP_AS_MAP 0 +#if DESTMAP_AS_MAP +using DestMapType = std::unordered_map; +#else +using DestMapType = std::vector; +#endif 
+ +class InterpreterCore final : public CPUBackend { +public: + explicit InterpreterCore(FEXCore::Context::Context *ctx); + ~InterpreterCore() override = default; + std::string GetName() override { return "Interpreter"; } + void *CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData) override; + + void *MapRegion(void* HostPtr, uint64_t, uint64_t) override { return HostPtr; } + + bool NeedsOpDispatch() override { return true; } + + void ExecuteCode(FEXCore::Core::InternalThreadState *Thread); +private: + FEXCore::Context::Context *CTX; + void *AllocateTmpSpace(size_t Size); + + template + Res GetDest(IR::NodeWrapper Op); + + template + Res GetSrc(IR::NodeWrapper Src); + + std::vector TmpSpace; + DestMapType DestMap; + size_t TmpOffset{}; + + FEXCore::IR::IRListView *CurrentIR; +}; + +static void InterpreterExecution(FEXCore::Core::InternalThreadState *Thread) { + InterpreterCore *Core = reinterpret_cast(Thread->CPUBackend.get()); + Core->ExecuteCode(Thread); +} + +InterpreterCore::InterpreterCore(FEXCore::Context::Context *ctx) + : CTX {ctx} { + // Grab our space for temporary data + TmpSpace.resize(4096 * 32); +#if !DESTMAP_AS_MAP + DestMap.resize(4096); +#endif +} + +void *InterpreterCore::AllocateTmpSpace(size_t Size) { + // XXX: IR generation has a bug where the size can periodically end up being zero + // LogMan::Throw::A(Size !=0, "Dest Op had zero destination size"); + Size = Size < 16 ? 
16 : Size; + + // Force alignment by size + size_t NewBase = AlignUp(TmpOffset, Size); + size_t NewEnd = NewBase + Size; + + if (NewEnd >= TmpSpace.size()) { + // If we are going to overrun the end of our temporary space then double the size of it + TmpSpace.resize(TmpSpace.size() * 2); + } + + // Make sure to set the new offset + TmpOffset = NewEnd; + + return &TmpSpace.at(NewBase); +} + +template +Res InterpreterCore::GetDest(IR::NodeWrapper Op) { + auto DstPtr = DestMap[Op.NodeOffset]; + return reinterpret_cast(DstPtr); +} + +template +Res InterpreterCore::GetSrc(IR::NodeWrapper Src) { +#if DESTMAP_AS_MAP + LogMan::Throw::A(DestMap.find(Src.NodeOffset) != DestMap.end(), "Op had source but it wasn't in the destination map"); +#endif + + auto DstPtr = DestMap[Src.NodeOffset]; + LogMan::Throw::A(DstPtr != nullptr, "Destmap had slot but didn't get allocated memory"); + return reinterpret_cast(DstPtr); +} + +void *InterpreterCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView const *IR, [[maybe_unused]] FEXCore::Core::DebugData *DebugData) { + return reinterpret_cast(InterpreterExecution); +} + +void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) { + auto IR = Thread->IRLists.find(Thread->State.State.rip); + auto DebugData = Thread->DebugData.find(Thread->State.State.rip); + CurrentIR = IR->second.get(); + + bool Quit = false; + TmpOffset = 0; // Reset where we are in the temp data range + + uintptr_t ListBegin = CurrentIR->GetListData(); + uintptr_t DataBegin = CurrentIR->GetData(); + + IR::NodeWrapperIterator Begin = CurrentIR->begin(); + IR::NodeWrapperIterator End = CurrentIR->end(); + + #if DESTMAP_AS_MAP + DestMap.clear(); + #else + uintptr_t ListSize = CurrentIR->GetListSize(); + if (ListSize > DestMap.size()) { + DestMap.resize(std::max(DestMap.size() * 2, ListSize)); + } + #endif + + static_assert(sizeof(FEXCore::IR::IROp_Header) == 4); + static_assert(sizeof(FEXCore::IR::OrderedNode) == 16); + + #define GD 
*GetDest(*WrapperOp) + #define GDP GetDest(*WrapperOp) + while (Begin != End && !Quit) { + using namespace FEXCore::IR; + using namespace FEXCore::IR; + + NodeWrapper *WrapperOp = Begin(); + OrderedNode *RealNode = reinterpret_cast(WrapperOp->GetPtr(ListBegin)); + FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin); + uint8_t OpSize = IROp->Size; + + if (IROp->HasDest) { + uint64_t AllocSize = OpSize * std::min(static_cast(1), IROp->Elements); + DestMap[WrapperOp->NodeOffset] = AllocateTmpSpace(AllocSize); + } + + switch (IROp->Op) { + case IR::OP_BEGINBLOCK: + break; + case IR::OP_ENDBLOCK: { + auto Op = IROp->C(); + Thread->State.State.rip += Op->RIPIncrement; + break; + } + case IR::OP_EXITFUNCTION: + case IR::OP_ENDFUNCTION: { + Quit = true; + break; + } + + case IR::OP_MOV: { + auto Op = IROp->C(); + memcpy(GDP, GetSrc(Op->Header.Args[0]), OpSize); + break; + } + case IR::OP_BREAK: { + auto Op = IROp->C(); + switch (Op->Reason) { + case 4: // HLT + Thread->State.RunningEvents.ShouldStop = true; + Quit = true; + break; + default: LogMan::Msg::A("Unknown Break reason: %d", Op->Reason); + } + } + break; + case IR::OP_CONDJUMP: { + auto Op = IROp->C(); + uint64_t Arg = *GetSrc(Op->Header.Args[0]); + if (!!Arg) { + // Convert argument from NodeWrapper to NodeWrapperIterator + auto IterLocation = NodeWrapperIterator(ListBegin, Op->Header.Args[1]); + Begin = IterLocation; + continue; + } + break; + } + case IR::OP_JUMP: { + auto Op = IROp->C(); + // Convert argument from NodeWrapper to NodeWrapperIterator + auto IterLocation = NodeWrapperIterator(ListBegin, Op->Header.Args[0]); + Begin = IterLocation; + continue; + break; + } + case IR::OP_CONSTANT: { + auto Op = IROp->C(); + GD = Op->Constant; + break; + } + case IR::OP_LOADCONTEXT: { + auto Op = IROp->C(); + + uintptr_t ContextPtr = reinterpret_cast(&Thread->State.State); + ContextPtr += Op->Offset; +#define LOAD_CTX(x, y) \ + case x: { \ + y const *Data = reinterpret_cast(ContextPtr); \ + GD = *Data; \ + } \ 
+ break + switch (Op->Size) { + LOAD_CTX(1, uint8_t); + LOAD_CTX(2, uint16_t); + LOAD_CTX(4, uint32_t); + LOAD_CTX(8, uint64_t); + case 16: { + void const *Data = reinterpret_cast(ContextPtr); + memcpy(GDP, Data, Op->Size); + } + break; + default: LogMan::Msg::A("Unhandled LoadContext size: %d", Op->Size); + } +#undef LOAD_CTX + break; + } + case IR::OP_LOADFLAG: { + auto Op = IROp->C(); + + uintptr_t ContextPtr = reinterpret_cast(&Thread->State.State); + ContextPtr += offsetof(FEXCore::Core::CPUState, flags[0]); + ContextPtr += Op->Flag; + uint8_t const *Data = reinterpret_cast(ContextPtr); + GD = *Data; + break; + } + case IR::OP_STOREFLAG: { + auto Op = IROp->C(); + uint8_t Arg = *GetSrc(Op->Header.Args[0]) & 1; + + uintptr_t ContextPtr = reinterpret_cast(&Thread->State.State); + ContextPtr += offsetof(FEXCore::Core::CPUState, flags[0]); + ContextPtr += Op->Flag; + uint8_t *Data = reinterpret_cast(ContextPtr); + *Data = Arg; + break; + } + + case IR::OP_STORECONTEXT: { + auto Op = IROp->C(); + + uintptr_t ContextPtr = reinterpret_cast(&Thread->State.State); + ContextPtr += Op->Offset; + + void *Data = reinterpret_cast(ContextPtr); + void *Src = GetSrc(Op->Header.Args[0]); + memcpy(Data, Src, Op->Size); + break; + } + case IR::OP_SYSCALL: { + auto Op = IROp->C(); + + FEXCore::HLE::SyscallArguments Args; + for (size_t j = 0; j < 7; ++j) + Args.Argument[j] = *GetSrc(Op->Header.Args[j]); + + uint64_t Res = CTX->SyscallHandler.HandleSyscall(Thread, &Args); + GD = Res; + break; + } + case IR::OP_LOADMEM: { + auto Op = IROp->C(); + void const *Data = Thread->CTX->MemoryMapper.GetPointer(*GetSrc(Op->Header.Args[0])); + LogMan::Throw::A(Data != nullptr, "Couldn't Map pointer to 0x%lx\n", *GetSrc(Op->Header.Args[0])); + memcpy(GDP, Data, OpSize); + + uint64_t Ret{}; + memcpy(&Ret, Data, Op->Size > 8 ? 
8 : Op->Size); + //LogMan::Msg::D("Loading from guestmem: 0x%lx (%d)", *GetSrc(Op->Header.Args[0]), Op->Size); + //LogMan::Msg::D("\tLoading: 0x%016lx", Ret); + break; + } + case IR::OP_STOREMEM: { + #define STORE_DATA(x, y) \ + case x: { \ + y *Data = Thread->CTX->MemoryMapper.GetBaseOffset(*GetSrc(Op->Header.Args[0])); \ + LogMan::Throw::A(Data != nullptr, "Couldn't Map pointer to 0x%lx for size %d store\n", *GetSrc(Op->Header.Args[0]), x);\ + *Data = *GetSrc(Op->Header.Args[1]); \ + } \ + break + + auto Op = IROp->C(); + //LogMan::Msg::D("Storing guestmem: 0x%lx (%d)", *GetSrc(Op->Header.Args[0]), Op->Size); + //LogMan::Msg::D("\tStoring: 0x%016lx", (uint64_t)*GetSrc(Op->Header.Args[1])); + + switch (Op->Size) { + STORE_DATA(1, uint8_t); + STORE_DATA(2, uint16_t); + STORE_DATA(4, uint32_t); + STORE_DATA(8, uint64_t); + case 16: { + void *Mem = Thread->CTX->MemoryMapper.GetPointer(*GetSrc(Op->Header.Args[0])); + void *Src = GetSrc(Op->Header.Args[1]); + memcpy(Mem, Src, 16); + } + break; + default: + LogMan::Msg::A("Unhandled StoreMem size"); + break; + } + #undef STORE_DATA + break; + } + case IR::OP_ADD: { + auto Op = IROp->C(); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + + GD = Src1 + Src2; + break; + } + case IR::OP_SUB: { + auto Op = IROp->C(); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + + GD = Src1 - Src2; + break; + } + case IR::OP_MUL: { + auto Op = IROp->C(); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + + switch (OpSize) { + case 1: + GD = static_cast(static_cast(Src1)) * static_cast(static_cast(Src2)); + break; + case 2: + GD = static_cast(static_cast(Src1)) * static_cast(static_cast(Src2)); + break; + case 4: + GD = static_cast(static_cast(Src1)) * static_cast(static_cast(Src2)); + break; + case 8: + GD = static_cast(Src1) * static_cast(Src2); + break; + case 16: { + __int128_t Tmp = 
static_cast<__int128_t>(static_cast(Src1)) * static_cast<__int128_t>(static_cast(Src2)); + memcpy(GDP, &Tmp, 16); + } + break; + + default: LogMan::Msg::A("Unknown Mul Size: %d\n", OpSize); break; + } + break; + } + case IR::OP_MULH: { + auto Op = IROp->C(); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + + switch (OpSize) { + case 1: { + int64_t Tmp = static_cast(static_cast(Src1)) * static_cast(static_cast(Src2)); + GD = Tmp >> 8; + break; + } + case 2: { + int64_t Tmp = static_cast(static_cast(Src1)) * static_cast(static_cast(Src2)); + GD = Tmp >> 16; + break; + } + case 4: { + int64_t Tmp = static_cast(static_cast(Src1)) * static_cast(static_cast(Src2)); + GD = Tmp >> 32; + break; + } + case 8: { + __int128_t Tmp = static_cast<__int128_t>(static_cast(Src1)) * static_cast<__int128_t>(static_cast(Src2)); + GD = Tmp >> 64; + } + break; + default: LogMan::Msg::A("Unknown MulH Size: %d\n", OpSize); break; + } + break; + } + case IR::OP_UMUL: { + auto Op = IROp->C(); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + + switch (OpSize) { + case 1: + GD = static_cast(Src1) * static_cast(Src2); + break; + case 2: + GD = static_cast(Src1) * static_cast(Src2); + break; + case 4: + GD = static_cast(Src1) * static_cast(Src2); + break; + case 8: + GD = static_cast(Src1) * static_cast(Src2); + break; + case 16: { + __uint128_t Tmp = static_cast<__uint128_t>(static_cast(Src1)) * static_cast<__uint128_t>(static_cast(Src2)); + memcpy(GDP, &Tmp, 16); + } + break; + + default: LogMan::Msg::A("Unknown UMul Size: %d\n", OpSize); break; + } + break; + } + case IR::OP_UMULH: { + auto Op = IROp->C(); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + switch (OpSize) { + case 1: + GD = static_cast(Src1) * static_cast(Src2); + GD >>= 8; + break; + case 2: + GD = static_cast(Src1) * static_cast(Src2); + GD >>= 16; + break; + case 4: + GD = 
static_cast(Src1) * static_cast(Src2); + GD >>= 32; + break; + case 8: { + __uint128_t Tmp = static_cast<__uint128_t>(Src1) * static_cast<__uint128_t>(Src2); + GD = Tmp >> 64; + } + break; + case 16: { + // XXX: This is incorrect + __uint128_t Tmp = static_cast<__uint128_t>(Src1) * static_cast<__uint128_t>(Src2); + GD = Tmp >> 64; + } + break; + + default: LogMan::Msg::A("Unknown UMulH Size: %d\n", OpSize); break; + } + break; + } + case IR::OP_DIV: { + auto Op = IROp->C(); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + + switch (OpSize) { + case 1: + GD = static_cast(static_cast(Src1)) / static_cast(static_cast(Src2)); + break; + case 2: + GD = static_cast(static_cast(Src1)) / static_cast(static_cast(Src2)); + break; + case 4: + GD = static_cast(static_cast(Src1)) / static_cast(static_cast(Src2)); + break; + case 8: + GD = static_cast(Src1) / static_cast(Src2); + break; + case 16: { + __int128_t Tmp = *GetSrc<__int128_t*>(Op->Header.Args[0]) / *GetSrc<__int128_t*>(Op->Header.Args[1]); + memcpy(GDP, &Tmp, 16); + } + break; + + default: LogMan::Msg::A("Unknown Mul Size: %d\n", OpSize); break; + } + break; + } + + case IR::OP_UDIV: { + auto Op = IROp->C(); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + + switch (OpSize) { + case 1: + GD = static_cast(static_cast(Src1)) / static_cast(static_cast(Src2)); + break; + case 2: + GD = static_cast(static_cast(Src1)) / static_cast(static_cast(Src2)); + break; + case 4: + GD = static_cast(static_cast(Src1)) / static_cast(static_cast(Src2)); + break; + case 8: + GD = static_cast(Src1) / static_cast(Src2); + break; + case 16: { + __uint128_t Tmp = *GetSrc<__uint128_t*>(Op->Header.Args[0]) / *GetSrc<__uint128_t*>(Op->Header.Args[1]); + memcpy(GDP, &Tmp, 16); + } + break; + + default: LogMan::Msg::A("Unknown Mul Size: %d\n", OpSize); break; + } + break; + } + + case IR::OP_REM: { + auto Op = IROp->C(); + uint64_t Src1 = 
*GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + + switch (OpSize) { + case 1: + GD = static_cast(static_cast(Src1)) % static_cast(static_cast(Src2)); + break; + case 2: + GD = static_cast(static_cast(Src1)) % static_cast(static_cast(Src2)); + break; + case 4: + GD = static_cast(static_cast(Src1)) % static_cast(static_cast(Src2)); + break; + case 8: + GD = static_cast(Src1) % static_cast(Src2); + break; + case 16: { + __int128_t Tmp = *GetSrc<__int128_t*>(Op->Header.Args[0]) % *GetSrc<__int128_t*>(Op->Header.Args[1]); + memcpy(GDP, &Tmp, 16); + } + break; + + default: LogMan::Msg::A("Unknown Mul Size: %d\n", OpSize); break; + } + break; + } + + case IR::OP_UREM: { + auto Op = IROp->C(); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + + switch (OpSize) { + case 1: + GD = static_cast(static_cast(Src1)) % static_cast(static_cast(Src2)); + break; + case 2: + GD = static_cast(static_cast(Src1)) % static_cast(static_cast(Src2)); + break; + case 4: + GD = static_cast(static_cast(Src1)) % static_cast(static_cast(Src2)); + break; + case 8: + GD = static_cast(Src1) % static_cast(Src2); + break; + case 16: { + __uint128_t Tmp = *GetSrc<__uint128_t*>(Op->Header.Args[0]) % *GetSrc<__uint128_t*>(Op->Header.Args[1]); + memcpy(GDP, &Tmp, 16); + } + break; + + default: LogMan::Msg::A("Unknown Mul Size: %d\n", OpSize); break; + } + break; + } + + + case IR::OP_OR: { + auto Op = IROp->C(); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + + GD = Src1 | Src2; + break; + } + case IR::OP_AND: { + auto Op = IROp->C(); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + + GD = Src1 & Src2; + break; + } + case IR::OP_XOR: { + auto Op = IROp->C(); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + + GD = Src1 ^ Src2; + break; + } + case IR::OP_LSHL: { + auto Op = IROp->C(); + 
uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + uint8_t Mask = OpSize * 8 - 1; + GD = Src1 << (Src2 & Mask); + break; + } + case IR::OP_LSHR: { + auto Op = IROp->C(); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + uint8_t Mask = OpSize * 8 - 1; + GD = Src1 >> (Src2 & Mask); + break; + } + case IR::OP_ASHR: { + auto Op = IROp->C(); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + uint8_t Mask = OpSize * 8 - 1; + switch (OpSize) { + case 1: + GD = static_cast(Src1) >> (Src2 & Mask); + break; + case 2: + GD = static_cast(Src1) >> (Src2 & Mask); + break; + case 4: + GD = static_cast(Src1) >> (Src2 & Mask); + break; + case 8: + GD = static_cast(Src1) >> (Src2 & Mask); + break; + default: LogMan::Msg::A("Unknown ASHR Size: %d\n", OpSize); break; + }; + break; + } + case IR::OP_ROR: { + auto Op = IROp->C(); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + auto Ror = [] (auto In, auto R) { + auto RotateMask = sizeof(In) * 8 - 1; + R &= RotateMask; + return (In >> R) | (In << (sizeof(In) * 8 - R)); + }; + + switch (OpSize) { + case 1: + GD = Ror(static_cast(Src1), static_cast(Src2)); + break; + case 2: + GD = Ror(static_cast(Src1), static_cast(Src2)); + break; + case 4: + GD = Ror(static_cast(Src1), static_cast(Src2)); + break; + case 8: { + GD = Ror(static_cast(Src1), static_cast(Src2)); + } + break; + default: LogMan::Msg::A("Unknown ROR Size: %d\n", OpSize); break; + } + break; + } + case IR::OP_ROL: { + auto Op = IROp->C(); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + auto Rol = [] (auto In, auto R) { + auto RotateMask = sizeof(In) * 8 - 1; + R &= RotateMask; + return (In << R) | (In >> (sizeof(In) * 8 - R)); + }; + + switch (OpSize) { + case 1: + GD = Rol(static_cast(Src1), static_cast(Src2)); + break; + case 2: + GD = 
Rol(static_cast(Src1), static_cast(Src2)); + break; + case 4: + GD = Rol(static_cast(Src1), static_cast(Src2)); + break; + case 8: { + GD = Rol(static_cast(Src1), static_cast(Src2)); + } + break; + default: LogMan::Msg::A("Unknown ROL Size: %d\n", OpSize); break; + } + break; + } + + case IR::OP_ZEXT: { + auto Op = IROp->C(); + LogMan::Throw::A(Op->SrcSize <= 64, "Can't support Zext of size: %ld", Op->SrcSize); + uint64_t Src = *GetSrc(Op->Header.Args[0]); + if (Op->SrcSize == 64) { + // Zext 64bit to 128bit + __uint128_t SrcLarge = Src; + memcpy(GDP, &SrcLarge, 16); + } + else { + GD = Src & ((1ULL << Op->SrcSize) - 1); + } + break; + } + case IR::OP_SEXT: { + auto Op = IROp->C(); + LogMan::Throw::A(Op->SrcSize <= 64, "Can't support Zext of size: %ld", Op->SrcSize); + switch (Op->SrcSize / 8) { + case 1: + GD = *GetSrc(Op->Header.Args[0]); + break; + case 2: + GD = *GetSrc(Op->Header.Args[0]); + break; + case 4: + GD = *GetSrc(Op->Header.Args[0]); + break; + case 8: + GD = *GetSrc(Op->Header.Args[0]); + break; + default: LogMan::Msg::A("Unknown Sext size: %d", Op->SrcSize / 8); + } + break; + } + case IR::OP_NEG: { + auto Op = IROp->C(); + uint64_t Src = *GetSrc(Op->Header.Args[0]); + GD = ~Src; + break; + } + case IR::OP_POPCOUNT: { + auto Op = IROp->C(); + uint64_t Src = *GetSrc(Op->Header.Args[0]); + GD = __builtin_popcountl(Src); + break; + } + case IR::OP_FINDLSB: { + auto Op = IROp->C(); + uint64_t Src = *GetSrc(Op->Header.Args[0]); + uint64_t Result = __builtin_ffsll(Src); + GD = Result - 1; + break; + } + case IR::OP_FINDMSB: { + auto Op = IROp->C(); + uint64_t Src = *GetSrc(Op->Header.Args[0]); + uint64_t Result = Op->Header.Size * 8 - __builtin_clzll(Src); + GD = Result; + break; + } + + case IR::OP_SELECT: { + auto Op = IROp->C(); + bool CompResult = false; + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + + uint64_t ArgTrue = *GetSrc(Op->Header.Args[2]); + uint64_t ArgFalse = *GetSrc(Op->Header.Args[3]); + 
+ switch (Op->Cond) { + case FEXCore::IR::COND_EQ: + CompResult = Src1 == Src2; + break; + case FEXCore::IR::COND_NEQ: + CompResult = Src1 != Src2; + break; + case FEXCore::IR::COND_GE: + CompResult = Src1 >= Src2; + break; + case FEXCore::IR::COND_LT: + CompResult = Src1 < Src2; + break; + case FEXCore::IR::COND_GT: + CompResult = Src1 > Src2; + break; + case FEXCore::IR::COND_LE: + CompResult = Src1 <= Src2; + break; + case FEXCore::IR::COND_CS: + case FEXCore::IR::COND_CC: + case FEXCore::IR::COND_MI: + case FEXCore::IR::COND_PL: + case FEXCore::IR::COND_VS: + case FEXCore::IR::COND_VC: + case FEXCore::IR::COND_HI: + case FEXCore::IR::COND_LS: + default: + LogMan::Msg::A("Unsupported compare type"); + break; + } + GD = CompResult ? ArgTrue : ArgFalse; + break; + } + case IR::OP_BFI: { + auto Op = IROp->C(); + uint64_t SourceMask = (1ULL << Op->Width) - 1; + if (Op->Width == 64) + SourceMask = ~0ULL; + uint64_t DestMask = ~(SourceMask << Op->lsb); + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + uint64_t Res = (Src1 & DestMask) | ((Src2 & SourceMask) << Op->lsb); + GD = Res; + break; + } + case IR::OP_BFE: { + auto Op = IROp->C(); + LogMan::Throw::A(OpSize <= 16, "OpSize is too large for BFE: %d", OpSize); + if (OpSize == 16) { + LogMan::Throw::A(Op->Width <= 64, "Can't extract width of %d", Op->Width); + __uint128_t SourceMask = (1ULL << Op->Width) - 1; + if (Op->Width == 64) + SourceMask = ~0ULL; + SourceMask <<= Op->lsb; + __uint128_t Src = (*GetSrc<__uint128_t*>(Op->Header.Args[0]) & SourceMask) >> Op->lsb; + memcpy(GDP, &Src, OpSize); + } + else { + uint64_t SourceMask = (1ULL << Op->Width) - 1; + if (Op->Width == 64) + SourceMask = ~0ULL; + SourceMask <<= Op->lsb; + uint64_t Src = *GetSrc(Op->Header.Args[0]); + GD = (Src & SourceMask) >> Op->lsb; + } + break; + } + case IR::OP_PRINT: { + auto Op = IROp->C(); + + if (OpSize <= 8) { + uint64_t Src = *GetSrc(Op->Header.Args[0]); + LogMan::Msg::I(">>>> Value in 
Arg: 0x%lx, %ld", Src, Src); + } + else if (OpSize == 16) { + __uint128_t Src = *GetSrc<__uint128_t*>(Op->Header.Args[0]); + uint64_t Src0 = Src; + uint64_t Src1 = Src >> 64; + LogMan::Msg::I(">>>> Value[0] in Arg: 0x%lx, %ld", Src0, Src0); + LogMan::Msg::I(" Value[1] in Arg: 0x%lx, %ld", Src1, Src1); + } + else + LogMan::Msg::A("Unknown value size: %d", OpSize); + break; + } + case IR::OP_CPUID: { + auto Op = IROp->C(); + uint64_t *DstPtr = GetDest(*WrapperOp); + uint64_t Arg = *GetSrc(Op->Header.Args[0]); + + auto Results = CTX->CPUID.RunFunction(Arg); + memcpy(DstPtr, &Results.Res, sizeof(uint32_t) * 4); + break; + } + case IR::OP_EXTRACTELEMENT: { + auto ExtractElementOp = IROp->C(); + + uintptr_t DstPtr = GetDest(*WrapperOp); + uintptr_t SrcPtr = GetSrc(ExtractElementOp->Header.Args[0]); + + // Offset to the element offset + SrcPtr += IROp->Size * ExtractElementOp->Idx; + memcpy(reinterpret_cast(DstPtr), reinterpret_cast(SrcPtr), IROp->Size); + break; + } + case IR::OP_CAS: { + auto Op = IROp->C(); + auto Size = OpSize; + switch (Size) { + case 1: { + std::atomic *Data = Thread->CTX->MemoryMapper.GetPointer *>(*GetSrc(Op->Header.Args[2])); + LogMan::Throw::A(Data != nullptr, "Couldn't Map pointer to 0x%lx\n", *GetSrc(Op->Header.Args[2])); + + uint8_t Src1 = *GetSrc(Op->Header.Args[0]); + uint8_t Src2 = *GetSrc(Op->Header.Args[1]); + + uint8_t Expected = Src1; + bool Result = Data->compare_exchange_strong(Expected, Src2); + GD = Result ? Src1 : Expected; + break; + } + case 2: { + std::atomic *Data = Thread->CTX->MemoryMapper.GetPointer *>(*GetSrc(Op->Header.Args[2])); + LogMan::Throw::A(Data != nullptr, "Couldn't Map pointer to 0x%lx\n", *GetSrc(Op->Header.Args[2])); + + uint16_t Src1 = *GetSrc(Op->Header.Args[0]); + uint16_t Src2 = *GetSrc(Op->Header.Args[1]); + + uint16_t Expected = Src1; + bool Result = Data->compare_exchange_strong(Expected, Src2); + GD = Result ? 
Src1 : Expected; + break; + } + case 4: { + std::atomic *Data = Thread->CTX->MemoryMapper.GetPointer *>(*GetSrc(Op->Header.Args[2])); + LogMan::Throw::A(Data != nullptr, "Couldn't Map pointer to 0x%lx\n", *GetSrc(Op->Header.Args[2])); + + uint32_t Src1 = *GetSrc(Op->Header.Args[0]); + uint32_t Src2 = *GetSrc(Op->Header.Args[1]); + + uint32_t Expected = Src1; + bool Result = Data->compare_exchange_strong(Expected, Src2); + GD = Result ? Src1 : Expected; + break; + } + case 8: { + std::atomic *Data = Thread->CTX->MemoryMapper.GetPointer *>(*GetSrc(Op->Header.Args[2])); + LogMan::Throw::A(Data != nullptr, "Couldn't Map pointer to 0x%lx\n", *GetSrc(Op->Header.Args[2])); + + uint64_t Src1 = *GetSrc(Op->Header.Args[0]); + uint64_t Src2 = *GetSrc(Op->Header.Args[1]); + + uint64_t Expected = Src1; + bool Result = Data->compare_exchange_strong(Expected, Src2); + GD = Result ? Src1 : Expected; + break; + } + default: LogMan::Msg::A("Unknown CAS size: %d", Size); break; + } + break; + } + case IR::OP_REV: { + auto Op = IROp->C(); + switch (OpSize) { + case 2: GD = __builtin_bswap16(*GetSrc(Op->Header.Args[0])); break; + case 4: GD = __builtin_bswap32(*GetSrc(Op->Header.Args[0])); break; + case 8: GD = __builtin_bswap64(*GetSrc(Op->Header.Args[0])); break; + default: LogMan::Msg::A("Unknown REV size: %d", OpSize); break; + } + break; + } + + case IR::OP_CYCLECOUNTER: { +#ifdef DEBUG_CYCLES + GD = 0; +#else + timespec time; + clock_gettime(CLOCK_REALTIME, &time); + GD = time.tv_nsec + time.tv_sec * 1000000000; +#endif + break; + } + // Vector ops + case IR::OP_CREATEVECTOR2: { + auto Op = IROp->C(); + LogMan::Throw::A(OpSize <= 16, "Can't handle a vector of size: %d", OpSize); + void *Src1 = GetSrc(Op->Header.Args[0]); + void *Src2 = GetSrc(Op->Header.Args[1]); + uint8_t Tmp[16]; + uint8_t ElementSize = OpSize / 2; +#define CREATE_VECTOR(elementsize, type) \ + case elementsize: { \ + auto *Dst_d = reinterpret_cast(Tmp); \ + auto *Src1_d = reinterpret_cast(Src1); \ + auto 
*Src2_d = reinterpret_cast(Src2); \ + Dst_d[0] = *Src1_d; \ + Dst_d[1] = *Src2_d; \ + break; \ + } + switch (ElementSize) { + CREATE_VECTOR(1, uint8_t); + CREATE_VECTOR(2, uint16_t); + CREATE_VECTOR(4, uint32_t); + CREATE_VECTOR(8, uint64_t); + default: LogMan::Msg::A("Unknown Element Size: %d", ElementSize); break; + } +#undef CREATE_VECTOR + memcpy(GDP, Tmp, OpSize); + + break; + } + case IR::OP_SPLATVECTOR4: + case IR::OP_SPLATVECTOR3: + case IR::OP_SPLATVECTOR2: { + auto Op = IROp->C(); + LogMan::Throw::A(OpSize <= 16, "Can't handle a vector of size: %d", OpSize); + void *Src = GetSrc(Op->Header.Args[0]); + uint8_t Tmp[16]; + uint8_t Elements = 0; + + switch (Op->Header.Op) { + case IR::OP_SPLATVECTOR4: Elements = 4; break; + case IR::OP_SPLATVECTOR3: Elements = 3; break; + case IR::OP_SPLATVECTOR2: Elements = 2; break; + default: LogMan::Msg::A("Uknown Splat size"); break; + } + + uint8_t ElementSize = OpSize / Elements; +#define CREATE_VECTOR(elementsize, type) \ + case elementsize: { \ + auto *Dst_d = reinterpret_cast(Tmp); \ + auto *Src_d = reinterpret_cast(Src); \ + for (uint8_t i = 0; i < Elements; ++i) \ + Dst_d[i] = *Src_d;\ + break; \ + } + switch (ElementSize) { + CREATE_VECTOR(1, uint8_t); + CREATE_VECTOR(2, uint16_t); + CREATE_VECTOR(4, uint32_t); + CREATE_VECTOR(8, uint64_t); + default: LogMan::Msg::A("Unknown Element Size: %d", ElementSize); break; + } +#undef CREATE_VECTOR + memcpy(GDP, Tmp, OpSize); + + break; + } + + case IR::OP_VOR: { + auto Op = IROp->C(); + __uint128_t Src1 = *GetSrc<__uint128_t*>(Op->Header.Args[0]); + __uint128_t Src2 = *GetSrc<__uint128_t*>(Op->Header.Args[1]); + + __uint128_t Dst = Src1 | Src2; + memcpy(GDP, &Dst, 16); + break; + } + case IR::OP_VXOR: { + auto Op = IROp->C(); + __uint128_t Src1 = *GetSrc<__uint128_t*>(Op->Header.Args[0]); + __uint128_t Src2 = *GetSrc<__uint128_t*>(Op->Header.Args[1]); + + __uint128_t Dst = Src1 ^ Src2; + memcpy(GDP, &Dst, 16); + break; + } +#define DO_VECTOR_OP(size, type, func) \ + case 
size: { \ + auto *Dst_d = reinterpret_cast(Tmp); \ + auto *Src1_d = reinterpret_cast(Src1); \ + auto *Src2_d = reinterpret_cast(Src2); \ + for (uint8_t i = 0; i < Elements; ++i) { \ + Dst_d[i] = func(Src1_d[i], Src2_d[i]); \ + } \ + break; \ + } +#define DO_VECTOR_SCALAR_OP(size, type, func)\ + case size: { \ + auto *Dst_d = reinterpret_cast(Tmp); \ + auto *Src1_d = reinterpret_cast(Src1); \ + auto *Src2_d = reinterpret_cast(Src2); \ + for (uint8_t i = 0; i < Elements; ++i) { \ + Dst_d[i] = func(Src1_d[i], *Src2_d); \ + } \ + break; \ + } + + case IR::OP_VADD: { + auto Op = IROp->C(); + void *Src1 = GetSrc(Op->Header.Args[0]); + void *Src2 = GetSrc(Op->Header.Args[1]); + uint8_t Tmp[16]; + + uint8_t Elements = Op->RegisterSize / Op->ElementSize; + + auto Func = [](auto a, auto b) { return a + b; }; + switch (Op->ElementSize) { + DO_VECTOR_OP(1, uint8_t, Func) + DO_VECTOR_OP(2, uint16_t, Func) + DO_VECTOR_OP(4, uint32_t, Func) + DO_VECTOR_OP(8, uint64_t, Func) + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + memcpy(GDP, Tmp, Op->RegisterSize); + break; + } + case IR::OP_VSUB: { + auto Op = IROp->C(); + void *Src1 = GetSrc(Op->Header.Args[0]); + void *Src2 = GetSrc(Op->Header.Args[1]); + uint8_t Tmp[16]; + + uint8_t Elements = Op->RegisterSize / Op->ElementSize; + + auto Func = [](auto a, auto b) { return a - b; }; + switch (Op->ElementSize) { + DO_VECTOR_OP(1, uint8_t, Func) + DO_VECTOR_OP(2, uint16_t, Func) + DO_VECTOR_OP(4, uint32_t, Func) + DO_VECTOR_OP(8, uint64_t, Func) + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + memcpy(GDP, Tmp, Op->RegisterSize); + break; + } + case IR::OP_VUMIN: { + auto Op = IROp->C(); + void *Src1 = GetSrc(Op->Header.Args[0]); + void *Src2 = GetSrc(Op->Header.Args[1]); + uint8_t Tmp[16]; + + uint8_t Elements = Op->RegisterSize / Op->ElementSize; + auto Func = [](auto a, auto b) { return std::min(a, b); }; + + switch (Op->ElementSize) { + DO_VECTOR_OP(1, uint8_t, 
Func) + DO_VECTOR_OP(2, uint16_t, Func) + DO_VECTOR_OP(4, uint32_t, Func) + DO_VECTOR_OP(8, uint64_t, Func) + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + memcpy(GDP, Tmp, Op->RegisterSize); + break; + } + case IR::OP_VSMIN: { + auto Op = IROp->C(); + void *Src1 = GetSrc(Op->Header.Args[0]); + void *Src2 = GetSrc(Op->Header.Args[1]); + uint8_t Tmp[16]; + + uint8_t Elements = Op->RegisterSize / Op->ElementSize; + auto Func = [](auto a, auto b) { return std::min(a, b); }; + + switch (Op->ElementSize) { + DO_VECTOR_OP(1, int8_t, Func) + DO_VECTOR_OP(2, int16_t, Func) + DO_VECTOR_OP(4, int32_t, Func) + DO_VECTOR_OP(8, int64_t, Func) + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + memcpy(GDP, Tmp, Op->RegisterSize); + break; + } + case IR::OP_VUSHL: { + auto Op = IROp->C(); + void *Src1 = GetSrc(Op->Header.Args[0]); + void *Src2 = GetSrc(Op->Header.Args[1]); + uint8_t Tmp[16]; + + uint8_t Elements = Op->RegisterSize / Op->ElementSize; + auto Func = [](auto a, auto b) { return a << b; }; + + switch (Op->ElementSize) { + DO_VECTOR_OP(1, uint8_t, Func) + DO_VECTOR_OP(2, uint16_t, Func) + DO_VECTOR_OP(4, uint32_t, Func) + DO_VECTOR_OP(8, uint64_t, Func) + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + memcpy(GDP, Tmp, Op->RegisterSize); + break; + } + + case IR::OP_VUSHLS: { + auto Op = IROp->C(); + void *Src1 = GetSrc(Op->Header.Args[0]); + void *Src2 = GetSrc(Op->Header.Args[1]); + uint8_t Tmp[16]; + + uint8_t Elements = Op->RegisterSize / Op->ElementSize; + auto Func = [](auto a, auto b) { return a << b; }; + + switch (Op->ElementSize) { + DO_VECTOR_SCALAR_OP(1, uint8_t, Func) + DO_VECTOR_SCALAR_OP(2, uint16_t, Func) + DO_VECTOR_SCALAR_OP(4, uint32_t, Func) + DO_VECTOR_SCALAR_OP(8, uint64_t, Func) + DO_VECTOR_SCALAR_OP(16, __uint128_t, Func) + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + memcpy(GDP, Tmp, Op->RegisterSize); + 
break; + } + + case IR::OP_VUSHR: { + auto Op = IROp->C(); + void *Src1 = GetSrc(Op->Header.Args[0]); + void *Src2 = GetSrc(Op->Header.Args[1]); + uint8_t Tmp[16]; + + uint8_t Elements = Op->RegisterSize / Op->ElementSize; + auto Func = [](auto a, auto b) { return a >> b; }; + + switch (Op->ElementSize) { + DO_VECTOR_OP(1, uint8_t, Func) + DO_VECTOR_OP(2, uint16_t, Func) + DO_VECTOR_OP(4, uint32_t, Func) + DO_VECTOR_OP(8, uint64_t, Func) + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + memcpy(GDP, Tmp, Op->RegisterSize); + break; + } + + case IR::OP_VZIP2: + case IR::OP_VZIP: { + auto Op = IROp->C(); + void *Src1 = GetSrc(Op->Header.Args[0]); + void *Src2 = GetSrc(Op->Header.Args[1]); + uint8_t Tmp[16]; + uint8_t Elements = Op->RegisterSize / Op->ElementSize; + uint8_t BaseOffset = IROp->Op == IR::OP_VZIP2 ? (Elements / 2) : 0; + Elements >>= 1; + + switch (Op->ElementSize) { + case 1: { + auto *Dst_d = reinterpret_cast(Tmp); + auto *Src1_d = reinterpret_cast(Src1); + auto *Src2_d = reinterpret_cast(Src2); + for (unsigned i = 0; i < Elements; ++i) { + Dst_d[i*2] = Src1_d[BaseOffset + i]; + Dst_d[i*2+1] = Src2_d[BaseOffset + i]; + } + break; + } + case 2: { + auto *Dst_d = reinterpret_cast(Tmp); + auto *Src1_d = reinterpret_cast(Src1); + auto *Src2_d = reinterpret_cast(Src2); + for (unsigned i = 0; i < Elements; ++i) { + Dst_d[i*2] = Src1_d[BaseOffset + i]; + Dst_d[i*2+1] = Src2_d[BaseOffset + i]; + } + break; + } + case 4: { + auto *Dst_d = reinterpret_cast(Tmp); + auto *Src1_d = reinterpret_cast(Src1); + auto *Src2_d = reinterpret_cast(Src2); + for (unsigned i = 0; i < Elements; ++i) { + Dst_d[i*2] = Src1_d[BaseOffset + i]; + Dst_d[i*2+1] = Src2_d[BaseOffset + i]; + } + break; + } + case 8: { + auto *Dst_d = reinterpret_cast(Tmp); + auto *Src1_d = reinterpret_cast(Src1); + auto *Src2_d = reinterpret_cast(Src2); + for (unsigned i = 0; i < Elements; ++i) { + Dst_d[i*2] = Src1_d[BaseOffset + i]; + Dst_d[i*2+1] = Src2_d[BaseOffset + 
i]; + } + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + + memcpy(GDP, Tmp, Op->RegisterSize); + break; + } + + case IR::OP_VINSELEMENT: { + auto Op = IROp->C(); + void *Src1 = GetSrc(Op->Header.Args[0]); + void *Src2 = GetSrc(Op->Header.Args[1]); + uint8_t Tmp[16]; + + // Copy src1 in to dest + memcpy(Tmp, Src1, Op->RegisterSize); + switch (Op->ElementSize) { + case 1: { + auto *Dst_d = reinterpret_cast(Tmp); + auto *Src2_d = reinterpret_cast(Src2); + Dst_d[Op->DestIdx] = Src2_d[Op->SrcIdx]; + break; + } + case 2: { + auto *Dst_d = reinterpret_cast(Tmp); + auto *Src2_d = reinterpret_cast(Src2); + Dst_d[Op->DestIdx] = Src2_d[Op->SrcIdx]; + break; + } + case 4: { + auto *Dst_d = reinterpret_cast(Tmp); + auto *Src2_d = reinterpret_cast(Src2); + Dst_d[Op->DestIdx] = Src2_d[Op->SrcIdx]; + break; + } + case 8: { + auto *Dst_d = reinterpret_cast(Tmp); + auto *Src2_d = reinterpret_cast(Src2); + Dst_d[Op->DestIdx] = Src2_d[Op->SrcIdx]; + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + }; + memcpy(GDP, Tmp, Op->RegisterSize); + break; + } + case IR::OP_VCMPEQ: { + auto Op = IROp->C(); + void *Src1 = GetSrc(Op->Header.Args[0]); + void *Src2 = GetSrc(Op->Header.Args[1]); + uint8_t Tmp[16]; + + uint8_t Elements = Op->RegisterSize / Op->ElementSize; + auto Func = [](auto a, auto b) { return a == b ? ~0ULL : 0; }; + + switch (Op->ElementSize) { + DO_VECTOR_OP(1, uint8_t, Func) + DO_VECTOR_OP(2, uint16_t, Func) + DO_VECTOR_OP(4, uint32_t, Func) + DO_VECTOR_OP(8, uint64_t, Func) + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + + memcpy(GDP, Tmp, Op->RegisterSize); + break; + } + case IR::OP_VCMPGT: { + auto Op = IROp->C(); + void *Src1 = GetSrc(Op->Header.Args[0]); + void *Src2 = GetSrc(Op->Header.Args[1]); + uint8_t Tmp[16]; + + uint8_t Elements = Op->RegisterSize / Op->ElementSize; + auto Func = [](auto a, auto b) { return a > b ? 
~0ULL : 0; }; + + switch (Op->ElementSize) { + DO_VECTOR_OP(1, int8_t, Func) + DO_VECTOR_OP(2, int16_t, Func) + DO_VECTOR_OP(4, int32_t, Func) + DO_VECTOR_OP(8, int64_t, Func) + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + + memcpy(GDP, Tmp, Op->RegisterSize); + break; + } + + case IR::OP_LUDIV: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit divide from x86-64 + auto Size = OpSize; + switch (Size) { + case 4: { + uint32_t SrcLow = *GetSrc(Op->Header.Args[0]); + uint32_t SrcHigh = *GetSrc(Op->Header.Args[1]); + uint32_t Divisor = *GetSrc(Op->Header.Args[2]); + uint64_t Source = (static_cast(SrcHigh) << 32) | SrcLow; + uint64_t Res = Source / Divisor; + + // We only store the lower bits of the result + GD = static_cast(Res); + break; + } + + case 8: { + uint64_t SrcLow = *GetSrc(Op->Header.Args[0]); + uint64_t SrcHigh = *GetSrc(Op->Header.Args[1]); + uint64_t Divisor = *GetSrc(Op->Header.Args[2]); + __uint128_t Source = (static_cast<__uint128_t>(SrcHigh) << 64) | SrcLow; + __uint128_t Res = Source / Divisor; + + // We only store the lower bits of the result + memcpy(GDP, &Res, Size); + break; + } + default: LogMan::Msg::A("Unknown LUDIV Size: %d", Size); break; + } + break; + } + case IR::OP_LDIV: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit divide from x86-64 + auto Size = OpSize; + switch (Size) { + case 4: { + uint32_t SrcLow = *GetSrc(Op->Header.Args[0]); + uint32_t SrcHigh = *GetSrc(Op->Header.Args[1]); + int32_t Divisor = *GetSrc(Op->Header.Args[2]); + int64_t Source = (static_cast(SrcHigh) << 32) | SrcLow; + int64_t Res = Source / Divisor; + + // We only store the lower bits of the result + GD = static_cast(Res); + break; + } + case 8: { + uint64_t SrcLow = *GetSrc(Op->Header.Args[0]); + uint64_t SrcHigh = *GetSrc(Op->Header.Args[1]); + int64_t Divisor = *GetSrc(Op->Header.Args[2]); + __int128_t Source = 
(static_cast<__int128_t>(SrcHigh) << 64) | SrcLow; + __int128_t Res = Source / Divisor; + + // We only store the lower bits of the result + memcpy(GDP, &Res, Size); + break; + } + default: LogMan::Msg::A("Unknown LDIV Size: %d", Size); break; + } + break; + } + case IR::OP_LUREM: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit Remainder from x86-64 + auto Size = OpSize; + switch (Size) { + + case 4: { + uint32_t SrcLow = *GetSrc(Op->Header.Args[0]); + uint32_t SrcHigh = *GetSrc(Op->Header.Args[1]); + uint32_t Divisor = *GetSrc(Op->Header.Args[2]); + uint64_t Source = (static_cast(SrcHigh) << 32) | SrcLow; + uint64_t Res = Source % Divisor; + + // We only store the lower bits of the result + GD = static_cast(Res); + break; + } + + case 8: { + uint64_t SrcLow = *GetSrc(Op->Header.Args[0]); + uint64_t SrcHigh = *GetSrc(Op->Header.Args[1]); + uint64_t Divisor = *GetSrc(Op->Header.Args[2]); + __uint128_t Source = (static_cast<__uint128_t>(SrcHigh) << 64) | SrcLow; + __uint128_t Res = Source % Divisor; + // We only store the lower bits of the result + memcpy(GDP, &Res, Size); + break; + } + default: LogMan::Msg::A("Unknown LUREM Size: %d", Size); break; + } + break; + } + + case IR::OP_LREM: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit Remainder from x86-64 + auto Size = OpSize; + switch (Size) { + case 4: { + uint32_t SrcLow = *GetSrc(Op->Header.Args[0]); + uint32_t SrcHigh = *GetSrc(Op->Header.Args[1]); + int32_t Divisor = *GetSrc(Op->Header.Args[2]); + int64_t Source = (static_cast(SrcHigh) << 32) | SrcLow; + int64_t Res = Source % Divisor; + + // We only store the lower bits of the result + GD = static_cast(Res); + break; + } + + case 8: { + uint64_t SrcLow = *GetSrc(Op->Header.Args[0]); + uint64_t SrcHigh = *GetSrc(Op->Header.Args[1]); + int64_t Divisor = *GetSrc(Op->Header.Args[2]); + __int128_t Source = (static_cast<__int128_t>(SrcHigh) << 64) | SrcLow; + __int128_t 
Res = Source % Divisor; + // We only store the lower bits of the result + memcpy(GDP, &Res, Size); + break; + } + default: LogMan::Msg::A("Unknown LREM Size: %d", Size); break; + } + break; + } + + default: + LogMan::Msg::A("Unknown IR Op: %d(%s)", IROp->Op, FEXCore::IR::GetName(IROp->Op).data()); + break; + } + ++Begin; + } + + Thread->Stats.InstructionsExecuted.fetch_add(DebugData->second.GuestInstructionCount); +} + +FEXCore::CPU::CPUBackend *CreateInterpreterCore(FEXCore::Context::Context *ctx) { + return new InterpreterCore(ctx); +} + +} diff --git a/Source/Interface/Core/Interpreter/InterpreterCore.h b/Source/Interface/Core/Interpreter/InterpreterCore.h new file mode 100644 index 000000000..8de2183c7 --- /dev/null +++ b/Source/Interface/Core/Interpreter/InterpreterCore.h @@ -0,0 +1,12 @@ +#pragma once + +namespace FEXCore::Context { +struct Context; +} + +namespace FEXCore::CPU { +class CPUBackend; + +FEXCore::CPU::CPUBackend *CreateInterpreterCore(FEXCore::Context::Context *ctx); + +} diff --git a/Source/Interface/Core/JIT/Arm64/JIT.cpp b/Source/Interface/Core/JIT/Arm64/JIT.cpp new file mode 100644 index 000000000..b691281a0 --- /dev/null +++ b/Source/Interface/Core/JIT/Arm64/JIT.cpp @@ -0,0 +1,1452 @@ +#include "Interface/Context/Context.h" +#include "Interface/Core/RegisterAllocation.h" +#include "Interface/Core/InternalThreadState.h" + +#include "Interface/Core/JIT/x86_64/JIT.h" +#include "Interface/HLE/Syscalls.h" + +#if _M_X86_64 +#define VIXL_INCLUDE_SIMULATOR_AARCH64 +#include "aarch64/simulator-aarch64.h" +#endif +#include "aarch64/assembler-aarch64.h" +#include "aarch64/disasm-aarch64.h" +#include "aarch64/macro-assembler-aarch64.h" + +#include +#include +#include + +#include + +namespace FEXCore::CPU { +using namespace vixl; +using namespace vixl::aarch64; +#define STATE x0 +#define MEM_BASE x1 +#define TMP1 x2 +#define TMP2 x3 + +#define VTMP1 v1 +#define VTMP2 v2 +#define VTMP3 v3 + +static uint64_t SyscallThunk(FEXCore::Core::InternalThreadState 
*Thread, FEXCore::SyscallHandler *Handler, FEXCore::HLE::SyscallArguments *Args) +{ + return Handler->HandleSyscall(Thread, Args); +} + +static void CPUIDThunk(FEXCore::CPUIDEmu *CPUID, uint64_t Function, FEXCore::CPUIDEmu::FunctionResults *Results) { + FEXCore::CPUIDEmu::FunctionResults Res = CPUID->RunFunction(Function); + memcpy(Results, &Res, sizeof(FEXCore::CPUIDEmu::FunctionResults)); +} + +// XXX: Switch from MacroAssembler to Assembler once we drop the simulator +class JITCore final : public CPUBackend, public vixl::aarch64::MacroAssembler { +public: + explicit JITCore(FEXCore::Context::Context *ctx, FEXCore::Core::InternalThreadState *Thread); + ~JITCore() override; + std::string GetName() override { return "JIT"; } + void *CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData) override; + + void *MapRegion(void* HostPtr, uint64_t, uint64_t) override { return HostPtr; } + + bool NeedsOpDispatch() override { return true; } + +#if _M_X86_64 + void SimulationExecution(FEXCore::Core::InternalThreadState *Thread); +#endif + +private: + FEXCore::Context::Context *CTX; + FEXCore::Core::InternalThreadState *State; + FEXCore::IR::IRListView const *CurrentIR; + + std::unordered_map JumpTargets; + + /** + * @name Register Allocation + * @{ */ + constexpr static uint32_t NumGPRs = 15; + constexpr static uint32_t NumFPRs = 22; + constexpr static uint32_t RegisterCount = NumGPRs + NumFPRs; + constexpr static uint32_t RegisterClasses = 2; + + constexpr static uint32_t GPRBase = 0; + constexpr static uint32_t GPRClass = 0; + constexpr static uint32_t FPRBase = NumGPRs; + constexpr static uint32_t FPRClass = 1; + + RA::RegisterSet *RASet; + /** @} */ + + void FindNodeClasses(); + bool CalculateLiveRange(uint32_t Nodes); + constexpr static uint8_t RA_32 = 0; + constexpr static uint8_t RA_64 = 1; + constexpr static uint8_t RA_FPR = 2; + + bool HasRA = false; + RA::RegisterGraph *Graph; + uint32_t GetPhys(uint32_t Node); + + template + 
aarch64::Register GetSrc(uint32_t Node); + + template + aarch64::Register GetDst(uint32_t Node); + + aarch64::VRegister GetSrc(uint32_t Node); + aarch64::VRegister GetDst(uint32_t Node); + + struct LiveRange { + uint32_t Begin; + uint32_t End; + }; + + std::vector LiveRanges; +#if DEBUG || _M_X86_64 + vixl::aarch64::Decoder Decoder; +#endif + +#if DEBUG + vixl::aarch64::Disassembler Disasm; +#endif + +#if _M_X86_64 + vixl::aarch64::Simulator Sim; + std::unordered_map> HostToGuest; +#endif + void LoadConstant(vixl::aarch64::Register Reg, uint64_t Constant); +}; + +#if _M_X86_64 +static void SimulatorExecution(FEXCore::Core::InternalThreadState *Thread) { + JITCore *Core = reinterpret_cast(Thread->CPUBackend.get()); + Core->SimulationExecution(Thread); +} + +void JITCore::SimulationExecution(FEXCore::Core::InternalThreadState *Thread) { + using namespace vixl::aarch64; + auto SimulatorAddress = HostToGuest[Thread->State.State.rip]; + //PrintDisassembler PrintDisasm(stdout); + //PrintDisasm.DisassembleBuffer(vixl::aarch64::Instruction::Cast(SimulatorAddress.first), vixl::aarch64::Instruction::Cast(SimulatorAddress.second)); + + Sim.WriteXRegister(0, reinterpret_cast(Thread)); + Sim.RunFrom(vixl::aarch64::Instruction::Cast(SimulatorAddress.first)); +} + +#endif + +JITCore::JITCore(FEXCore::Context::Context *ctx, FEXCore::Core::InternalThreadState *Thread) + : vixl::aarch64::MacroAssembler(1024 * 1024 * 128, vixl::aarch64::PositionDependentCode) + , CTX {ctx} + , State {Thread} +#if _M_X86_64 + , Sim {&Decoder} +#endif +{ + // XXX: Set this to a real minimum feature set in the future + SetCPUFeatures(vixl::CPUFeatures::All()); + + RASet = RA::AllocateRegisterSet(RegisterCount, RegisterClasses); + RA::AddRegisters(RASet, GPRClass, GPRBase, NumGPRs); + RA::AddRegisters(RASet, FPRClass, FPRBase, NumFPRs); + + Graph = RA::AllocateRegisterGraph(RASet, 9000); + LiveRanges.resize(9000); + + // Just set the entire range as executable + auto Buffer = GetBuffer(); + 
  // RWX over the whole code buffer up front — return value unchecked;
  // NOTE(review): consider W^X and checking mprotect's result.
  mprotect(Buffer->GetOffsetAddress(0), Buffer->GetCapacity(), PROT_READ | PROT_WRITE | PROT_EXEC);
#if DEBUG
  // NOTE(review): missing ';' after this call — this will not compile when
  // DEBUG is defined.
  Decoder.AppendVisitor(&Disasm)
#endif
#if _M_X86_64
  Sim.SetCPUFeatures(vixl::CPUFeatures::All());
#endif
}

JITCore::~JITCore() {
  // Release allocator structures in reverse order of construction.
  // NOTE(review): calls are unqualified (no RA::) unlike the constructor —
  // presumably resolved via ADL or a using-directive; confirm.
  FreeRegisterGraph(Graph);
  FreeRegisterSet(RASet);
}

// Materialize an arbitrary immediate: movz writes the low 16 bits and zeroes
// the rest, then movk patches in each non-zero 16-bit chunk.  W registers
// only need 2 chunks, X registers 4.
void JITCore::LoadConstant(vixl::aarch64::Register Reg, uint64_t Constant) {
  bool Is64Bit = Reg.IsX();
  int Segments = Is64Bit ? 4 : 2;

  movz(Reg, (Constant) & 0xFFFF, 0);
  for (int i = 1; i < Segments; ++i) {
    uint16_t Part = (Constant >> (i * 16)) & 0xFFFF;
    if (Part) {
      movk(Reg, Part, i * 16);
    }
  }
}

// Allocatable register pools.  x0-x3 are reserved (thread state, memory
// base, temporaries — see the allocation comment in CompileCode); RA32 is
// the W-view of the same registers as RA64.
// NOTE(review): std::array element/size arguments were stripped by
// extraction — confirm the original declarations.
const std::array RA64 = {
  x4, x5, x6, x7, x8, x9,
  x10, x11, x12, x13, x14, x15,
  x16, x17, x18};
const std::array RA32 = {
  w4, w5, w6, w7, w8, w9,
  w10, w11, w12, w13, w14, w15,
  w16, w17, w18};

// v8..v15 = (lower 64bits) Callee saved
const std::array RAFPR = {
  v3, v4, v5, v6, v7, v8, v16,
  v17, v18, v19, v20, v21, v22,
  v23, v24, v25, v26, v27, v28,
  v29, v30, v31};

// Translate an IR node's graph register to an index into RA64/RA32 (GPR) or
// RAFPR (FPR).  FPR indices are rebased to start at 0.
uint32_t JITCore::GetPhys(uint32_t Node) {
  uint32_t Reg = RA::GetNodeRegister(Graph, Node);

  if (Reg < FPRBase)
    return Reg;
  else if (Reg != ~0U)
    return Reg - FPRBase;
  else
    LogMan::Msg::A("Couldn't Allocate register for node: ssa%d", Node);

  return ~0U;
}

// NOTE(review): both templates below fall off the end without a return when
// RAType is neither RA_64 nor RA_32 — undefined behaviour; an assert/default
// would be safer.  Template parameter lists stripped by extraction.
template
aarch64::Register JITCore::GetSrc(uint32_t Node) {
  uint32_t Reg = GetPhys(Node);
  if (RAType == RA_64)
    return RA64[Reg];
  else if (RAType == RA_32)
    return RA32[Reg];
}

template
aarch64::Register JITCore::GetDst(uint32_t Node) {
  uint32_t Reg = GetPhys(Node);
  if (RAType == RA_64)
    return RA64[Reg];
  else if (RAType == RA_32)
    return RA32[Reg];
}

// FPR accessors: GetPhys already rebased the index into RAFPR.
aarch64::VRegister JITCore::GetSrc(uint32_t Node) {
  uint32_t Reg = GetPhys(Node);
  return RAFPR[Reg];
}

aarch64::VRegister JITCore::GetDst(uint32_t Node) {
  uint32_t Reg = GetPhys(Node);
  return RAFPR[Reg];
}

// Walk the IR once and tag every value-producing node as GPR or FPR so the
// allocator picks from the right pool.
void JITCore::FindNodeClasses() {
  uintptr_t ListBegin = CurrentIR->GetListData();
  uintptr_t
  DataBegin = CurrentIR->GetData();

  IR::NodeWrapperIterator Begin = CurrentIR->begin();
  IR::NodeWrapperIterator End = CurrentIR->end();

  while (Begin != End) {
    using namespace FEXCore::IR;

    NodeWrapper *WrapperOp = Begin();
    OrderedNode *RealNode = reinterpret_cast(WrapperOp->GetPtr(ListBegin));
    FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin);

    if (IROp->HasDest) {
      // XXX: This needs to be better
      switch (IROp->Op) {
        // 128-bit (16-byte) loads must land in a vector register; everything
        // narrower goes to a GPR.
        case OP_LOADCONTEXT: {
          auto Op = IROp->C();
          if (Op->Size == 16)
            RA::SetNodeClass(Graph, WrapperOp->ID(), FPRClass);
          else
            RA::SetNodeClass(Graph, WrapperOp->ID(), GPRClass);
          break;
        }
        case IR::OP_LOADMEM: {
          auto Op = IROp->C();
          if (Op->Size == 16)
            RA::SetNodeClass(Graph, WrapperOp->ID(), FPRClass);
          else
            RA::SetNodeClass(Graph, WrapperOp->ID(), GPRClass);
          break;
        }

        case OP_ZEXT: {
          auto Op = IROp->C();
          LogMan::Throw::A(Op->SrcSize <= 64, "Can't support Zext of size: %ld", Op->SrcSize);

          // SrcSize == 64 is used as a GPR->FPR transfer (see the OP_ZEXT
          // emitter), so the result lives in a vector register.
          if (Op->SrcSize == 64) {
            RA::SetNodeClass(Graph, WrapperOp->ID(), FPRClass);
          }
          else {
            RA::SetNodeClass(Graph, WrapperOp->ID(), GPRClass);
          }
          break;
        }
        // CPUID returns an i32v4-style result, handled in a vector register.
        case OP_CPUID: RA::SetNodeClass(Graph, WrapperOp->ID(), FPRClass); break;
        default:
          // Heuristic: every op from OP_VOR upward is a vector op.
          // NOTE(review): relies on IR opcode enum ordering — fragile if ops
          // are reordered.
          if (IROp->Op >= IR::OP_VOR)
            RA::SetNodeClass(Graph, WrapperOp->ID(), FPRClass);
          else
            RA::SetNodeClass(Graph, WrapperOp->ID(), GPRClass);
          break;
      }
    }
    ++Begin;
  }
}

// Computes per-node live intervals, builds the interference graph, and runs
// the allocator.  Returns true when allocation succeeded.
bool JITCore::CalculateLiveRange(uint32_t Nodes) {
  if (Nodes > LiveRanges.size()) {
    LiveRanges.resize(Nodes);
  }
  // 0xFF-fill sets Begin/End to ~0U ("unset") — assumes LiveRange stays a
  // trivially-copyable pair of uint32_t.
  memset(&LiveRanges.at(0), 0xFF, Nodes * sizeof(LiveRange));

  uintptr_t ListBegin = CurrentIR->GetListData();
  uintptr_t DataBegin = CurrentIR->GetData();

  IR::NodeWrapperIterator Begin = CurrentIR->begin();
  IR::NodeWrapperIterator End = CurrentIR->end();

  while (Begin != End) {
    using namespace FEXCore::IR;
    NodeWrapper *WrapperOp = Begin();
    OrderedNode *RealNode = reinterpret_cast(WrapperOp->GetPtr(ListBegin));
    FEXCore::IR::IROp_Header *IROp
    = RealNode->Op(DataBegin);

    uint32_t Node = WrapperOp->ID();

    // If the destination hasn't yet been set then set it now
    if (IROp->HasDest && LiveRanges[Node].Begin == ~0U) {
      LiveRanges[Node].Begin = Node;
      // Default to ending right where it starts
      LiveRanges[Node].End = Node;
    }

    for (uint8_t i = 0; i < IROp->NumArgs; ++i) {
      uint32_t ArgNode = IROp->Args[i].ID();
      // Set the node end to be at least here
      LiveRanges[ArgNode].End = Node;
    }

    ++Begin;
  }

  // Now that we have all the live ranges calculated we need to add them to our interference graph
  // NOTE(review): this is O(Nodes^2) pairwise overlap testing — acceptable
  // for small blocks, a compile-time hotspot for large ones.
  for (uint32_t i = 0; i < Nodes; ++i) {
    for (uint32_t j = i + 1; j < Nodes; ++j) {
      // Two ranges interfere when neither ends before the other begins.
      if (!(LiveRanges[i].Begin >= LiveRanges[j].End ||
            LiveRanges[j].Begin >= LiveRanges[i].End)) {
        RA::AddNodeInterference(Graph, i, j);
      }
    }
  }

  return RA::AllocateRegisters(Graph);
}

// Main code-generation entry point: runs register allocation over the IR and
// then emits AArch64 code op-by-op.  (Continues beyond this chunk.)
void *JITCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView const *IR, [[maybe_unused]] FEXCore::Core::DebugData *DebugData) {
  using namespace aarch64;
  JumpTargets.clear();
  CurrentIR = IR;

  uintptr_t ListBegin = CurrentIR->GetListData();
  uintptr_t DataBegin = CurrentIR->GetData();

  IR::NodeWrapperIterator Begin = CurrentIR->begin();
  IR::NodeWrapperIterator End = CurrentIR->end();

  uintptr_t ListSize = CurrentIR->GetListSize();
  uint32_t SSACount = ListSize / sizeof(IR::OrderedNode);

  // NOTE(review): Graph was allocated with a fixed 9000-node capacity in the
  // constructor; confirm ResetRegisterGraph handles SSACount above that.
  ResetRegisterGraph(Graph, SSACount);
  FindNodeClasses();
  HasRA = CalculateLiveRange(SSACount);

  LogMan::Throw::A(HasRA, "Arm64 JIT only works with RA");

  // AAPCS64
  // r30 = LR
  // r29 = FP
  // r19..r28 = Callee saved
  // r18 = Platform Register (Matters if we target Windows or iOS)
  // r16..r17 = Inter-procedure scratch
  // r9..r15 = Temp
  // r8 = Indirect Result
  // r0...r7 = Parameter/Results
  //
  // FPRS:
  // v8..v15 = (lower 64bits) Callee saved

  // Our allocation:
  // X0 = ThreadState
  // X1 = MemBase
  //
  // X1-X3 = Temp
  // X4-r18 = RA

  auto Buffer = GetBuffer();
auto Entry = Buffer->GetOffsetAddress(GetCursorOffset()); + + void *Memory = CTX->MemoryMapper.GetMemoryBase(); + LoadConstant(MEM_BASE, (uint64_t)Memory); + + while (Begin != End) { + using namespace FEXCore::IR; + + NodeWrapper *WrapperOp = Begin(); + OrderedNode *RealNode = reinterpret_cast(WrapperOp->GetPtr(ListBegin)); + FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin); + uint8_t OpSize = IROp->Size; + uint32_t Node = WrapperOp->ID(); + + if (HasRA) { + if (0) { + std::stringstream Inst; + auto Name = FEXCore::IR::GetName(IROp->Op); + + if (IROp->HasDest) { + uint32_t PhysReg = RA::GetNodeRegister(Graph, Node); + if (PhysReg >= FPRBase) + Inst << "\tFPR" << GetPhys(Node) << " = " << Name << " "; + else + Inst << "\tReg" << GetPhys(Node) << " = " << Name << " "; + } + else { + Inst << "\t" << Name << " "; + } + + for (uint8_t i = 0; i < IROp->NumArgs; ++i) { + uint32_t ArgNode = IROp->Args[i].ID(); + uint32_t PhysReg = RA::GetNodeRegister(Graph, ArgNode); + if (PhysReg >= FPRBase) + Inst << "FPR" << GetPhys(ArgNode) << (i + 1 == IROp->NumArgs ? "" : ", "); + else + Inst << "Reg" << GetPhys(ArgNode) << (i + 1 == IROp->NumArgs ? 
"" : ", "); + } + + LogMan::Msg::D("%s", Inst.str().c_str()); + } + } + + switch (IROp->Op) { + case IR::OP_BEGINBLOCK: { + auto IsTarget = JumpTargets.find(WrapperOp->ID()); + if (IsTarget == JumpTargets.end()) { + // XXX: This is a memory leak + JumpTargets.try_emplace(WrapperOp->ID(), new aarch64::Label); + } + else { + bind(IsTarget->second); + } + break; + } + case IR::OP_ENDBLOCK: { + auto Op = IROp->C(); + if (Op->RIPIncrement) { + ldr(TMP1, MemOperand(STATE, offsetof(FEXCore::Core::CPUState, rip))); + add(TMP1, TMP1, Operand(Op->RIPIncrement)); + str(TMP1, MemOperand(STATE, offsetof(FEXCore::Core::CPUState, rip))); + } + break; + } + case IR::OP_EXITFUNCTION: + case IR::OP_ENDFUNCTION: { + ret(); + break; + } + case IR::OP_SYSCALL: { + auto Op = IROp->C(); + // Arguments are passed as follows: + // X0: SyscallHandler + // X1: ThreadState + // X2: Pointer to SyscallArguments + + uint64_t SPOffset = AlignUp((2 + RA64.size() + 7 + 2) * 8, 16); + + sub(sp, sp, SPOffset); + for (uint32_t i = 0; i < 7; ++i) + str(GetSrc(Op->Header.Args[i].ID()), MemOperand(sp, 0 + i * 8)); + + int i = 0; + for (auto RA : RA64) { + str(RA, MemOperand(sp, 7 * 8 + i * 8)); + i++; + } + str(STATE, MemOperand(sp, 7 * 8 + RA64.size() * 8 + 1 * 8)); + str(MEM_BASE, MemOperand(sp, 7 * 8 + RA64.size() * 8 + 2 * 8)); + str(lr, MemOperand(sp, 7 * 8 + RA64.size() * 8 + 3 * 8)); + + // x0 = threadstate already + LoadConstant(x1, reinterpret_cast(&CTX->SyscallHandler)); + mov (x2, sp); + CallRuntime(SyscallThunk); + + // Result is now in x0 + // Fix the stack and any values that were stepped on + i = 0; + for (auto RA : RA64) { + ldr(RA, MemOperand(sp, 7 * 8 + i * 8)); + i++; + } + + // Move result to its destination register + mov(GetDst(Node), x0); + + ldr(STATE, MemOperand(sp, 7 * 8 + RA64.size() * 8 + 1 * 8)); + ldr(MEM_BASE, MemOperand(sp, 7 * 8 + RA64.size() * 8 + 2 * 8)); + ldr(lr, MemOperand(sp, 7 * 8 + RA64.size() * 8 + 3 * 8)); + + add(sp, sp, SPOffset); + break; + } + case 
IR::OP_CPUID: { + auto Op = IROp->C(); + + uint64_t SPOffset = AlignUp((RA64.size() + 2 + 2) * 8 + sizeof(FEXCore::CPUIDEmu::FunctionResults), 16); + sub(sp, sp, SPOffset); + + int i = 0; + for (auto RA : RA64) { + str(RA, MemOperand(sp, 0 + i * 8)); + i++; + } + + str(STATE, MemOperand(sp, RA64.size() * 8 + 0 * 8)); + str(MEM_BASE, MemOperand(sp, RA64.size() * 8 + 1 * 8)); + str(lr, MemOperand(sp, RA64.size() * 8 + 2 * 8)); + + // x0 = CPUID Handler + // x1 = CPUID Function + // x2 = Result location + LoadConstant(x0, reinterpret_cast(&CTX->CPUID)); + mov(x1, GetSrc(Op->Header.Args[0].ID())); + add(x2, sp, RA64.size() * 8 + 3 * 8); + CallRuntime(CPUIDThunk); + + i = 0; + for (auto RA : RA64) { + ldr(RA, MemOperand(sp, 0 + i * 8)); + i++; + } + + // Results are in x0, x1 + // Results want to be in a i32v4 vector + auto Dst = GetDst(Node); + ldr(Dst, MemOperand(sp, RA64.size() * 8 + 3 * 8)); + + ldr(STATE, MemOperand(sp, RA64.size() * 8 + 0 * 8)); + ldr(MEM_BASE, MemOperand(sp, RA64.size() * 8 + 1 * 8)); + ldr(lr, MemOperand(sp, RA64.size() * 8 + 2 * 8)); + + add(sp, sp, SPOffset); + + break; + } + case IR::OP_EXTRACTELEMENT: { + auto Op = IROp->C(); + + uint32_t PhysReg = RA::GetNodeRegister(Graph, Op->Header.Args[0].ID()); + if (PhysReg >= FPRBase) { + switch (OpSize) { + case 4: + umov(GetDst(Node), GetSrc(Op->Header.Args[0].ID()).V4S(), Op->Idx); + break; + case 8: + umov(GetDst(Node), GetSrc(Op->Header.Args[0].ID()).V2D(), Op->Idx); + break; + default: LogMan::Msg::A("Unhandled ExtractElementSize: %d", OpSize); + } + } + else { + LogMan::Msg::A("Can't handle extract from GPR yet"); + } + break; + } + case IR::OP_JUMP: { + auto Op = IROp->C(); + + Label *TargetLabel; + auto IsTarget = JumpTargets.find(Op->Header.Args[0].ID()); + if (IsTarget == JumpTargets.end()) { + TargetLabel = JumpTargets.try_emplace(Op->Header.Args[0].ID(), new aarch64::Label).first->second; + } + else { + TargetLabel = IsTarget->second; + } + + b(TargetLabel); + break; + } + + case 
IR::OP_CONDJUMP: { + auto Op = IROp->C(); + + Label *TargetLabel; + auto IsTarget = JumpTargets.find(Op->Header.Args[1].ID()); + if (IsTarget == JumpTargets.end()) { + // XXX: This is a memory leak + TargetLabel = JumpTargets.try_emplace(Op->Header.Args[1].ID(), new aarch64::Label).first->second; + } + else { + TargetLabel = IsTarget->second; + } + + cbnz(GetSrc(Op->Header.Args[0].ID()), TargetLabel); + break; + } + case IR::OP_LOADCONTEXT: { + auto Op = IROp->C(); + switch (Op->Size) { + case 1: + ldrb(GetDst(Node), MemOperand(STATE, Op->Offset)); + break; + case 2: + ldrh(GetDst(Node), MemOperand(STATE, Op->Offset)); + break; + case 4: + ldr(GetDst(Node), MemOperand(STATE, Op->Offset)); + break; + case 8: + ldr(GetDst(Node), MemOperand(STATE, Op->Offset)); + break; + case 16: + ldr(GetDst(Node), MemOperand(STATE, Op->Offset)); + break; + default: LogMan::Msg::A("Unhandled LoadContext size: %d", Op->Size); + } + break; + } + case IR::OP_STORECONTEXT: { + auto Op = IROp->C(); + switch (Op->Size) { + case 1: + strb(GetSrc(Op->Header.Args[0].ID()), MemOperand(STATE, Op->Offset)); + break; + case 2: + strh(GetSrc(Op->Header.Args[0].ID()), MemOperand(STATE, Op->Offset)); + break; + case 4: + str(GetSrc(Op->Header.Args[0].ID()), MemOperand(STATE, Op->Offset)); + break; + case 8: + str(GetSrc(Op->Header.Args[0].ID()), MemOperand(STATE, Op->Offset)); + break; + case 16: + str(GetSrc(Op->Header.Args[0].ID()), MemOperand(STATE, Op->Offset)); + break; + default: LogMan::Msg::A("Unhandled LoadContext size: %d", Op->Size); + } + break; + } + case IR::OP_STOREFLAG: { + auto Op = IROp->C(); + and_(TMP1, GetSrc(Op->Header.Args[0].ID()), 1); + strb(TMP1, MemOperand(STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag)); + break; + } + case IR::OP_LOADFLAG: { + auto Op = IROp->C(); + auto Dst = GetDst(Node); + ldrb(Dst, MemOperand(STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag)); + and_(Dst, Dst, 1); + break; + } + case IR::OP_BREAK: { + auto Op = IROp->C(); 
+ switch (Op->Reason) { + case 4: // HLT + hlt(4); + break; + default: LogMan::Msg::A("Unknown Break reason: %d", Op->Reason); + } + break; + } + case IR::OP_CONSTANT: { + auto Op = IROp->C(); + auto Dst = GetDst(Node); + LoadConstant(Dst, Op->Constant); + break; + } + case IR::OP_ADD: { + auto Op = IROp->C(); + add(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case IR::OP_SUB: { + auto Op = IROp->C(); + sub(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case IR::OP_AND: { + auto Op = IROp->C(); + and_(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case IR::OP_XOR: { + auto Op = IROp->C(); + eor(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case IR::OP_OR: { + auto Op = IROp->C(); + orr(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case IR::OP_MOV: { + auto Op = IROp->C(); + mov(GetDst(Node), GetSrc(Op->Header.Args[0].ID())); + break; + } + case IR::OP_LSHR: { + auto Op = IROp->C(); + if (OpSize == 8) + lsrv(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + else + lsrv(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case IR::OP_ASHR: { + auto Op = IROp->C(); + if (OpSize == 8) + asrv(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + else + asrv(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case IR::OP_LSHL: { + auto Op = IROp->C(); + if (OpSize == 8) + lslv(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + else + lslv(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case IR::OP_ROR: { + auto Op = IROp->C(); + uint8_t Mask = OpSize * 8 - 1; + + switch (OpSize) { + case 4: { + 
rorv(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case 8: { + rorv(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + + default: LogMan::Msg::A("Unhandled ROR size: %d", OpSize); + } + break; + } + + case IR::OP_ROL: { + auto Op = IROp->C(); + uint8_t Mask = OpSize * 8 - 1; + + switch (OpSize) { + case 4: { + movz(TMP1, 32); + sub(TMP1.W(), TMP1.W(), GetSrc(Op->Header.Args[1].ID())); + rorv(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), TMP1.W()); + break; + } + case 8: { + movz(TMP1, 64); + sub(TMP1, TMP1, GetSrc(Op->Header.Args[1].ID())); + rorv(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), TMP1); + break; + } + + default: LogMan::Msg::A("Unhandled ROL size: %d", OpSize); + } + break; + } + + case IR::OP_SEXT: { + auto Op = IROp->C(); + LogMan::Throw::A(Op->SrcSize <= 64, "Can't support Zext of size: %ld", Op->SrcSize); + auto Dst = GetDst(Node); + + switch (Op->SrcSize / 8) { + case 1: + sxtb(Dst, GetSrc(Op->Header.Args[0].ID())); + break; + case 2: + sxth(Dst, GetSrc(Op->Header.Args[0].ID())); + break; + case 4: + sxtw(Dst, GetSrc(Op->Header.Args[0].ID())); + break; + case 8: + mov(Dst, GetSrc(Op->Header.Args[0].ID())); + break; + default: LogMan::Msg::A("Unknown Sext size: %d", Op->SrcSize / 8); + } + break; + } + case IR::OP_ZEXT: { + auto Op = IROp->C(); + LogMan::Throw::A(Op->SrcSize <= 64, "Can't support Zext of size: %ld", Op->SrcSize); + uint32_t PhysReg = RA::GetNodeRegister(Graph, Op->Header.Args[0].ID()); + if (PhysReg >= FPRBase) { + // FPR -> GPR transfer with free truncation + switch (Op->SrcSize) { + case 8: + mov(GetDst(Node), GetSrc(Op->Header.Args[0].ID()).V16B(), 0); + break; + case 16: + mov(GetDst(Node), GetSrc(Op->Header.Args[0].ID()).V8H(), 0); + break; + case 32: + mov(GetDst(Node), GetSrc(Op->Header.Args[0].ID()).V4S(), 0); + break; + case 64: + mov(GetDst(Node), GetSrc(Op->Header.Args[0].ID()).V2D(), 0); + break; + default: 
LogMan::Msg::A("Unhandled Zext size: %d", Op->SrcSize); break; + } + } + else { + if (Op->SrcSize == 64) { + // GPR->FPR transfer + auto Dst = GetDst(Node); + eor(Dst.V16B(), Dst.V16B(), Dst.V16B()); + ins(Dst.V2D(), 0, GetSrc(Op->Header.Args[0].ID())); + } + else { + and_(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), ((1ULL << Op->SrcSize) - 1)); + } + } + break; + } + case IR::OP_MUL: { + auto Op = IROp->C(); + auto Dst = GetDst(Node); + + switch (OpSize) { + case 1: + sxtb(TMP1, GetSrc(Op->Header.Args[0].ID())); + sxtb(TMP2, GetSrc(Op->Header.Args[1].ID())); + mul(Dst, TMP1, TMP2); + sxtb(Dst, Dst); + break; + case 2: + sxth(TMP1, GetSrc(Op->Header.Args[0].ID())); + sxth(TMP2, GetSrc(Op->Header.Args[1].ID())); + mul(Dst, TMP1, TMP2); + sxth(Dst, Dst); + break; + case 4: + sxtw(TMP1, GetSrc(Op->Header.Args[0].ID())); + sxtw(TMP2, GetSrc(Op->Header.Args[1].ID())); + mul(Dst.W(), TMP1.W(), TMP2.W()); + sxtw(Dst, Dst); + break; + case 8: + mul(Dst, GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + default: LogMan::Msg::A("Unknown Sext size: %d", OpSize); + } + break; + } + case IR::OP_UMUL: { + auto Op = IROp->C(); + auto Dst = GetDst(Node); + + switch (OpSize) { + case 1: + uxtb(TMP1, GetSrc(Op->Header.Args[0].ID())); + uxtb(TMP2, GetSrc(Op->Header.Args[1].ID())); + mul(Dst, TMP1, TMP2); + uxtb(Dst, Dst); + break; + case 2: + uxth(TMP1, GetSrc(Op->Header.Args[0].ID())); + uxth(TMP2, GetSrc(Op->Header.Args[1].ID())); + mul(Dst, TMP1, TMP2); + uxth(Dst, Dst); + break; + case 4: + uxtw(TMP1, GetSrc(Op->Header.Args[0].ID())); + uxtw(TMP2, GetSrc(Op->Header.Args[1].ID())); + mul(Dst.W(), TMP1.W(), TMP2.W()); + uxtw(Dst, Dst); + break; + case 8: + mul(Dst, GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + default: LogMan::Msg::A("Unknown Sext size: %d", OpSize); + } + break; + } + + case IR::OP_BFE: { + auto Op = IROp->C(); + LogMan::Throw::A(OpSize <= 16, "OpSize is too large for BFE: %d", OpSize); + auto Dst = 
GetDst(Node); + if (OpSize == 16) { + LogMan::Throw::A(!(Op->lsb < 64 && (Op->lsb + Op->Width > 64)), "Trying to BFE an XMM across the 64bit split: Beginning at %d, ending at %d", Op->lsb, Op->lsb + Op->Width); + uint8_t Offset = Op->lsb; + if (Offset < 64) { + mov(Dst, GetSrc(Op->Header.Args[0].ID()), 0); + } + else { + mov(Dst, GetSrc(Op->Header.Args[0].ID()), 1); + Offset -= 64; + } + + if (Offset) { + lsr(Dst, Dst, Offset); + } + + if (Op->Width != 64) { + ubfx(Dst, Dst, 0, Op->Width); + } + } + else { + lsr(Dst, GetSrc(Op->Header.Args[0].ID()), Op->lsb); + and_(Dst, Dst, ((1ULL << Op->Width) - 1)); + } + break; + } + case IR::OP_POPCOUNT: { + auto Op = IROp->C(); + auto Dst = GetDst(Node); + fmov(VTMP1, GetSrc(Op->Header.Args[0].ID())); + cnt(VTMP1.V8B(), VTMP1.V8B()); + addv(VTMP1.B(), VTMP1.V8B()); + umov(Dst, VTMP1.B(), 0); + break; + } + case IR::OP_FINDLSB: { + auto Op = IROp->C(); + auto Dst = GetDst(Node); + auto Src = GetSrc(Op->Header.Args[0].ID()); + if (OpSize != 8) { + ubfx(TMP1, Src, 0, OpSize * 8); + cmp(TMP1, 0); + rbit(TMP1, TMP1); + clz(Dst, TMP1); + csinv(Dst, Dst, xzr, ne); + } + else { + rbit(TMP1, Src); + cmp(Src, 0); + clz(Dst, TMP1); + csinv(Dst, Dst, xzr, ne); + } + + break; + } + case IR::OP_FINDMSB: { + auto Op = IROp->C(); + auto Dst = GetDst(Node); + movz(TMP1, OpSize * 8); + clz(Dst, GetSrc(Op->Header.Args[0].ID())); + sub(Dst, TMP1, Dst); + break; + } + case IR::OP_CAS: { + auto Op = IROp->C(); + // Args[0]: Expected + // Args[1]: Desired + // Args[2]: Pointer + // DataSrc = *Src1 + // if (DataSrc == Src3) { *Src1 == Src2; } Src2 = DataSrc + // This will write to memory! Careful! 
+ + auto Expected = GetSrc(Op->Header.Args[0].ID()); + auto Desired = GetSrc(Op->Header.Args[1].ID()); + auto MemSrc = GetSrc(Op->Header.Args[2].ID()); + + add(TMP1, MEM_BASE, MemSrc); + mov(TMP2, Expected); + + switch (OpSize) { + case 1: casalb(TMP2.W(), Desired.W(), MemOperand(TMP1)); break; + case 2: casalh(TMP2.W(), Desired.W(), MemOperand(TMP1)); break; + case 4: casal(TMP2.W(), Desired.W(), MemOperand(TMP1)); break; + case 8: casal(TMP2.X(), Desired.X(), MemOperand(TMP1)); break; + default: LogMan::Msg::A("Unsupported: %d", OpSize); + } + + mov(GetDst(Node), TMP2); + break; + } + + case IR::OP_SELECT: { + auto Op = IROp->C(); + + cmp(GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + + switch (Op->Cond) { + case FEXCore::IR::COND_EQ: + csel(GetDst(Node), GetSrc(Op->Header.Args[2].ID()), GetSrc(Op->Header.Args[3].ID()), Condition::eq); + break; + case FEXCore::IR::COND_NEQ: + csel(GetDst(Node), GetSrc(Op->Header.Args[2].ID()), GetSrc(Op->Header.Args[3].ID()), Condition::ne); + break; + case FEXCore::IR::COND_GE: + csel(GetDst(Node), GetSrc(Op->Header.Args[2].ID()), GetSrc(Op->Header.Args[3].ID()), Condition::ge); + break; + case FEXCore::IR::COND_LT: + csel(GetDst(Node), GetSrc(Op->Header.Args[2].ID()), GetSrc(Op->Header.Args[3].ID()), Condition::lo); + break; + case FEXCore::IR::COND_GT: + csel(GetDst(Node), GetSrc(Op->Header.Args[2].ID()), GetSrc(Op->Header.Args[3].ID()), Condition::gt); + break; + case FEXCore::IR::COND_LE: + csel(GetDst(Node), GetSrc(Op->Header.Args[2].ID()), GetSrc(Op->Header.Args[3].ID()), Condition::le); + break; + case FEXCore::IR::COND_CS: + case FEXCore::IR::COND_CC: + case FEXCore::IR::COND_MI: + case FEXCore::IR::COND_PL: + case FEXCore::IR::COND_VS: + case FEXCore::IR::COND_VC: + case FEXCore::IR::COND_HI: + case FEXCore::IR::COND_LS: + default: + LogMan::Msg::A("Unsupported compare type"); + break; + } + + break; + } + case IR::OP_LOADMEM: { + auto Op = IROp->C(); + + auto Dst = GetDst(Node); + switch 
(Op->Size) { + case 1: + ldrb(Dst, MemOperand(MEM_BASE, GetSrc(Op->Header.Args[0].ID()))); + break; + case 2: + ldrh(Dst, MemOperand(MEM_BASE, GetSrc(Op->Header.Args[0].ID()))); + break; + case 4: + ldr(Dst.W(), MemOperand(MEM_BASE, GetSrc(Op->Header.Args[0].ID()))); + break; + case 8: + ldr(Dst, MemOperand(MEM_BASE, GetSrc(Op->Header.Args[0].ID()))); + break; + case 16: + ldr(GetDst(Node), MemOperand(MEM_BASE, GetSrc(Op->Header.Args[0].ID()))); + break; + default: LogMan::Msg::A("Unhandled LoadMem size: %d", Op->Size); + } + break; + } + case IR::OP_STOREMEM: { + auto Op = IROp->C(); + switch (Op->Size) { + case 1: + strb(GetSrc(Op->Header.Args[1].ID()), MemOperand(MEM_BASE, GetSrc(Op->Header.Args[0].ID()))); + break; + case 2: + strh(GetSrc(Op->Header.Args[1].ID()), MemOperand(MEM_BASE, GetSrc(Op->Header.Args[0].ID()))); + break; + case 4: + str(GetSrc(Op->Header.Args[1].ID()), MemOperand(MEM_BASE, GetSrc(Op->Header.Args[0].ID()))); + break; + case 8: + str(GetSrc(Op->Header.Args[1].ID()), MemOperand(MEM_BASE, GetSrc(Op->Header.Args[0].ID()))); + break; + case 16: + str(GetSrc(Op->Header.Args[1].ID()), MemOperand(MEM_BASE, GetSrc(Op->Header.Args[0].ID()))); + break; + default: LogMan::Msg::A("Unhandled StoreMem size: %d", Op->Size); + } + break; + } + case IR::OP_MULH: { + auto Op = IROp->C(); + switch (OpSize) { + case 1: + sxtb(TMP1, GetSrc(Op->Header.Args[0].ID())); + sxtb(TMP2, GetSrc(Op->Header.Args[1].ID())); + mul(TMP1, TMP1, TMP2); + sbfx(GetDst(Node), TMP1, 8, 8); + break; + case 2: + sxth(TMP1, GetSrc(Op->Header.Args[0].ID())); + sxth(TMP2, GetSrc(Op->Header.Args[1].ID())); + mul(TMP1, TMP1, TMP2); + sbfx(GetDst(Node), TMP1, 16, 16); + break; + case 4: + sxtw(TMP1, GetSrc(Op->Header.Args[0].ID())); + sxtw(TMP2, GetSrc(Op->Header.Args[1].ID())); + mul(TMP1, TMP1, TMP2); + sbfx(GetDst(Node), TMP1, 32, 32); + break; + case 8: + smulh(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + default: LogMan::Msg::A("Unknown 
Sext size: %d", OpSize); + } + break; + } + case IR::OP_UMULH: { + auto Op = IROp->C(); + switch (OpSize) { + case 1: + uxtb(TMP1, GetSrc(Op->Header.Args[0].ID())); + uxtb(TMP2, GetSrc(Op->Header.Args[1].ID())); + mul(TMP1, TMP1, TMP2); + ubfx(GetDst(Node), TMP1, 8, 8); + break; + case 2: + uxth(TMP1, GetSrc(Op->Header.Args[0].ID())); + uxth(TMP2, GetSrc(Op->Header.Args[1].ID())); + mul(TMP1, TMP1, TMP2); + ubfx(GetDst(Node), TMP1, 16, 16); + break; + case 4: + uxtw(TMP1, GetSrc(Op->Header.Args[0].ID())); + uxtw(TMP2, GetSrc(Op->Header.Args[1].ID())); + mul(TMP1, TMP1, TMP2); + ubfx(GetDst(Node), TMP1, 32, 32); + break; + case 8: + umulh(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + default: LogMan::Msg::A("Unknown Sext size: %d", OpSize); + } + break; + } + case IR::OP_LUDIV: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit divide from x86-64 + auto Size = OpSize; + switch (Size) { + case 4: { + mov(TMP1, GetSrc(Op->Header.Args[0].ID())); + bfi(TMP1, GetSrc(Op->Header.Args[1].ID()), 32, 32); + udiv(GetDst(Node), TMP1, GetSrc(Op->Header.Args[2].ID())); + break; + } + case 8: { + udiv(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[2].ID())); + break; + } + default: LogMan::Msg::A("Unknown LUDIV Size: %d", Size); break; + } + break; + } + case IR::OP_LDIV: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit divide from x86-64 + auto Size = OpSize; + switch (Size) { + case 4: { + mov(TMP1, GetSrc(Op->Header.Args[0].ID())); + bfi(TMP1, GetSrc(Op->Header.Args[1].ID()), 32, 32); + sdiv(GetDst(Node), TMP1, GetSrc(Op->Header.Args[2].ID())); + break; + } + case 8: { + sdiv(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[2].ID())); + break; + } + default: LogMan::Msg::A("Unknown LUDIV Size: %d", Size); break; + } + break; + } + + case IR::OP_LUREM: { + auto Op = IROp->C(); + // Each source is 
OpSize in size + // So you can have up to a 128bit divide from x86-64 + auto Size = OpSize; + switch (Size) { + case 4: { + auto Divisor = GetSrc(Op->Header.Args[2].ID()); + + mov(TMP1, GetSrc(Op->Header.Args[0].ID())); + bfi(TMP1, GetSrc(Op->Header.Args[1].ID()), 32, 32); + udiv(TMP2, TMP1, Divisor); + + msub(GetDst(Node), TMP2, Divisor, TMP1); + break; + } + case 8: { + auto Dividend = GetSrc(Op->Header.Args[0].ID()); + auto Divisor = GetSrc(Op->Header.Args[2].ID()); + + udiv(TMP1, Dividend, Divisor); + msub(GetDst(Node), TMP1, Divisor, Dividend); + break; + } + default: LogMan::Msg::A("Unknown LUDIV Size: %d", Size); break; + } + break; + } + case IR::OP_LREM: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit divide from x86-64 + auto Size = OpSize; + switch (Size) { + case 4: { + auto Divisor = GetSrc(Op->Header.Args[2].ID()); + + mov(TMP1, GetSrc(Op->Header.Args[0].ID())); + bfi(TMP1, GetSrc(Op->Header.Args[1].ID()), 32, 32); + sdiv(TMP2, TMP1, Divisor); + + msub(GetDst(Node), TMP2, Divisor, TMP1); + break; + } + case 8: { + auto Dividend = GetSrc(Op->Header.Args[0].ID()); + auto Divisor = GetSrc(Op->Header.Args[2].ID()); + + sdiv(TMP1, Dividend, Divisor); + msub(GetDst(Node), TMP1, Divisor, Dividend); + break; + } + default: LogMan::Msg::A("Unknown LUDIV Size: %d", Size); break; + } + break; + } + + case IR::OP_VINSELEMENT: { + auto Op = IROp->C(); + mov(VTMP1, GetSrc(Op->Header.Args[0].ID())); + switch (Op->ElementSize) { + case 1: { + mov(VTMP1.V16B(), Op->DestIdx, GetSrc(Op->Header.Args[1].ID()).V16B(), Op->SrcIdx); + break; + } + case 2: { + mov(VTMP1.V8H(), Op->DestIdx, GetSrc(Op->Header.Args[1].ID()).V8H(), Op->SrcIdx); + break; + } + case 4: { + mov(VTMP1.V4S(), Op->DestIdx, GetSrc(Op->Header.Args[1].ID()).V4S(), Op->SrcIdx); + break; + } + case 8: { + mov(VTMP1.V2D(), Op->DestIdx, GetSrc(Op->Header.Args[1].ID()).V2D(), Op->SrcIdx); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", 
Op->ElementSize); break; + } + mov(GetDst(Node), VTMP1); + break; + } + case IR::OP_VADD: { + auto Op = IROp->C(); + switch (Op->ElementSize) { + case 1: { + add(GetDst(Node).V16B(), GetSrc(Op->Header.Args[0].ID()).V16B(), GetSrc(Op->Header.Args[1].ID()).V16B()); + break; + } + case 2: { + add(GetDst(Node).V8H(), GetSrc(Op->Header.Args[0].ID()).V8H(), GetSrc(Op->Header.Args[1].ID()).V8H()); + break; + } + case 4: { + add(GetDst(Node).V4S(), GetSrc(Op->Header.Args[0].ID()).V4S(), GetSrc(Op->Header.Args[1].ID()).V4S()); + break; + } + case 8: { + add(GetDst(Node).V2D(), GetSrc(Op->Header.Args[0].ID()).V2D(), GetSrc(Op->Header.Args[1].ID()).V2D()); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + break; + } + case IR::OP_VSUB: { + auto Op = IROp->C(); + switch (Op->ElementSize) { + case 1: { + sub(GetDst(Node).V16B(), GetSrc(Op->Header.Args[0].ID()).V16B(), GetSrc(Op->Header.Args[1].ID()).V16B()); + break; + } + case 2: { + sub(GetDst(Node).V8H(), GetSrc(Op->Header.Args[0].ID()).V8H(), GetSrc(Op->Header.Args[1].ID()).V8H()); + break; + } + case 4: { + sub(GetDst(Node).V4S(), GetSrc(Op->Header.Args[0].ID()).V4S(), GetSrc(Op->Header.Args[1].ID()).V4S()); + break; + } + case 8: { + sub(GetDst(Node).V2D(), GetSrc(Op->Header.Args[0].ID()).V2D(), GetSrc(Op->Header.Args[1].ID()).V2D()); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + break; + } + case IR::OP_VCMPEQ: { + auto Op = IROp->C(); + LogMan::Throw::A(Op->RegisterSize == 16, "Can't handle register size of: %d", Op->RegisterSize); + switch (Op->ElementSize) { + case 1: { + cmeq(GetDst(Node).V16B(), GetSrc(Op->Header.Args[0].ID()).V16B(), GetSrc(Op->Header.Args[1].ID()).V16B()); + break; + } + case 2: { + cmeq(GetDst(Node).V8H(), GetSrc(Op->Header.Args[0].ID()).V8H(), GetSrc(Op->Header.Args[1].ID()).V8H()); + break; + } + case 4: { + cmeq(GetDst(Node).V4S(), GetSrc(Op->Header.Args[0].ID()).V4S(), 
GetSrc(Op->Header.Args[1].ID()).V4S()); + break; + } + case 8: { + cmeq(GetDst(Node).V2D(), GetSrc(Op->Header.Args[0].ID()).V2D(), GetSrc(Op->Header.Args[1].ID()).V2D()); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + break; + } + case IR::OP_VCMPGT: { + auto Op = IROp->C(); + LogMan::Throw::A(Op->RegisterSize == 16, "Can't handle register size of: %d", Op->RegisterSize); + switch (Op->ElementSize) { + case 1: { + cmgt(GetDst(Node).V16B(), GetSrc(Op->Header.Args[0].ID()).V16B(), GetSrc(Op->Header.Args[1].ID()).V16B()); + break; + } + case 2: { + cmgt(GetDst(Node).V8H(), GetSrc(Op->Header.Args[0].ID()).V8H(), GetSrc(Op->Header.Args[1].ID()).V8H()); + break; + } + case 4: { + cmgt(GetDst(Node).V4S(), GetSrc(Op->Header.Args[0].ID()).V4S(), GetSrc(Op->Header.Args[1].ID()).V4S()); + break; + } + case 8: { + cmgt(GetDst(Node).V2D(), GetSrc(Op->Header.Args[0].ID()).V2D(), GetSrc(Op->Header.Args[1].ID()).V2D()); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + break; + } + case IR::OP_VZIP: { + auto Op = IROp->C(); + LogMan::Throw::A(Op->RegisterSize == 16, "Can't handle register size of: %d", Op->RegisterSize); + switch (Op->ElementSize) { + case 1: { + zip1(GetDst(Node).V16B(), GetSrc(Op->Header.Args[0].ID()).V16B(), GetSrc(Op->Header.Args[1].ID()).V16B()); + break; + } + case 2: { + zip1(GetDst(Node).V8H(), GetSrc(Op->Header.Args[0].ID()).V8H(), GetSrc(Op->Header.Args[1].ID()).V8H()); + break; + } + case 4: { + zip1(GetDst(Node).V4S(), GetSrc(Op->Header.Args[0].ID()).V4S(), GetSrc(Op->Header.Args[1].ID()).V4S()); + break; + } + case 8: { + zip1(GetDst(Node).V2D(), GetSrc(Op->Header.Args[0].ID()).V2D(), GetSrc(Op->Header.Args[1].ID()).V2D()); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + break; + } + case IR::OP_VZIP2: { + auto Op = IROp->C(); + LogMan::Throw::A(Op->RegisterSize == 16, "Can't handle register size of: %d", 
Op->RegisterSize); + switch (Op->ElementSize) { + case 1: { + zip2(GetDst(Node).V16B(), GetSrc(Op->Header.Args[0].ID()).V16B(), GetSrc(Op->Header.Args[1].ID()).V16B()); + break; + } + case 2: { + zip2(GetDst(Node).V8H(), GetSrc(Op->Header.Args[0].ID()).V8H(), GetSrc(Op->Header.Args[1].ID()).V8H()); + break; + } + case 4: { + zip2(GetDst(Node).V4S(), GetSrc(Op->Header.Args[0].ID()).V4S(), GetSrc(Op->Header.Args[1].ID()).V4S()); + break; + } + case 8: { + zip2(GetDst(Node).V2D(), GetSrc(Op->Header.Args[0].ID()).V2D(), GetSrc(Op->Header.Args[1].ID()).V2D()); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + break; + } + case IR::OP_VOR: { + auto Op = IROp->C(); + orr(GetDst(Node).V16B(), GetSrc(Op->Header.Args[0].ID()).V16B(), GetSrc(Op->Header.Args[1].ID()).V16B()); + break; + } + case IR::OP_VXOR: { + auto Op = IROp->C(); + eor(GetDst(Node).V16B(), GetSrc(Op->Header.Args[0].ID()).V16B(), GetSrc(Op->Header.Args[1].ID()).V16B()); + break; + } + case IR::OP_VUSHLS: { + auto Op = IROp->C(); + + switch (Op->ElementSize) { + case 2: { + dup(VTMP1.V8H(), GetSrc(Op->Header.Args[1].ID())); + ushl(GetDst(Node).V8H(), GetSrc(Op->Header.Args[0].ID()).V8H(), VTMP1.V8H()); + break; + } + case 4: { + dup(VTMP1.V4S(), GetSrc(Op->Header.Args[1].ID())); + ushl(GetDst(Node).V4S(), GetSrc(Op->Header.Args[0].ID()).V4S(), VTMP1.V4S()); + break; + } + case 8: { + dup(VTMP1.V2D(), GetSrc(Op->Header.Args[1].ID())); + ushl(GetDst(Node).V2D(), GetSrc(Op->Header.Args[0].ID()).V2D(), VTMP1.V2D()); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + break; + } + case IR::OP_CYCLECOUNTER: { + if (0) + movz(GetDst(Node), 0); + else + mrs(GetDst(Node), CNTVCT_EL0); + break; + } + default: + LogMan::Msg::A("Unknown IR Op: %d(%s)", IROp->Op, FEXCore::IR::GetName(IROp->Op).data()); + break; + } + + ++Begin; + } + + FinalizeCode(); +#if _M_X86_64 + auto CodeEnd = Buffer->GetOffsetAddress(GetCursorOffset()); + 
HostToGuest[State->State.State.rip] = std::make_pair(Entry, CodeEnd); + return (void*)SimulatorExecution; +#else + return reinterpret_cast(Entry); +#endif +} + + +FEXCore::CPU::CPUBackend *CreateJITCore(FEXCore::Context::Context *ctx, FEXCore::Core::InternalThreadState *Thread) { + return new JITCore(ctx, Thread); +} +} diff --git a/Source/Interface/Core/JIT/JITCore.h b/Source/Interface/Core/JIT/JITCore.h new file mode 100644 index 000000000..2306dd579 --- /dev/null +++ b/Source/Interface/Core/JIT/JITCore.h @@ -0,0 +1,15 @@ +#pragma once + +namespace FEXCore::Context { +struct Context; +} + +namespace FEXCore::Core { +struct InternalThreadState; +} + +namespace FEXCore::CPU { +class CPUBackend; + +FEXCore::CPU::CPUBackend *CreateJITCore(FEXCore::Context::Context *ctx, FEXCore::Core::InternalThreadState *Thread); +} diff --git a/Source/Interface/Core/JIT/x86_64/JIT.cpp b/Source/Interface/Core/JIT/x86_64/JIT.cpp new file mode 100644 index 000000000..b0468cce7 --- /dev/null +++ b/Source/Interface/Core/JIT/x86_64/JIT.cpp @@ -0,0 +1,2798 @@ +#include "Interface/Context/Context.h" +#include "Interface/Core/RegisterAllocation.h" +#include "Interface/Core/InternalThreadState.h" + +#include "Interface/Core/JIT/x86_64/JIT.h" +#include +using namespace Xbyak; + +#include +#include +#include + +namespace FEXCore::CPU { +// Temp registers +// rax, rcx, rdx, rsi, r8, r9, +// r10, r11 +// +// Callee Saved +// rbx, rbp, r12, r13, r14, r15 +// +// 1St Argument: rdi +// XMM: +// All temp +// r11 assigned to temp state +#define TEMP_STACK r11 +#define STATE rdi + +class JITCore final : public CPUBackend, public Xbyak::CodeGenerator { +public: + explicit JITCore(FEXCore::Context::Context *ctx); + ~JITCore() override; + std::string GetName() override { return "JIT"; } + void *CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData) override; + + void *MapRegion(void* HostPtr, uint64_t, uint64_t) override { return HostPtr; } + + bool NeedsOpDispatch() override 
{ return true; } + +private: + FEXCore::Context::Context *CTX; + FEXCore::IR::IRListView const *CurrentIR; + std::unordered_map JumpTargets; + + std::vector Stack; + bool MemoryDebug = false; + + /** + * @name Register Allocation + * @{ */ + constexpr static uint32_t NumGPRs = 11; + constexpr static uint32_t NumXMMs = 11; + constexpr static uint32_t RegisterCount = NumGPRs + NumXMMs; + constexpr static uint32_t RegisterClasses = 2; + + constexpr static uint32_t GPRBase = 0; + constexpr static uint32_t GPRClass = 0; + constexpr static uint32_t XMMBase = NumGPRs; + constexpr static uint32_t XMMClass = 1; + + RA::RegisterSet *RASet; + /** @} */ + + void FindNodeClasses(); + bool CalculateLiveRange(uint32_t Nodes); + constexpr static uint8_t RA_8 = 0; + constexpr static uint8_t RA_16 = 1; + constexpr static uint8_t RA_32 = 2; + constexpr static uint8_t RA_64 = 3; + constexpr static uint8_t RA_XMM = 4; + + bool HasRA = false; + RA::RegisterGraph *Graph; + uint32_t GetPhys(uint32_t Node); + + template + Xbyak::Reg GetSrc(uint32_t Node); + + template + Xbyak::Reg GetDst(uint32_t Node); + + Xbyak::Xmm GetSrc(uint32_t Node); + Xbyak::Xmm GetDst(uint32_t Node); + + struct LiveRange { + uint32_t Begin; + uint32_t End; + }; + + std::vector LiveRanges; +}; + +JITCore::JITCore(FEXCore::Context::Context *ctx) + : CodeGenerator(1024 * 1024 * 32) + , CTX {ctx} { + Stack.resize(9000 * 16 * 64); + + RASet = RA::AllocateRegisterSet(RegisterCount, RegisterClasses); + RA::AddRegisters(RASet, GPRClass, GPRBase, NumGPRs); + RA::AddRegisters(RASet, XMMClass, XMMBase, NumXMMs); + + Graph = RA::AllocateRegisterGraph(RASet, 9000); + LiveRanges.resize(9000); +} + +JITCore::~JITCore() { + printf("Used %ld bytes for compiling\n", getCurr() - getCode()); + FreeRegisterGraph(Graph); + FreeRegisterSet(RASet); +} + +static void LoadMem(uint64_t Addr, uint64_t Data, uint8_t Size) { + LogMan::Msg::D("Loading from guestmem: 0x%lx (%d)", Addr, Size); + LogMan::Msg::D("\tLoading: 0x%016lx", Data); +} + 
+static void StoreMem(uint64_t Addr, uint64_t Data, uint8_t Size) { + LogMan::Msg::D("Storing guestmem: 0x%lx (%d)", Addr, Size); + LogMan::Msg::D("\tStoring: 0x%016lx", Data); +} + +void JITCore::FindNodeClasses() { + uintptr_t ListBegin = CurrentIR->GetListData(); + uintptr_t DataBegin = CurrentIR->GetData(); + + IR::NodeWrapperIterator Begin = CurrentIR->begin(); + IR::NodeWrapperIterator End = CurrentIR->end(); + + while (Begin != End) { + using namespace FEXCore::IR; + + NodeWrapper *WrapperOp = Begin(); + OrderedNode *RealNode = reinterpret_cast(WrapperOp->GetPtr(ListBegin)); + FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin); + + if (IROp->HasDest) { + // XXX: This needs to be better + switch (IROp->Op) { + case OP_LOADCONTEXT: { + auto Op = IROp->C(); + if (Op->Size == 16) + RA::SetNodeClass(Graph, WrapperOp->ID(), XMMClass); + else + RA::SetNodeClass(Graph, WrapperOp->ID(), GPRClass); + break; + } + case OP_STORECONTEXT: { + auto Op = IROp->C(); + if (Op->Size == 16) + RA::SetNodeClass(Graph, WrapperOp->ID(), XMMClass); + else + RA::SetNodeClass(Graph, WrapperOp->ID(), GPRClass); + break; + } + case IR::OP_LOADMEM: { + auto Op = IROp->C(); + if (Op->Size == 16) + RA::SetNodeClass(Graph, WrapperOp->ID(), XMMClass); + else + RA::SetNodeClass(Graph, WrapperOp->ID(), GPRClass); + break; + } + + case OP_ZEXT: { + auto Op = IROp->C(); + LogMan::Throw::A(Op->SrcSize <= 64, "Can't support Zext of size: %ld", Op->SrcSize); + + if (Op->SrcSize == 64) { + RA::SetNodeClass(Graph, WrapperOp->ID(), XMMClass); + } + else { + RA::SetNodeClass(Graph, WrapperOp->ID(), GPRClass); + } + break; + } + case OP_CPUID: RA::SetNodeClass(Graph, WrapperOp->ID(), XMMClass); break; + default: + if (IROp->Op >= IR::OP_VOR) + RA::SetNodeClass(Graph, WrapperOp->ID(), XMMClass); + else + RA::SetNodeClass(Graph, WrapperOp->ID(), GPRClass); + break; + } + } + ++Begin; + } +} + +bool JITCore::CalculateLiveRange(uint32_t Nodes) { + if (Nodes > LiveRanges.size()) { + 
LiveRanges.resize(Nodes); + } + memset(&LiveRanges.at(0), 0xFF, Nodes * sizeof(LiveRange)); + + uintptr_t ListBegin = CurrentIR->GetListData(); + uintptr_t DataBegin = CurrentIR->GetData(); + + IR::NodeWrapperIterator Begin = CurrentIR->begin(); + IR::NodeWrapperIterator End = CurrentIR->end(); + + while (Begin != End) { + using namespace FEXCore::IR; + NodeWrapper *WrapperOp = Begin(); + OrderedNode *RealNode = reinterpret_cast(WrapperOp->GetPtr(ListBegin)); + FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin); + + uint32_t Node = WrapperOp->ID(); + + // If the destination hasn't yet been set then set it now + if (IROp->HasDest && LiveRanges[Node].Begin == ~0U) { + LiveRanges[Node].Begin = Node; + // Default to ending right where it starts + LiveRanges[Node].End = Node; + } + + for (uint8_t i = 0; i < IROp->NumArgs; ++i) { + uint32_t ArgNode = IROp->Args[i].ID(); + // Set the node end to be at least here + LiveRanges[ArgNode].End = Node; + } + + ++Begin; + } + + // Now that we have all the live ranges calculated we need to add them to our interference graph + for (uint32_t i = 0; i < Nodes; ++i) { + for (uint32_t j = i + 1; j < Nodes; ++j) { + if (!(LiveRanges[i].Begin >= LiveRanges[j].End || + LiveRanges[j].Begin >= LiveRanges[i].End)) { + RA::AddNodeInterference(Graph, i, j); + } + } + } + + return RA::AllocateRegisters(Graph); +} + +uint32_t JITCore::GetPhys(uint32_t Node) { + uint32_t Reg = RA::GetNodeRegister(Graph, Node); + + if (Reg < XMMBase) + return Reg; + else if (Reg != ~0U) + return Reg - XMMBase; + else + LogMan::Msg::A("Couldn't Allocate register for node: ssa%d", Node); + + return ~0U; +} + +using namespace Xbyak::util; +const std::array RA64 = { rsi, r8, r9, r10, r11, rbx, rbp, r12, r13, r14, r15 }; +const std::array RA32 = { esi, r8d, r9d, r10d, r11d, ebx, ebp, r12d, r13d, r14d, r15d }; +const std::array RA16 = { si, r8w, r9w, r10w, r11w, bx, bp, r12w, r13w, r14w, r15w }; +const std::array RA8 = { sil, r8b, r9b, r10b, r11b, bl, bpl, r12b, 
r13b, r14b, r15b }; +const std::array RAXMM = { xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10 }; +const std::array RAXMM_x = { xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10 }; + +template +Xbyak::Reg JITCore::GetSrc(uint32_t Node) { + // rax, rcx, rdx, rsi, r8, r9, + // r10 + // Callee Saved + // rbx, rbp, r12, r13, r14, r15 + uint32_t Reg = GetPhys(Node); + if (RAType == RA_64) + return RA64[Reg]; + else if (RAType == RA_XMM) + return RAXMM[Reg]; + else if (RAType == RA_32) + return RA32[Reg]; + else if (RAType == RA_16) + return RA16[Reg]; + else if (RAType == RA_8) + return RA8[Reg]; +} + +Xbyak::Xmm JITCore::GetSrc(uint32_t Node) { + uint32_t Reg = GetPhys(Node); + return RAXMM_x[Reg]; +} + +template +Xbyak::Reg JITCore::GetDst(uint32_t Node) { + uint32_t Reg = GetPhys(Node); + if (RAType == RA_64) + return RA64[Reg]; + else if (RAType == RA_XMM) + return RAXMM[Reg]; + else if (RAType == RA_32) + return RA32[Reg]; + else if (RAType == RA_16) + return RA16[Reg]; + else if (RAType == RA_8) + return RA8[Reg]; +} + +Xbyak::Xmm JITCore::GetDst(uint32_t Node) { + uint32_t Reg = GetPhys(Node); + return RAXMM_x[Reg]; +} + +void *JITCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView const *IR, [[maybe_unused]] FEXCore::Core::DebugData *DebugData) { + JumpTargets.clear(); + CurrentIR = IR; + + uintptr_t ListBegin = CurrentIR->GetListData(); + uintptr_t DataBegin = CurrentIR->GetData(); + + IR::NodeWrapperIterator Begin = CurrentIR->begin(); + IR::NodeWrapperIterator End = CurrentIR->end(); + + uintptr_t ListSize = CurrentIR->GetListSize(); + + uint32_t SSACount = ListSize / sizeof(IR::OrderedNode); + uint64_t ListStackSize = SSACount * 16; + if (ListStackSize > Stack.size()) { + Stack.resize(ListStackSize); + } + + void *Entry = getCurr(); + + ResetRegisterGraph(Graph, SSACount); + FindNodeClasses(); + HasRA = CalculateLiveRange(SSACount); + + if (HasRA) { + push(rbx); + push(rbp); + push(r12); + push(r13); + push(r14); + 
push(r15); + } + else { + mov(TEMP_STACK, reinterpret_cast(&Stack.at(0))); + } + while (Begin != End) { + using namespace FEXCore::IR; + + NodeWrapper *WrapperOp = Begin(); + OrderedNode *RealNode = reinterpret_cast(WrapperOp->GetPtr(ListBegin)); + FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin); + uint8_t OpSize = IROp->Size; + uint32_t Node = WrapperOp->ID(); + + if (HasRA) { +#ifdef DEBUG_RA + std::stringstream Inst; + auto Name = FEXCore::IR::GetName(IROp->Op); + + if (IROp->HasDest) { + uint32_t PhysReg = RA::GetNodeRegister(Graph, Node); + if (PhysReg >= XMMBase) + Inst << "\tXMM" << GetPhys(Node) << " = " << Name << " "; + else + Inst << "\tReg" << GetPhys(Node) << " = " << Name << " "; + } + else { + Inst << "\t" << Name << " "; + } + + for (uint8_t i = 0; i < IROp->NumArgs; ++i) { + uint32_t ArgNode = IROp->Args[i].ID(); + uint32_t PhysReg = RA::GetNodeRegister(Graph, ArgNode); + if (PhysReg >= XMMBase) + Inst << "XMM" << GetPhys(ArgNode) << (i + 1 == IROp->NumArgs ? "" : ", "); + else + Inst << "Reg" << GetPhys(ArgNode) << (i + 1 == IROp->NumArgs ? 
"" : ", "); + } + + LogMan::Msg::D("%s", Inst.str().c_str()); +#endif + } + + switch (IROp->Op) { + case IR::OP_BEGINBLOCK: { + auto IsTarget = JumpTargets.find(WrapperOp->ID()); + if (IsTarget == JumpTargets.end()) { + JumpTargets[WrapperOp->ID()] = L(); + } + else { + L(IsTarget->second); + } + break; + } + case IR::OP_ENDBLOCK: { + auto Op = IROp->C(); + if (Op->RIPIncrement) { + add(qword [STATE + offsetof(FEXCore::Core::CPUState, rip)], Op->RIPIncrement); + } + break; + } + case IR::OP_EXITFUNCTION: + case IR::OP_ENDFUNCTION: { + if (HasRA) { + pop(r15); + pop(r14); + pop(r13); + pop(r12); + pop(rbp); + pop(rbx); + } + ret(); + break; + } + case IR::OP_BREAK: { + auto Op = IROp->C(); + switch (Op->Reason) { + case 4: // HLT + ud2(); + break; + default: LogMan::Msg::A("Unknown Break reason: %d", Op->Reason); + } + } + break; + case IR::OP_JUMP: { + auto Op = IROp->C(); + + Label *TargetLabel; + auto IsTarget = JumpTargets.find(Op->Header.Args[0].ID()); + if (IsTarget == JumpTargets.end()) { + TargetLabel = &JumpTargets.try_emplace(Op->Header.Args[0].ID(), Label{}).first->second; + } + else { + TargetLabel = &IsTarget->second; + } + + jmp(*TargetLabel); + break; + } + default: break; + } + + if (HasRA) { + switch (IROp->Op) { + case IR::OP_CONDJUMP: { + auto Op = IROp->C(); + + Label *TargetLabel; + auto IsTarget = JumpTargets.find(Op->Header.Args[1].ID()); + if (IsTarget == JumpTargets.end()) { + TargetLabel = &JumpTargets.try_emplace(Op->Header.Args[1].ID(), Label{}).first->second; + } + else { + TargetLabel = &IsTarget->second; + } + + cmp(GetSrc(Op->Header.Args[0].ID()), 0); + jne(*TargetLabel); + break; + } + case IR::OP_LOADCONTEXT: { + auto Op = IROp->C(); + switch (Op->Size) { + case 1: { + mov(GetDst(Node), byte [STATE + Op->Offset]); + } + break; + case 2: { + mov(GetDst(Node), word [STATE + Op->Offset]); + } + break; + case 4: { + mov(GetDst(Node), dword [STATE + Op->Offset]); + } + break; + case 8: { + mov(GetDst(Node), qword [STATE + Op->Offset]); + 
} + break; + case 16: { + if (Op->Offset % 16 == 0) + movaps(GetDst(Node), xword [STATE + Op->Offset]); + else + movups(GetDst(Node), xword [STATE + Op->Offset]); + } + break; + default: LogMan::Msg::A("Unhandled LoadContext size: %d", Op->Size); + } + break; + } + case IR::OP_STORECONTEXT: { + auto Op = IROp->C(); + + switch (Op->Size) { + case 1: { + mov(byte [STATE + Op->Offset], GetSrc(Op->Header.Args[0].ID())); + } + break; + + case 2: { + mov(word [STATE + Op->Offset], GetSrc(Op->Header.Args[0].ID())); + } + break; + case 4: { + mov(dword [STATE + Op->Offset], GetSrc(Op->Header.Args[0].ID())); + } + break; + case 8: { + mov(qword [STATE + Op->Offset], GetSrc(Op->Header.Args[0].ID())); + } + break; + case 16: { + if (Op->Offset % 16 == 0) + movaps(xword [STATE + Op->Offset], GetSrc(Op->Header.Args[0].ID())); + else + movups(xword [STATE + Op->Offset], GetSrc(Op->Header.Args[0].ID())); + } + break; + default: LogMan::Msg::A("Unhandled StoreContext size: %d", Op->Size); + } + break; + } + case IR::OP_ADD: { + auto Op = IROp->C(); + auto Dst = GetDst(Node); + mov(rax, GetSrc(Op->Header.Args[1].ID())); + add(rax, GetSrc(Op->Header.Args[0].ID())); + mov(Dst, rax); + break; + } + case IR::OP_SUB: { + auto Op = IROp->C(); + auto Dst = GetDst(Node); + mov(rax, GetSrc(Op->Header.Args[0].ID())); + sub(rax, GetSrc(Op->Header.Args[1].ID())); + mov(Dst, rax); + break; + } + case IR::OP_XOR: { + auto Op = IROp->C(); + auto Dst = GetDst(Node); + mov(rax, GetSrc(Op->Header.Args[1].ID())); + xor(rax, GetSrc(Op->Header.Args[0].ID())); + mov(Dst, rax); + break; + } + case IR::OP_AND: { + auto Op = IROp->C(); + auto Dst = GetDst(Node); + mov(rax, GetSrc(Op->Header.Args[1].ID())); + and(rax, GetSrc(Op->Header.Args[0].ID())); + mov(Dst, rax); + break; + } + case IR::OP_OR: { + auto Op = IROp->C(); + auto Dst = GetDst(Node); + mov(rax, GetSrc(Op->Header.Args[1].ID())); + or (rax, GetSrc(Op->Header.Args[0].ID())); + mov(Dst, rax); + break; + } + case IR::OP_MOV: { + auto Op = 
IROp->C(); + mov (GetDst(Node), GetSrc(Op->Header.Args[0].ID())); + break; + } + case IR::OP_CONSTANT: { + auto Op = IROp->C(); + mov(GetDst(Node), Op->Constant); + break; + } + case IR::OP_POPCOUNT: { + auto Op = IROp->C(); + auto Dst64 = GetDst(Node); + + switch (OpSize) { + case 1: + movzx(GetDst(Node), GetSrc(Op->Header.Args[0].ID())); + popcnt(Dst64, Dst64); + break; + case 2: { + movzx(GetDst(Node), GetSrc(Op->Header.Args[0].ID())); + popcnt(Dst64, Dst64); + break; + } + case 4: + popcnt(GetDst(Node), GetSrc(Op->Header.Args[0].ID())); + break; + case 8: + popcnt(GetDst(Node), GetSrc(Op->Header.Args[0].ID())); + break; + } + break; + } + case IR::OP_ZEXT: { + auto Op = IROp->C(); + LogMan::Throw::A(Op->SrcSize <= 64, "Can't support Zext of size: %ld", Op->SrcSize); + + uint32_t PhysReg = RA::GetNodeRegister(Graph, Op->Header.Args[0].ID()); + if (PhysReg >= XMMBase) { + // XMM -> GPR transfer with free truncation + switch (Op->SrcSize) { + case 8: + pextrb(al, GetSrc(Op->Header.Args[0].ID()), 0); + break; + case 16: + pextrw(ax, GetSrc(Op->Header.Args[0].ID()), 0); + break; + case 32: + pextrd(eax, GetSrc(Op->Header.Args[0].ID()), 0); + break; + case 64: + pextrw(rax, GetSrc(Op->Header.Args[0].ID()), 0); + break; + default: LogMan::Msg::A("Unhandled Zext size: %d", Op->SrcSize); break; + } + auto Dst = GetDst(Node); + mov(Dst, rax); + } + else { + if (Op->SrcSize == 64) { + vmovq(xmm15, Reg64(GetSrc(Op->Header.Args[0].ID()).getIdx())); + movapd(GetDst(Node), xmm15); + } + else { + auto Dst = GetDst(Node); + mov(rax, uint64_t((1ULL << Op->SrcSize) - 1)); + and(rax, GetSrc(Op->Header.Args[0].ID())); + mov(Dst, rax); + } + } + break; + } + case IR::OP_SEXT: { + auto Op = IROp->C(); + LogMan::Throw::A(Op->SrcSize <= 64, "Can't support Zext of size: %ld", Op->SrcSize); + auto Dst = GetDst(Node); + + switch (Op->SrcSize / 8) { + case 1: + movsx(Dst, GetSrc(Op->Header.Args[0].ID())); + break; + case 2: + movsx(Dst, GetSrc(Op->Header.Args[0].ID())); + break; + case 4: 
+ movsxd(Reg64(Dst.getIdx()), GetSrc(Op->Header.Args[0].ID())); + break; + case 8: + mov(Dst, GetSrc(Op->Header.Args[0].ID())); + break; + default: LogMan::Msg::A("Unknown Sext size: %d", Op->SrcSize / 8); + } + break; + } + case IR::OP_BFE: { + auto Op = IROp->C(); + LogMan::Throw::A(OpSize <= 16, "OpSize is too large for BFE: %d", OpSize); + if (OpSize == 16) { + LogMan::Throw::A(!(Op->lsb < 64 && (Op->lsb + Op->Width > 64)), "Trying to BFE an XMM across the 64bit split: Beginning at %d, ending at %d", Op->lsb, Op->lsb + Op->Width); + movups(xmm15, GetSrc(Op->Header.Args[0].ID())); + uint8_t Offset = Op->lsb; + if (Offset < 64) { + pextrq(rax, xmm15, 0); + } + else { + pextrq(rax, xmm15, 1); + Offset -= 64; + } + + if (Offset) { + shr(rax, Offset); + } + + if (Op->Width != 64) { + mov(rcx, uint64_t((1ULL << Op->Width) - 1)); + and(rax, rcx); + } + + mov (GetDst(Node), rax); + } + else { + auto Dst = GetDst(Node); + mov(rax, GetSrc(Op->Header.Args[0].ID())); + + if (Op->lsb != 0) + shr(rax, Op->lsb); + + if (Op->Width != 64) { + mov(rcx, uint64_t((1ULL << Op->Width) - 1)); + and(rax, rcx); + } + mov(Dst, rax); + } + break; + } + case IR::OP_LSHR: { + auto Op = IROp->C(); + uint8_t Mask = OpSize * 8 - 1; + + auto Dst = GetDst(Node); + mov (rcx, GetSrc(Op->Header.Args[1].ID())); + and(rcx, Mask); + + shrx(Reg32e(Dst.getIdx(), 64), GetSrc(Op->Header.Args[0].ID()), rcx); + break; + } + case IR::OP_LSHL: { + auto Op = IROp->C(); + uint8_t Mask = OpSize * 8 - 1; + + auto Dst = GetDst(Node); + mov (rcx, GetSrc(Op->Header.Args[1].ID())); + and(rcx, Mask); + + shlx(Reg32e(Dst.getIdx(), 64), GetSrc(Op->Header.Args[0].ID()), rcx); + break; + } + case IR::OP_ASHR: { + auto Op = IROp->C(); + uint8_t Mask = OpSize * 8 - 1; + + mov (rcx, GetSrc(Op->Header.Args[1].ID())); + and(rcx, Mask); + switch (OpSize) { + case 1: + movsx(rax, GetSrc(Op->Header.Args[0].ID())); + sar(al, cl); + movsx(GetDst(Node), al); + break; + case 2: + movsx(rax, GetSrc(Op->Header.Args[0].ID())); + 
sar(ax, cl); + movsx(GetDst(Node), ax); + break; + case 4: + sarx(Reg32e(GetDst(Node).getIdx(), 32), GetSrc(Op->Header.Args[0].ID()), ecx); + break; + case 8: + sarx(Reg32e(GetDst(Node).getIdx(), 64), GetSrc(Op->Header.Args[0].ID()), rcx); + break; + default: LogMan::Msg::A("Unknown ASHR Size: %d\n", OpSize); break; + }; + break; + } + case IR::OP_ROL: { + auto Op = IROp->C(); + uint8_t Mask = OpSize * 8 - 1; + + mov (rcx, GetSrc(Op->Header.Args[1].ID())); + and(rcx, Mask); + switch (OpSize) { + case 1: { + movzx(rax, GetSrc(Op->Header.Args[0].ID())); + rol(al, cl); + break; + } + case 2: { + movzx(rax, GetSrc(Op->Header.Args[0].ID())); + rol(ax, cl); + break; + } + case 4: { + mov(eax, GetSrc(Op->Header.Args[0].ID())); + rol(eax, cl); + break; + } + case 8: { + mov(rax, GetSrc(Op->Header.Args[0].ID())); + rol(rax, cl); + break; + } + } + mov(GetDst(Node), rax); + break; + } + case IR::OP_ROR: { + auto Op = IROp->C(); + uint8_t Mask = OpSize * 8 - 1; + + mov (rcx, GetSrc(Op->Header.Args[1].ID())); + and(rcx, Mask); + switch (OpSize) { + case 1: { + movzx(rax, GetSrc(Op->Header.Args[0].ID())); + ror(al, cl); + break; + } + case 2: { + movzx(rax, GetSrc(Op->Header.Args[0].ID())); + ror(ax, cl); + break; + } + case 4: { + mov(eax, GetSrc(Op->Header.Args[0].ID())); + ror(eax, cl); + break; + } + case 8: { + mov(rax, GetSrc(Op->Header.Args[0].ID())); + ror(rax, cl); + break; + } + } + mov(GetDst(Node), rax); + break; + } + case IR::OP_MUL: { + auto Op = IROp->C(); + auto Dst = GetDst(Node); + + switch (OpSize) { + case 1: + movsx(rax, GetSrc(Op->Header.Args[0].ID())); + movsx(rcx, GetSrc(Op->Header.Args[1].ID())); + imul(cl); + movsx(Dst, al); + break; + case 2: + movsx(rax, GetSrc(Op->Header.Args[0].ID())); + movsx(rcx, GetSrc(Op->Header.Args[1].ID())); + imul(cx); + movsx(Dst, ax); + break; + case 4: + movsxd(rax, GetSrc(Op->Header.Args[0].ID())); + imul(eax, GetSrc(Op->Header.Args[1].ID())); + movsx(Dst, eax); + break; + case 8: + mov(rax, 
GetSrc(Op->Header.Args[0].ID())); + imul(rax, GetSrc(Op->Header.Args[1].ID())); + mov(Dst, rax); + break; + default: LogMan::Msg::A("Unknown Sext size: %d", OpSize); + } + break; + } + case IR::OP_MULH: { + auto Op = IROp->C(); + switch (OpSize) { + case 1: + movsx(rax, GetSrc(Op->Header.Args[0].ID())); + movsx(rcx, GetSrc(Op->Header.Args[1].ID())); + imul(cl); + movsx(rax, ax); + mov(GetDst(Node), rax); + break; + case 2: + movsx(rax, GetSrc(Op->Header.Args[0].ID())); + movsx(rcx, GetSrc(Op->Header.Args[1].ID())); + imul(cx); + movsx(rax, dx); + mov(GetDst(Node), rax); + break; + case 4: + movsx(rax, GetSrc(Op->Header.Args[0].ID())); + imul(GetSrc(Op->Header.Args[1].ID())); + movsxd(rax, edx); + mov(GetDst(Node), rdx); + break; + case 8: + mov(rax, GetSrc(Op->Header.Args[0].ID())); + imul(GetSrc(Op->Header.Args[1].ID())); + mov(GetDst(Node), rdx); + break; + default: LogMan::Msg::A("Unknown Sext size: %d", OpSize); + } + break; + } + case IR::OP_UMUL: { + auto Op = IROp->C(); + switch (OpSize) { + case 1: + movzx(rax, GetSrc(Op->Header.Args[0].ID())); + movzx(rcx, GetSrc(Op->Header.Args[1].ID())); + mul(cl); + movzx(rax, al); + mov(GetDst(Node), rax); + break; + case 2: + movzx(rax, GetSrc(Op->Header.Args[0].ID())); + movzx(rcx, GetSrc(Op->Header.Args[1].ID())); + mul(cx); + movzx(rax, ax); + mov(GetDst(Node), rax); + break; + case 4: + mov(rax, GetSrc(Op->Header.Args[0].ID())); + mul(GetSrc(Op->Header.Args[1].ID())); + mov(GetDst(Node), rax); + break; + case 8: + mov(rax, GetSrc(Op->Header.Args[0].ID())); + mul(GetSrc(Op->Header.Args[1].ID())); + mov(GetDst(Node), rax); + break; + default: LogMan::Msg::A("Unknown Sext size: %d", OpSize); + } + break; + } + case IR::OP_UMULH: { + auto Op = IROp->C(); + switch (OpSize) { + case 1: + movzx(rax, GetSrc(Op->Header.Args[0].ID())); + movzx(rcx, GetSrc(Op->Header.Args[1].ID())); + mul(cl); + movzx(rax, ax); + mov(GetDst(Node), rax); + break; + case 2: + movzx(rax, GetSrc(Op->Header.Args[0].ID())); + movzx(rcx, 
GetSrc(Op->Header.Args[1].ID())); + mul(cx); + movzx(rax, dx); + mov(GetDst(Node), rax); + break; + case 4: + mov(rax, GetSrc(Op->Header.Args[0].ID())); + mul(GetSrc(Op->Header.Args[1].ID())); + mov(GetDst(Node), rdx); + break; + case 8: + mov(rax, GetSrc(Op->Header.Args[0].ID())); + mul(GetSrc(Op->Header.Args[1].ID())); + mov(GetDst(Node), rdx); + break; + default: LogMan::Msg::A("Unknown Sext size: %d", OpSize); + } + break; + } + case IR::OP_LDIV: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit divide from x86-64 + auto Size = OpSize; + switch (Size) { + case 4: { + mov(eax, GetSrc(Op->Header.Args[0].ID())); + mov(edx, GetSrc(Op->Header.Args[1].ID())); + mov(ecx, GetSrc(Op->Header.Args[2].ID())); + idiv(ecx); + mov(GetDst(Node), rax); + break; + } + case 8: { + mov(rax, GetSrc(Op->Header.Args[0].ID())); + mov(rdx, GetSrc(Op->Header.Args[1].ID())); + mov(rcx, GetSrc(Op->Header.Args[2].ID())); + idiv(rcx); + mov(GetDst(Node), rax); + break; + } + default: LogMan::Msg::A("Unknown LDIV Size: %d", Size); break; + } + break; + } + case IR::OP_LREM: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit divide from x86-64 + auto Size = OpSize; + switch (Size) { + case 4: { + mov(eax, GetSrc(Op->Header.Args[0].ID())); + mov(edx, GetSrc(Op->Header.Args[1].ID())); + mov(ecx, GetSrc(Op->Header.Args[2].ID())); + idiv(ecx); + mov(GetDst(Node), rdx); + break; + } + + case 8: { + mov(rax, GetSrc(Op->Header.Args[0].ID())); + mov(rdx, GetSrc(Op->Header.Args[1].ID())); + mov(rcx, GetSrc(Op->Header.Args[2].ID())); + idiv(rcx); + mov(GetDst(Node), rdx); + break; + } + default: LogMan::Msg::A("Unknown LREM Size: %d", Size); break; + } + break; + } + case IR::OP_LUDIV: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit divide from x86-64 + auto Size = OpSize; + switch (Size) { + case 4: { + mov (eax, GetSrc(Op->Header.Args[0].ID())); + mov (edx, 
GetSrc(Op->Header.Args[1].ID())); + mov (ecx, GetSrc(Op->Header.Args[2].ID())); + div(ecx); + mov(GetDst(Node), rax); + break; + } + case 8: { + mov (rax, GetSrc(Op->Header.Args[0].ID())); + mov (rdx, GetSrc(Op->Header.Args[1].ID())); + mov (rcx, GetSrc(Op->Header.Args[2].ID())); + div(rcx); + mov(GetDst(Node), rax); + break; + } + default: LogMan::Msg::A("Unknown LUDIV Size: %d", Size); break; + } + break; + } + case IR::OP_LUREM: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit divide from x86-64 + auto Size = OpSize; + switch (Size) { + case 4: { + mov (eax, GetSrc(Op->Header.Args[0].ID())); + mov (edx, GetSrc(Op->Header.Args[1].ID())); + mov (ecx, GetSrc(Op->Header.Args[2].ID())); + div(ecx); + mov(GetDst(Node), rdx); + break; + } + + case 8: { + mov (rax, GetSrc(Op->Header.Args[0].ID())); + mov (rdx, GetSrc(Op->Header.Args[1].ID())); + mov (rcx, GetSrc(Op->Header.Args[2].ID())); + div(rcx); + mov(GetDst(Node), rdx); + break; + } + default: LogMan::Msg::A("Unknown LUDIV Size: %d", Size); break; + } + break; + } + case IR::OP_LOADFLAG: { + auto Op = IROp->C(); + + auto Dst = GetDst(Node); + movzx(Dst, byte [STATE + (offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag)]); + and(Dst, 1); + break; + } + case IR::OP_STOREFLAG: { + auto Op = IROp->C(); + + mov (rax, GetSrc(Op->Header.Args[0].ID())); + and(rax, 1); + mov(byte [STATE + (offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag)], al); + break; + } + case IR::OP_SELECT: { + auto Op = IROp->C(); + auto Dst = GetDst(Node); + + mov(rax, GetSrc(Op->Header.Args[0].ID())); + cmp(rax, GetSrc(Op->Header.Args[1].ID())); + + switch (Op->Cond) { + case FEXCore::IR::COND_EQ: + mov(rax, GetSrc(Op->Header.Args[3].ID())); + cmove(rax, GetSrc(Op->Header.Args[2].ID())); + break; + case FEXCore::IR::COND_NEQ: + mov(rax, GetSrc(Op->Header.Args[3].ID())); + cmovne(rax, GetSrc(Op->Header.Args[2].ID())); + break; + case FEXCore::IR::COND_GE: + mov(rax, 
GetSrc(Op->Header.Args[3].ID())); + cmovge(rax, GetSrc(Op->Header.Args[2].ID())); + break; + case FEXCore::IR::COND_LT: + mov(rax, GetSrc(Op->Header.Args[2].ID())); + cmovae(rax, GetSrc(Op->Header.Args[3].ID())); + break; + case FEXCore::IR::COND_GT: + mov(rax, GetSrc(Op->Header.Args[3].ID())); + cmovg(rax, GetSrc(Op->Header.Args[2].ID())); + break; + case FEXCore::IR::COND_LE: + mov(rax, GetSrc(Op->Header.Args[3].ID())); + cmovle(rax, GetSrc(Op->Header.Args[2].ID())); + break; + case FEXCore::IR::COND_CS: + case FEXCore::IR::COND_CC: + case FEXCore::IR::COND_MI: + case FEXCore::IR::COND_PL: + case FEXCore::IR::COND_VS: + case FEXCore::IR::COND_VC: + case FEXCore::IR::COND_HI: + case FEXCore::IR::COND_LS: + default: + LogMan::Msg::A("Unsupported compare type"); + break; + } + mov (Dst, rax); + break; + } + case IR::OP_LOADMEM: { + auto Op = IROp->C(); + uint64_t Memory = CTX->MemoryMapper.GetBaseOffset(0); + + auto Dst = GetDst(Node); + mov(rax, Memory); + add(rax, GetSrc(Op->Header.Args[0].ID())); + switch (Op->Size) { + case 1: { + movzx (Dst, byte [rax]); + } + break; + case 2: { + movzx (Dst, word [rax]); + } + break; + case 4: { + mov(Dst, dword [rax]); + } + break; + case 8: { + mov(Dst, qword [rax]); + } + break; + case 16: { + movups(GetDst(Node), xword [rax]); + if (MemoryDebug) { + movq(rcx, GetDst(Node)); + } + } + break; + default: LogMan::Msg::A("Unhandled LoadMem size: %d", Op->Size); + } + break; + } + case IR::OP_STOREMEM: { + auto Op = IROp->C(); + uint64_t Memory = CTX->MemoryMapper.GetBaseOffset(0); + + mov(rax, Memory); + add(rax, GetSrc(Op->Header.Args[0].ID())); + switch (Op->Size) { + case 1: + mov(byte [rax], GetSrc(Op->Header.Args[1].ID())); + break; + case 2: + mov(word [rax], GetSrc(Op->Header.Args[1].ID())); + break; + case 4: + mov(dword [rax], GetSrc(Op->Header.Args[1].ID())); + break; + case 8: + mov(qword [rax], GetSrc(Op->Header.Args[1].ID())); + break; + case 16: + movups(xword [rax], GetSrc(Op->Header.Args[1].ID())); + break; + 
default: LogMan::Msg::A("Unhandled StoreMem size: %d", Op->Size); + } + break; + } + case IR::OP_SYSCALL: { + auto Op = IROp->C(); + // XXX: This is very terrible, but I don't care for right now + + push(rdi); + const std::array RA64 = { rsi, r8, r9, r10, r11, rbx, rbp, r12, r13, r14, r15 }; + for (auto &Reg : RA64) + push(Reg); + + // Syscall ABI for x86-64 + // this: rdi + // Thread: rsi + // ArgPointer: rdx (Stack) + // + // Result: RAX + + // These are pushed in reverse order because stacks + for (uint32_t i = 7; i > 0; --i) + push(GetSrc(Op->Header.Args[i - 1].ID())); + + mov(rsi, rdi); // Move thread in to rsi + mov(rdi, reinterpret_cast(&CTX->SyscallHandler)); + mov(rdx, rsp); + + using PtrType = uint64_t (FEXCore::SyscallHandler::*)(FEXCore::Core::InternalThreadState *Thread, FEXCore::HLE::SyscallArguments *Args); + union { + PtrType ptr; + uint64_t Raw; + } PtrCast; + PtrCast.ptr = &FEXCore::SyscallHandler::HandleSyscall; + mov(rax, PtrCast.Raw); + call(rax); + + // Reload arguments just in case they are sill live after the fact + for (uint32_t i = 0; i < 7; ++i) + pop(GetSrc(Op->Header.Args[i].ID())); + + for (uint32_t i = RA64.size(); i > 0; --i) + pop(RA64[i - 1]); + + pop(rdi); + + mov (GetDst(Node), rax); + break; + } + case IR::OP_CPUID: { + auto Op = IROp->C(); + using ClassPtrType = FEXCore::CPUIDEmu::FunctionResults (FEXCore::CPUIDEmu::*)(uint32_t Function); + union { + ClassPtrType ClassPtr; + uint64_t Raw; + } Ptr; + Ptr.ClassPtr = &CPUIDEmu::RunFunction; + + const std::array RA64 = { rsi, r8, r9, r10, r11, rbx, rbp, r12, r13, r14, r15 }; + for (auto &Reg : RA64) + push(Reg); + + // CPUID ABI + // this: rdi + // Function: rsi + // + // Result: RAX, RDX. 
4xi32 + push(rdi); + mov (rsi, GetSrc(Op->Header.Args[0].ID())); + mov (rdi, reinterpret_cast(&CTX->CPUID)); + + sub(rsp, 8); // Align + + mov(rax, Ptr.Raw); + call(rax); + + add(rsp, 8); // Align + + pop(rdi); + + for (uint32_t i = RA64.size(); i > 0; --i) + pop(RA64[i - 1]); + + auto Dst = GetDst(Node); + pinsrq(Dst, rax, 0); + pinsrd(Dst, rdx, 1); + break; + } + case IR::OP_EXTRACTELEMENT: { + auto Op = IROp->C(); + + uint32_t PhysReg = RA::GetNodeRegister(Graph, Op->Header.Args[0].ID()); + if (PhysReg >= XMMBase) { + switch (OpSize) { + case 1: + pextrb(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), Op->Idx); + break; + case 2: + pextrw(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), Op->Idx); + break; + case 4: + pextrd(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), Op->Idx); + break; + case 8: + pextrq(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), Op->Idx); + break; + default: LogMan::Msg::A("Unhandled ExtractElementSize: %d", OpSize); + } + } + else { + LogMan::Msg::A("Can't handle extract from GPR yet"); + } + break; + } + case IR::OP_VINSELEMENT: { + auto Op = IROp->C(); + movapd(xmm15, GetSrc(Op->Header.Args[0].ID())); + + // Dst_d[Op->DestIdx] = Src2_d[Op->SrcIdx]; + + // pextrq reg64/mem64, xmm, imm + // pinsrq xmm, reg64/mem64, imm8 + switch (Op->ElementSize) { + case 1: { + pextrb(al, GetSrc(Op->Header.Args[1].ID()), Op->SrcIdx); + pinsrb(xmm15, al, Op->DestIdx); + break; + } + case 2: { + pextrw(ax, GetSrc(Op->Header.Args[1].ID()), Op->SrcIdx); + pinsrw(xmm15, ax, Op->DestIdx); + break; + } + case 4: { + pextrd(eax, GetSrc(Op->Header.Args[1].ID()), Op->SrcIdx); + pinsrd(xmm15, eax, Op->DestIdx); + break; + } + case 8: { + pextrq(rax, GetSrc(Op->Header.Args[1].ID()), Op->SrcIdx); + pinsrq(xmm15, rax, Op->DestIdx); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + + movapd(GetDst(Node), xmm15); + break; + } + case IR::OP_VADD: { + auto Op = IROp->C(); + switch (Op->ElementSize) { + case 1: { + 
vpaddb(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case 2: { + vpaddw(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case 4: { + vpaddd(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case 8: { + vpaddq(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + break; + } + case IR::OP_VSUB: { + auto Op = IROp->C(); + switch (Op->ElementSize) { + case 1: { + vpsubb(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case 2: { + vpsubw(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case 4: { + vpsubd(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case 8: { + vpsubq(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + break; + } + case IR::OP_VXOR: { + auto Op = IROp->C(); + vpxor(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case IR::OP_VOR: { + auto Op = IROp->C(); + vpor(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + } + case IR::OP_VCMPEQ: { + auto Op = IROp->C(); + LogMan::Throw::A(Op->RegisterSize == 16, "Can't handle register size of: %d", Op->RegisterSize); + + switch (Op->ElementSize) { + case 1: + vpcmpeqb(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + case 2: + vpcmpeqw(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + case 4: + vpcmpeqd(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + case 8: + 
vpcmpeqq(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + default: LogMan::Msg::A("Unsupported elementSize: %d", Op->ElementSize); + } + break; + } + case IR::OP_VCMPGT: { + auto Op = IROp->C(); + LogMan::Throw::A(Op->RegisterSize == 16, "Can't handle register size of: %d", Op->RegisterSize); + + switch (Op->ElementSize) { + case 1: + vpcmpgtb(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + case 2: + vpcmpgtw(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + case 4: + vpcmpgtd(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + case 8: + vpcmpgtq(GetDst(Node), GetSrc(Op->Header.Args[0].ID()), GetSrc(Op->Header.Args[1].ID())); + break; + default: LogMan::Msg::A("Unsupported elementSize: %d", Op->ElementSize); + } + break; + } + case IR::OP_VZIP: { + auto Op = IROp->C(); + movapd(xmm15, GetSrc(Op->Header.Args[0].ID())); + + switch (Op->ElementSize) { + case 1: { + punpcklbw(xmm15, GetSrc(Op->Header.Args[1].ID())); + break; + } + case 2: { + punpcklwd(xmm15, GetSrc(Op->Header.Args[1].ID())); + break; + } + case 4: { + punpckldq(xmm15, GetSrc(Op->Header.Args[1].ID())); + break; + } + case 8: { + punpcklqdq(xmm15, GetSrc(Op->Header.Args[1].ID())); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + movapd(GetDst(Node), xmm15); + break; + } + case IR::OP_VZIP2: { + auto Op = IROp->C(); + movapd(xmm15, GetSrc(Op->Header.Args[0].ID())); + + switch (Op->ElementSize) { + case 1: { + punpckhbw(xmm15, GetSrc(Op->Header.Args[1].ID())); + break; + } + case 2: { + punpckhwd(xmm15, GetSrc(Op->Header.Args[1].ID())); + break; + } + case 4: { + punpckhdq(xmm15, GetSrc(Op->Header.Args[1].ID())); + break; + } + case 8: { + punpckhqdq(xmm15, GetSrc(Op->Header.Args[1].ID())); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + 
movapd(GetDst(Node), xmm15); + break; + } + case IR::OP_VUSHLS: { + auto Op = IROp->C(); + movapd(xmm15, GetSrc(Op->Header.Args[0].ID())); + vmovq(xmm14, Reg64(GetSrc(Op->Header.Args[1].ID()).getIdx())); + + switch (Op->ElementSize) { + case 2: { + psllw(xmm15, xmm14); + break; + } + case 4: { + pslld(xmm15, xmm14); + break; + } + case 8: { + psllq(xmm15, xmm14); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + movapd(GetDst(Node), xmm15); + + break; + } + case IR::OP_CAS: { + auto Op = IROp->C(); + // Args[0]: Desired + // Args[1]: Expected + // Args[2]: Pointer + // DataSrc = *Src1 + // if (DataSrc == Src3) { *Src1 == Src2; } Src2 = DataSrc + // This will write to memory! Careful! + // Third operand must be a calculated guest memory address + //OrderedNode *CASResult = _CAS(Src3, Src2, Src1); + uint64_t Memory = CTX->MemoryMapper.GetBaseOffset(0); + + mov(rcx, Memory); + add(rcx, GetSrc(Op->Header.Args[2].ID())); + mov(rdx, GetSrc(Op->Header.Args[1].ID())); + mov(rax, GetSrc(Op->Header.Args[0].ID())); + + // RCX now contains pointer + // RAX contains our expected value + // RDX contains our desired + + lock(); + + switch (OpSize) { + case 1: { + cmpxchg(byte [rcx], dl); + movzx(rax, al); + break; + } + case 2: { + cmpxchg(word [rcx], dx); + movzx(rax, ax); + break; + } + case 4: { + cmpxchg(dword [rcx], edx); + break; + } + case 8: { + cmpxchg(qword [rcx], rdx); + break; + } + default: LogMan::Msg::A("Unsupported: %d", OpSize); + } + + // RAX now contains the result + mov (GetDst(Node), rax); + break; + } + case IR::OP_CYCLECOUNTER: { +#ifdef DEBUG_CYCLES + mov (GetDst(Node), 0); +#else + rdtsc(); + shl(rdx, 32); + or(rax, rdx); + mov (GetDst(Node), rax); +#endif + break; + } + case IR::OP_FINDLSB: { + auto Op = IROp->C(); + tzcnt(rcx, GetSrc(Op->Header.Args[0].ID())); + xor(rax, rax); + cmp(GetSrc(Op->Header.Args[0].ID()), 1); + sbb(rax, rax); + or(rax, rcx); + mov (GetDst(Node), rax); + break; + } + case 
IR::OP_FINDMSB: { + auto Op = IROp->C(); + mov(rax, OpSize * 8); + lzcnt(rcx, GetSrc(Op->Header.Args[0].ID())); + sub(rax, rcx); + mov (GetDst(Node), rax); + break; + } + default: break; + } + } + else { + switch (IROp->Op) { + case IR::OP_LOADCONTEXT: { + auto Op = IROp->C(); +#define LOAD_CTX(x, y) \ + case x: { \ + movzx(rax, y [STATE + Op->Offset]); \ + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); \ + } \ + break + switch (Op->Size) { + LOAD_CTX(1, byte); + LOAD_CTX(2, word); + case 4: { + mov(eax, dword [STATE + Op->Offset]); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + } + break; + case 8: { + mov(rax, qword [STATE + Op->Offset]); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + } + break; + case 16: { + if (Op->Offset % 16 == 0) { + movaps(xmm0, xword [STATE + Op->Offset]); + movaps(xword [TEMP_STACK + (WrapperOp->ID() * 16)], xmm0); + } + else { + movups(xmm0, xword [STATE + Op->Offset]); + movups(xword [TEMP_STACK + (WrapperOp->ID() * 16)], xmm0); + } + } + break; + default: LogMan::Msg::A("Unhandled LoadContext size: %d", Op->Size); + } +#undef LOAD_CTX + break; + } + case IR::OP_STORECONTEXT: { + auto Op = IROp->C(); + + switch (Op->Size) { + case 1: { + mov(rax, qword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + mov(byte [STATE + Op->Offset], al); + } + break; + + case 2: { + mov(rax, qword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + mov(word [STATE + Op->Offset], ax); + } + break; + case 4: { + mov(rax, qword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + mov(dword [STATE + Op->Offset], eax); + } + break; + case 8: { + mov(rax, qword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + mov(qword [STATE + Op->Offset], rax); + } + break; + case 16: { + if (Op->Offset % 16 == 0) { + movaps(xmm0, xword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + movaps(xword [STATE + Op->Offset], xmm0); + } + else { + movups(xmm0, xword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + movups(xword [STATE + Op->Offset], 
xmm0); + } + } + break; + default: LogMan::Msg::A("Unhandled StoreContext size: %d", Op->Size); + } + + break; + } + + case IR::OP_SYSCALL: { + auto Op = IROp->C(); + + push(rdi); + push(r11); + + // Syscall ABI for x86-64 + // this: rdi + // Thread: rsi + // ArgPointer: rdx (Stack) + // + // Result: RAX + + mov(rsi, rdi); // Move thread in to rsi + mov(rdi, reinterpret_cast(&CTX->SyscallHandler)); + + // These are pushed in reverse order because stacks + push(qword [TEMP_STACK + (Op->Header.Args[6].ID() * 16)]); + push(qword [TEMP_STACK + (Op->Header.Args[5].ID() * 16)]); + push(qword [TEMP_STACK + (Op->Header.Args[4].ID() * 16)]); + push(qword [TEMP_STACK + (Op->Header.Args[3].ID() * 16)]); + push(qword [TEMP_STACK + (Op->Header.Args[2].ID() * 16)]); + push(qword [TEMP_STACK + (Op->Header.Args[1].ID() * 16)]); + push(qword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + mov (rdx, rsp); + + using PtrType = uint64_t (FEXCore::SyscallHandler::*)(FEXCore::Core::InternalThreadState *Thread, FEXCore::HLE::SyscallArguments *Args); + union { + PtrType ptr; + uint64_t Raw; + } PtrCast; + PtrCast.ptr = &FEXCore::SyscallHandler::HandleSyscall; + mov(rax, PtrCast.Raw); + call(rax); + add(rsp, 7 * 8); + + pop(r11); + pop(rdi); + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + case IR::OP_CPUID: { + auto Op = IROp->C(); + using ClassPtrType = FEXCore::CPUIDEmu::FunctionResults (FEXCore::CPUIDEmu::*)(uint32_t Function); + union { + ClassPtrType ClassPtr; + uint64_t Raw; + } Ptr; + Ptr.ClassPtr = &CPUIDEmu::RunFunction; + + + // CPUID ABI + // this: rdi + // Function: rsi + // + // Result: RAX, RDX. 
4xi32 + push(rdi); + push(r11); + mov (rsi, qword [TEMP_STACK + (Op->Header.Args[0].ID() *16)]); + mov (rdi, reinterpret_cast(&CTX->CPUID)); + + push(rax); // align + + mov(rax, Ptr.Raw); + call(rax); + + pop(r11); // align + + pop(r11); + pop(rdi); + + mov(dword [TEMP_STACK + (WrapperOp->ID() * 16) + 0], eax); + shr(rax, 32); + mov(dword [TEMP_STACK + (WrapperOp->ID() * 16) + 4], eax); + + mov(dword [TEMP_STACK + (WrapperOp->ID() * 16) + 8], edx); + shr(rdx, 32); + mov(dword [TEMP_STACK + (WrapperOp->ID() * 16) + 12], edx); + break; + } + case IR::OP_EXTRACTELEMENT: { + auto Op = IROp->C(); + + uint32_t Offset = Op->Header.Args[0].ID() * 16 + OpSize * Op->Idx; + switch (OpSize) { + case 1: + movzx(rax, byte [TEMP_STACK + Offset]); + break; + case 2: + movzx(rax, word [TEMP_STACK + Offset]); + break; + case 4: + mov(eax, dword [TEMP_STACK + Offset]); + break; + case 8: + mov(rax, qword [TEMP_STACK + Offset]); + break; + default: LogMan::Msg::A("Unhandled ExtractElementSize: %d", OpSize); + } + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + case IR::OP_LOADFLAG: { + auto Op = IROp->C(); + + movzx(rax, byte [STATE + (offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag)]); + and(rax, 1); + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + case IR::OP_STOREFLAG: { + auto Op = IROp->C(); + + mov(rax, qword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + and(rax, 1); + mov(byte [STATE + (offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag)], al); + break; + } + + case IR::OP_CONDJUMP: { + auto Op = IROp->C(); + + Label *TargetLabel; + auto IsTarget = JumpTargets.find(Op->Header.Args[1].ID()); + if (IsTarget == JumpTargets.end()) { + TargetLabel = &JumpTargets.try_emplace(Op->Header.Args[1].ID(), Label{}).first->second; + } + else { + TargetLabel = &IsTarget->second; + } + + mov(rax, qword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + cmp(rax, 0); + jne(*TargetLabel); + break; + } + + case IR::OP_LOADMEM: { + auto 
Op = IROp->C(); + uint64_t Memory = CTX->MemoryMapper.GetBaseOffset(0); + + mov(rax, qword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + mov(rcx, Memory); + add(rax, rcx); + switch (Op->Size) { + case 1: { + movzx (rcx, byte [rax]); + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rcx); + } + break; + case 2: { + movzx (rcx, word [rax]); + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rcx); + } + break; + case 4: { + mov(ecx, dword [rax]); + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rcx); + } + break; + case 8: { + mov(rcx, qword [rax]); + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rcx); + } + break; + case 16: { + movups(xmm0, xword [rax]); + movups(xword [TEMP_STACK + (WrapperOp->ID() * 16)], xmm0); + if (MemoryDebug) { + movq(rcx, xmm0); + } + } + break; + default: LogMan::Msg::A("Unhandled LoadMem size: %d", Op->Size); + } + + if (MemoryDebug) { + push(rdi); + push(r11); + sub(rsp, 8); + + // Load the address in to Arg1 + mov(rdi, qword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + // Move the loaded value to Arg2 + mov(rsi, rcx); + mov (rdx, Op->Size); + + mov(rax, reinterpret_cast(LoadMem)); + call(rax); + + add(rsp, 8); + + pop(r11); + pop(rdi); + } + + break; + } + case IR::OP_STOREMEM: { + auto Op = IROp->C(); + uint64_t Memory = CTX->MemoryMapper.GetBaseOffset(0); + + mov(rax, qword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + mov(rcx, Memory); + add(rax, rcx); + switch (Op->Size) { + case 1: { + mov(cl, byte [TEMP_STACK + (Op->Header.Args[1].ID() * 16)]); + mov(byte [rax], cl); + if (MemoryDebug) { + movzx(rcx, cl); + } + } + break; + case 2: { + mov(cx, word [TEMP_STACK + (Op->Header.Args[1].ID() * 16)]); + mov(word [rax], cx); + + if (MemoryDebug) { + movzx(rcx, cx); + } + } + break; + case 4: { + mov(ecx, dword [TEMP_STACK + (Op->Header.Args[1].ID() * 16)]); + mov(dword [rax], ecx); + } + break; + case 8: { + mov(rcx, qword [TEMP_STACK + (Op->Header.Args[1].ID() * 16)]); + mov(qword [rax], rcx); + } + break; + case 
16: { + movups(xmm0, xword [TEMP_STACK + (Op->Header.Args[1].ID() * 16)]); + movups(xword [rax], xmm0); + if (MemoryDebug) { + movq(rcx, xmm0); + } + } + break; + default: LogMan::Msg::A("Unhandled StoreMem size: %d", Op->Size); + } + + if (MemoryDebug) { + push(rdi); + push(r11); + sub(rsp, 8); + + // Load the address in to Arg1 + mov(rdi, qword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + // Load the value from RAX in to Arg2 + mov(rsi, rcx); + + mov (rdx, Op->Size); + + mov(rax, reinterpret_cast(StoreMem)); + call(rax); + + add(rsp, 8); + + pop(r11); + pop(rdi); + } + break; + } + case IR::OP_MOV: { + auto Op = IROp->C(); + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + case IR::OP_CONSTANT: { + auto Op = IROp->C(); + if (Op->Constant >> 31) { + mov(rax, Op->Constant); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + } + else { + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], Op->Constant); + } + break; + } + case IR::OP_POPCOUNT: { + auto Op = IROp->C(); + switch (OpSize) { + case 1: + movzx(al, byte [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + popcnt(eax, eax); + break; + case 2: + popcnt(ax, word [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + movzx(rax, ax); + break; + case 4: + popcnt(eax, dword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + break; + case 8: + popcnt(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + break; + } + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + case IR::OP_ADD: { + auto Op = IROp->C(); + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + add(rax, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + case IR::OP_SUB: { + auto Op = IROp->C(); + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + sub(rax, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov (qword [TEMP_STACK + (WrapperOp->ID() 
* 16)], rax); + break; + } + case IR::OP_XOR: { + auto Op = IROp->C(); + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + xor(rax, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + case IR::OP_AND: { + auto Op = IROp->C(); + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + and(rax, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + case IR::OP_OR: { + auto Op = IROp->C(); + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + or(rax, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + case IR::OP_MUL: { + auto Op = IROp->C(); + switch (OpSize) { + case 1: + movsx(rax, byte [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + movsx(rcx, byte [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + imul(cl); + movsx(rax, al); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + case 2: + movsx(rax, word [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + movsx(rcx, word [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + imul(cx); + movsx(rax, ax); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + case 4: + movsxd(rax, dword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + imul(dword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + movsxd(rax, eax); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + case 8: + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + imul(qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + default: LogMan::Msg::A("Unknown Sext size: %d", OpSize); + } + break; + } + case IR::OP_UMUL: { + auto Op = IROp->C(); + switch (OpSize) { + case 1: + movzx(rax, byte [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + movzx(rcx, byte [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mul(cl); + movzx(rax, 
al); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + case 2: + movzx(rax, word [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + movzx(rcx, word [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mul(cx); + movzx(rax, ax); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + case 4: + mov(rax, dword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mul(dword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + case 8: + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mul(qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + default: LogMan::Msg::A("Unknown Sext size: %d", OpSize); + } + break; + } + + case IR::OP_MULH: { + auto Op = IROp->C(); + switch (OpSize) { + case 1: + movsx(rax, byte [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + movsx(rcx, byte [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + imul(cl); + movsx(rax, ax); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + case 2: + movsx(rax, word [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + movsx(rcx, word [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + imul(cx); + movsx(rax, dx); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + case 4: + movsxd(rax, dword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + imul(dword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + movsxd(rax, edx); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + case 8: + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + imul(qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rdx); + break; + default: LogMan::Msg::A("Unknown Sext size: %d", OpSize); + } + break; + } + case IR::OP_UMULH: { + auto Op = IROp->C(); + switch (OpSize) { + case 1: + movzx(rax, byte [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + movzx(rcx, byte [TEMP_STACK + 
Op->Header.Args[1].ID() * 16]); + mul(cl); + movzx(rax, ax); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + case 2: + movzx(rax, word [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + movzx(rcx, word [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mul(cx); + movzx(rax, dx); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + case 4: + mov(rax, dword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mul(dword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rdx); + break; + case 8: + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mul(qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rdx); + break; + default: LogMan::Msg::A("Unknown Sext size: %d", OpSize); + } + break; + } + case IR::OP_LDIV: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit divide from x86-64 + auto Size = OpSize; + switch (Size) { + case 4: { + mov(eax, dword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mov(edx, dword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(ecx, dword [TEMP_STACK + Op->Header.Args[2].ID() * 16]); + idiv(ecx); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + + case 8: { + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mov(rdx, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(rcx, qword [TEMP_STACK + Op->Header.Args[2].ID() * 16]); + idiv(rcx); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + default: LogMan::Msg::A("Unknown LDIV Size: %d", Size); break; + } + break; + } + case IR::OP_LREM: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit divide from x86-64 + auto Size = OpSize; + switch (Size) { + case 4: { + mov(eax, dword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mov(edx, dword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(ecx, dword [TEMP_STACK 
+ Op->Header.Args[2].ID() * 16]); + idiv(ecx); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rdx); + break; + } + + case 8: { + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mov(rdx, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(rcx, qword [TEMP_STACK + Op->Header.Args[2].ID() * 16]); + idiv(rcx); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rdx); + break; + } + default: LogMan::Msg::A("Unknown LREM Size: %d", Size); break; + } + break; + } + + + case IR::OP_LUDIV: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit divide from x86-64 + auto Size = OpSize; + switch (Size) { + case 4: { + mov(eax, dword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mov(edx, dword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(ecx, dword [TEMP_STACK + Op->Header.Args[2].ID() * 16]); + div(ecx); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + + case 8: { + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mov(rdx, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(rcx, qword [TEMP_STACK + Op->Header.Args[2].ID() * 16]); + div(rcx); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + default: LogMan::Msg::A("Unknown LUDIV Size: %d", Size); break; + } + break; + } + case IR::OP_LUREM: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit divide from x86-64 + auto Size = OpSize; + switch (Size) { + case 4: { + mov(eax, dword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mov(edx, dword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(ecx, dword [TEMP_STACK + Op->Header.Args[2].ID() * 16]); + div(ecx); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rdx); + break; + } + + case 8: { + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mov(rdx, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(rcx, qword [TEMP_STACK + Op->Header.Args[2].ID() * 16]); + div(rcx); + 
mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rdx); + break; + } + default: LogMan::Msg::A("Unknown LUDIV Size: %d", Size); break; + } + break; + } + + case IR::OP_ZEXT: { + auto Op = IROp->C(); + LogMan::Throw::A(Op->SrcSize <= 64, "Can't support Zext of size: %ld", Op->SrcSize); + + if (Op->SrcSize == 64) { + movd(xmm0, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + movups(qword [TEMP_STACK + (WrapperOp->ID() * 16)], xmm0); + } + else { + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mov(rcx, uint64_t((1ULL << Op->SrcSize) - 1)); + and(rax, rcx); + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + } + break; + } + + case IR::OP_SEXT: { + auto Op = IROp->C(); + LogMan::Throw::A(Op->SrcSize <= 64, "Can't support Zext of size: %ld", Op->SrcSize); + switch (Op->SrcSize / 8) { + case 1: + movsx(rax, byte [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + case 2: + movsx(rax, word [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + case 4: + movsxd(rax, dword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + case 8: + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mov(qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + default: LogMan::Msg::A("Unknown Sext size: %d", Op->SrcSize / 8); + } + break; + } + case IR::OP_BFI: { + auto Op = IROp->C(); + LogMan::Throw::A(OpSize <= 8, "OpSize is too large for BFI: %d", OpSize); + + uint64_t SourceMask = (1ULL << Op->Width) - 1; + + uint64_t DestMask = ~(SourceMask << Op->lsb); + + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + mov(rcx, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + + if (Op->Width != 64) { + mov(rdx, SourceMask); + and(rcx, rdx); + } + + mov(rdx, DestMask); + and(rax, rdx); + shl(rdx, Op->lsb); + or(rax, rdx); + mov (qword [TEMP_STACK + 
(WrapperOp->ID() * 16)], rax); + + break; + } + + case IR::OP_BFE: { + auto Op = IROp->C(); + LogMan::Throw::A(OpSize <= 16, "OpSize is too large for BFE: %d", OpSize); + // %ssa64 i128 = Bfe %ssa48 i128, 0x1, 0x7 + if (OpSize == 16) { + LogMan::Throw::A(!(Op->lsb < 64 && (Op->lsb + Op->Width > 64)), "Trying to BFE an XMM across the 64bit split: Beginning at %d, ending at %d", Op->lsb, Op->lsb + Op->Width); + movups(xmm0, xword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + uint8_t Offset = Op->lsb; + if (Offset < 64) { + pextrq(rax, xmm0, 0); + } + else { + pextrq(rax, xmm0, 1); + Offset -= 64; + } + + if (Offset) { + shr(rax, Offset); + } + + if (Op->Width != 64) { + mov(rcx, uint64_t((1ULL << Op->Width) - 1)); + and(rax, rcx); + } + + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + } + else { + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + if (Op->lsb != 0) + shr(rax, Op->lsb); + + if (Op->Width != 64) { + mov(rcx, uint64_t((1ULL << Op->Width) - 1)); + and(rax, rcx); + } + + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + } + break; + } + case IR::OP_FINDLSB: { + auto Op = IROp->C(); + tzcnt(rcx, dword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + xor(rax, rax); + cmp(qword [TEMP_STACK + Op->Header.Args[0].ID() * 16], 1); + sbb(rax, rax); + or(rax, rcx); + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + + break; + } + case IR::OP_FINDMSB: { + auto Op = IROp->C(); + mov(rax, OpSize * 8); + lzcnt(rcx, dword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + sub(rax, rcx); + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + + case IR::OP_LSHR: { + auto Op = IROp->C(); + uint8_t Mask = OpSize * 8 - 1; + + mov(rcx, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + and(rcx, Mask); + shrx(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16], rcx); + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + case IR::OP_LSHL: { + auto Op = IROp->C(); + uint8_t Mask = OpSize * 8 - 1; 
+ + mov(rcx, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + and(rcx, Mask); + shlx(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16], rcx); + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + case IR::OP_ASHR: { + auto Op = IROp->C(); + uint8_t Mask = OpSize * 8 - 1; + + mov(rcx, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + and(rcx, Mask); + switch (OpSize) { + case 1: + movsx(rax, byte [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + sar(al, cl); + break; + case 2: + movsx(rax, word [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + sar(ax, cl); + break; + case 4: + movsxd(rax, dword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + sar(eax, cl); + break; + case 8: + mov(rax, dword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + sar(rax, cl); + break; + default: LogMan::Msg::A("Unknown ASHR Size: %d\n", OpSize); break; + }; + + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + + case IR::OP_ROL: { + auto Op = IROp->C(); + uint8_t Mask = OpSize * 8 - 1; + + mov(rcx, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + and(rcx, Mask); + switch (OpSize) { + case 1: { + movzx(rax, byte [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + rol(al, cl); + break; + } + case 2: { + movzx(rax, word [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + rol(ax, cl); + break; + } + case 4: { + mov(eax, dword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + rol(eax, cl); + break; + } + case 8: { + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + rol(rax, cl); + break; + } + } + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + case IR::OP_ROR: { + auto Op = IROp->C(); + uint8_t Mask = OpSize * 8 - 1; + + mov(rcx, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + and(rcx, Mask); + switch (OpSize) { + case 1: { + movzx(rax, byte [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + ror(al, cl); + break; + } + case 2: { + movzx(rax, word [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + ror(ax, cl); + 
break; + } + case 4: { + mov(eax, dword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + ror(eax, cl); + break; + } + case 8: { + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + ror(rax, cl); + break; + } + } + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + + case IR::OP_SELECT: { + auto Op = IROp->C(); + + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + cmp(rax, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + + switch (Op->Cond) { + case FEXCore::IR::COND_EQ: + mov(rcx, qword [TEMP_STACK + Op->Header.Args[3].ID() * 16]); + cmove(rcx, qword [TEMP_STACK + Op->Header.Args[2].ID() * 16]); + break; + case FEXCore::IR::COND_NEQ: + mov(rcx, qword [TEMP_STACK + Op->Header.Args[3].ID() * 16]); + cmovne(rcx, qword [TEMP_STACK + Op->Header.Args[2].ID() * 16]); + break; + case FEXCore::IR::COND_GE: + mov(rcx, qword [TEMP_STACK + Op->Header.Args[3].ID() * 16]); + cmovge(rcx, qword [TEMP_STACK + Op->Header.Args[2].ID() * 16]); + break; + case FEXCore::IR::COND_LT: + mov(rcx, qword [TEMP_STACK + Op->Header.Args[2].ID() * 16]); + cmovae(rcx, qword [TEMP_STACK + Op->Header.Args[3].ID() * 16]); + break; + case FEXCore::IR::COND_GT: + mov(rcx, qword [TEMP_STACK + Op->Header.Args[3].ID() * 16]); + cmovg(rcx, qword [TEMP_STACK + Op->Header.Args[2].ID() * 16]); + break; + case FEXCore::IR::COND_LE: + mov(rcx, qword [TEMP_STACK + Op->Header.Args[3].ID() * 16]); + cmovle(rcx, qword [TEMP_STACK + Op->Header.Args[2].ID() * 16]); + break; + case FEXCore::IR::COND_CS: + case FEXCore::IR::COND_CC: + case FEXCore::IR::COND_MI: + case FEXCore::IR::COND_PL: + case FEXCore::IR::COND_VS: + case FEXCore::IR::COND_VC: + case FEXCore::IR::COND_HI: + case FEXCore::IR::COND_LS: + default: + LogMan::Msg::A("Unsupported compare type"); + break; + } + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rcx); + break; + } + + case IR::OP_CAS: { + auto Op = IROp->C(); + // Args[0]: Expected + // Args[1]: Desired + // Args[2]: Pointer + // DataSrc = 
*Src1 + // if (DataSrc == Src3) { *Src1 == Src2; } Src2 = DataSrc + // This will write to memory! Careful! + // Third operand must be a calculated guest memory address + //OrderedNode *CASResult = _CAS(Src3, Src2, Src1); + uint64_t Memory = CTX->MemoryMapper.GetBaseOffset(0); + + mov(rcx, Memory); + add(rcx, qword [TEMP_STACK + (Op->Header.Args[2].ID() * 16)]); + + mov(rdx, qword [TEMP_STACK + Op->Header.Args[1].ID() * 16]); + mov(rax, qword [TEMP_STACK + Op->Header.Args[0].ID() * 16]); + + // RCX now contains pointer + // RAX contains our expected value + // RDX contains our desired + + lock(); + + switch (OpSize) { + case 1: { + cmpxchg(byte [rcx], dl); + movzx(rax, al); + break; + } + case 2: { + cmpxchg(word [rcx], dx); + movzx(rax, ax); + break; + } + case 4: { + cmpxchg(dword [rcx], edx); + break; + } + case 8: { + cmpxchg(qword [rcx], rdx); + break; + } + default: LogMan::Msg::A("Unsupported: %d", OpSize); + } + + // RAX now contains the result + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + case IR::OP_VCMPEQ: { + auto Op = IROp->C(); + movups(xmm0, xword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + movups(xmm1, xword [TEMP_STACK + (Op->Header.Args[1].ID() * 16)]); + + LogMan::Throw::A(Op->RegisterSize == 16, "Can't handle register size of: %d", Op->RegisterSize); + + switch (Op->ElementSize) { + case 1: { + pcmpeqb(xmm0, xmm1); + break; + } + case 2: { + pcmpeqw(xmm0, xmm1); + break; + } + case 4: { + pcmpeqd(xmm0, xmm1); + break; + } + case 8: { + pcmpeqq(xmm0, xmm1); + break; + } + + default: LogMan::Msg::A("Unsupported elementSize: %d", Op->ElementSize); + } + movups(xword [TEMP_STACK + (WrapperOp->ID() * 16)], xmm0); + break; + } + case IR::OP_VCMPGT: { + auto Op = IROp->C(); + movups(xmm0, xword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + movups(xmm1, xword [TEMP_STACK + (Op->Header.Args[1].ID() * 16)]); + + LogMan::Throw::A(Op->RegisterSize == 16, "Can't handle register size of: %d", Op->RegisterSize); + + switch 
(Op->ElementSize) { + case 1: { + pcmpgtb(xmm0, xmm1); + break; + } + case 2: { + pcmpgtw(xmm0, xmm1); + break; + } + case 4: { + pcmpgtd(xmm0, xmm1); + break; + } + case 8: { + pcmpgtq(xmm0, xmm1); + break; + } + + default: LogMan::Msg::A("Unsupported elementSize: %d", Op->ElementSize); + } + movups(xword [TEMP_STACK + (WrapperOp->ID() * 16)], xmm0); + break; + } + + case IR::OP_VXOR: { + auto Op = IROp->C(); + movups(xmm0, xword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + pxor(xmm0, xword [TEMP_STACK + (Op->Header.Args[1].ID() * 16)]); + movups(xword [TEMP_STACK + (WrapperOp->ID() * 16)], xmm0); + break; + } + case IR::OP_VOR: { + auto Op = IROp->C(); + movups(xmm0, xword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + por(xmm0, xword [TEMP_STACK + (Op->Header.Args[1].ID() * 16)]); + movups(xword [TEMP_STACK + (WrapperOp->ID() * 16)], xmm0); + break; + } + + case IR::OP_VINSELEMENT: { + auto Op = IROp->C(); + movups(xmm0, xword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + movups(xmm1, xword [TEMP_STACK + (Op->Header.Args[1].ID() * 16)]); + + // Dst_d[Op->DestIdx] = Src2_d[Op->SrcIdx]; + + // pextrq reg64/mem64, xmm, imm + // pinsrq xmm, reg64/mem64, imm8 + switch (Op->ElementSize) { + case 1: { + pextrb(al, xmm1, Op->SrcIdx); + pinsrb(xmm0, al, Op->DestIdx); + break; + } + case 2: { + pextrw(ax, xmm1, Op->SrcIdx); + pinsrw(xmm0, ax, Op->DestIdx); + break; + } + case 4: { + pextrd(eax, xmm1, Op->SrcIdx); + pinsrd(xmm0, eax, Op->DestIdx); + break; + } + case 8: { + pextrq(rax, xmm1, Op->SrcIdx); + pinsrq(xmm0, rax, Op->DestIdx); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + + movups(xword [TEMP_STACK + (WrapperOp->ID() * 16)], xmm0); + break; + } + case IR::OP_VADD: { + auto Op = IROp->C(); + movups(xmm0, xword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + movups(xmm1, xword [TEMP_STACK + (Op->Header.Args[1].ID() * 16)]); + switch (Op->ElementSize) { + case 1: { + paddb(xmm0, xmm1); + break; + } + 
case 2: { + paddw(xmm0, xmm1); + break; + } + case 4: { + paddd(xmm0, xmm1); + break; + } + case 8: { + paddq(xmm0, xmm1); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + movups(xword [TEMP_STACK + (WrapperOp->ID() * 16)], xmm0); + + break; + } + + case IR::OP_VUSHLS: { + auto Op = IROp->C(); + movups(xmm0, xword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + movups(xmm1, xword [TEMP_STACK + (Op->Header.Args[1].ID() * 16)]); + + switch (Op->ElementSize) { + case 2: { + psllw(xmm0, xmm1); + break; + } + case 4: { + pslld(xmm0, xmm1); + break; + } + case 8: { + psllq(xmm0, xmm1); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + movups(xword [TEMP_STACK + (WrapperOp->ID() * 16)], xmm0); + + break; + } + case IR::OP_VZIP: { + auto Op = IROp->C(); + movups(xmm0, xword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + movups(xmm1, xword [TEMP_STACK + (Op->Header.Args[1].ID() * 16)]); + switch (Op->ElementSize) { + case 1: { + punpcklbw(xmm0, xmm1); + break; + } + case 2: { + punpcklwd(xmm0, xmm1); + break; + } + case 4: { + punpckldq(xmm0, xmm1); + break; + } + case 8: { + punpcklqdq(xmm0, xmm1); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + movups(xword [TEMP_STACK + (WrapperOp->ID() * 16)], xmm0); + + break; + } + case IR::OP_VZIP2: { + auto Op = IROp->C(); + movups(xmm0, xword [TEMP_STACK + (Op->Header.Args[0].ID() * 16)]); + movups(xmm1, xword [TEMP_STACK + (Op->Header.Args[1].ID() * 16)]); + switch (Op->ElementSize) { + case 1: { + punpckhbw(xmm0, xmm1); + break; + } + case 2: { + punpckhwd(xmm0, xmm1); + break; + } + case 4: { + punpckhdq(xmm0, xmm1); + break; + } + case 8: { + punpckhqdq(xmm0, xmm1); + break; + } + default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break; + } + movups(xword [TEMP_STACK + (WrapperOp->ID() * 16)], xmm0); + + break; + } + + case IR::OP_CYCLECOUNTER: { +#ifdef 
DEBUG_CYCLES + mov (rax, 0); +#else + rdtsc(); + shl(rdx, 32); + or(rax, rdx); +#endif + mov (qword [TEMP_STACK + (WrapperOp->ID() * 16)], rax); + break; + } + default: break; + } + } + + ++Begin; + } + + ready(); +// LogMan::Msg::D("Ptr: %p,+%ld", Entry, getCurr() - (uintptr_t)Entry); +// static int a = 0; +// if (a++ > 100) +// __builtin_trap(); + return Entry; +} + +FEXCore::CPU::CPUBackend *CreateJITCore(FEXCore::Context::Context *ctx, FEXCore::Core::InternalThreadState *Thread) { + return new JITCore(ctx); +} +} diff --git a/Source/Interface/Core/JIT/x86_64/JIT.h b/Source/Interface/Core/JIT/x86_64/JIT.h new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/Source/Interface/Core/JIT/x86_64/JIT.h @@ -0,0 +1 @@ + diff --git a/Source/Interface/Core/LLVMJIT/LLVMCore.cpp b/Source/Interface/Core/LLVMJIT/LLVMCore.cpp new file mode 100644 index 000000000..fe42562e9 --- /dev/null +++ b/Source/Interface/Core/LLVMJIT/LLVMCore.cpp @@ -0,0 +1,1933 @@ +#include "Interface/Context/Context.h" +#include "Interface/Core/DebugData.h" +#include "Interface/Core/LLVMJIT/LLVMMemoryManager.h" +#include "Interface/HLE/Syscalls.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DESTMAP_AS_MAP 0 +#if DESTMAP_AS_MAP +using DestMapType = std::unordered_map; +#else +using DestMapType = std::vector; +#endif + +namespace FEXCore::CPU { + +static void CPUIDRun_Thunk(CPUIDEmu::FunctionResults *Results, FEXCore::CPUIDEmu *Class, uint32_t Function) { + *Results = Class->RunFunction(Function); +} + +static void SetExitState_Thunk(FEXCore::Core::InternalThreadState *Thread) { + Thread->State.RunningEvents.ShouldStop = true; +} + +class LLVMJITCore final : public CPUBackend { +public: + explicit LLVMJITCore(FEXCore::Core::InternalThreadState *Thread); + ~LLVMJITCore() override; + std::string GetName() override { return "JIT"; } + void* 
CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData) override ; + + void *MapRegion(void *HostPtr, uint64_t GuestPtr, uint64_t Size) override { + return HostPtr; + } + + bool NeedsOpDispatch() override { return true; } + +private: + void HandleIR(FEXCore::IR::IRListView const *IR, IR::NodeWrapperIterator *Node); + llvm::Value *CreateContextGEP(uint64_t Offset, uint8_t Size); + llvm::Value *CreateContextPtr(uint64_t Offset, uint8_t Size); + llvm::Value *CreateMemoryLoad(llvm::Value *Ptr); + void CreateMemoryStore(llvm::Value *Ptr, llvm::Value *Val); + + void ValidateMemoryInVM(uint64_t Ptr, uint8_t Size, bool Load); + template + Type MemoryLoad_Validate(uint64_t Ptr); + template + void MemoryStore_Validate(uint64_t Ptr, Type Val); + + void DebugPrint(uint64_t Val); + void DebugPrint128(__uint128_t Val); + + FEXCore::Core::InternalThreadState *ThreadState; + FEXCore::Context::Context *CTX; + + struct LLVMState { + LLVMContextRef ContextRef; + llvm::Module *MainModule; + llvm::EngineBuilder *MainEngineBuilder; + llvm::IRBuilder<> *IRBuilder; + LLVMMemoryManager *MemManager; + std::vector Functions; + }; + + struct LLVMCurrentState { + llvm::Function *SyscallFunction; + llvm::Function *CPUIDFunction; + llvm::Function *ExitVMFunction; + llvm::Function *ValuePrinter; + + llvm::Function *ValidateLoad8; + llvm::Function *ValidateLoad16; + llvm::Function *ValidateLoad32; + llvm::Function *ValidateLoad64; + llvm::Function *ValidateLoad128; + + llvm::Function *ValidateStore8; + llvm::Function *ValidateStore16; + llvm::Function *ValidateStore32; + llvm::Function *ValidateStore64; + llvm::Function *ValidateStore128; + + llvm::Function *DebugPrint; + llvm::Function *DebugPrint128; + + llvm::Type *CPUStateType; + llvm::GlobalVariable *CPUStateVar; + llvm::LoadInst *CPUState; + + llvm::BasicBlock *CurrentBlock; + std::vector Blocks; + bool CurrentBlockHasTerm{false}; + llvm::BasicBlock *ExitBlock; + }; + + LLVMState JITState; + LLVMCurrentState 
JITCurrentState; + llvm::LLVMContext *Con; + llvm::Function *Func; + + // Intrinsics + llvm::CallInst *Popcount(llvm::Value *Arg) { return JITState.IRBuilder->CreateUnaryIntrinsic(llvm::Intrinsic::ctpop, Arg); } + llvm::CallInst *BSwap(llvm::Value *Arg) { return JITState.IRBuilder->CreateUnaryIntrinsic(llvm::Intrinsic::bswap, Arg); } + llvm::CallInst *CTTZ(llvm::Value *Arg) { + std::vector ArgTypes = { + Arg->getType(), + }; + std::vector Args = { + Arg, + JITState.IRBuilder->getInt1(true), + }; + + return JITState.IRBuilder->CreateIntrinsic(llvm::Intrinsic::cttz, ArgTypes, Args); + } + + llvm::CallInst *CTLZ(llvm::Value *Arg) { + std::vector ArgTypes = { + Arg->getType(), + }; + std::vector Args = { + Arg, + JITState.IRBuilder->getInt1(true), + }; + + return JITState.IRBuilder->CreateIntrinsic(llvm::Intrinsic::ctlz, ArgTypes, Args); + } + + llvm::CallInst *FSHL(llvm::Value *Val, llvm::Value *Val2, llvm::Value *Amt) { + std::vector ArgTypes = { + Val->getType(), + }; + std::vector Args = { + Val, + Val2, + Amt, + }; + + return JITState.IRBuilder->CreateIntrinsic(llvm::Intrinsic::fshl, ArgTypes, Args); + } + + llvm::CallInst *FSHR(llvm::Value *Val, llvm::Value *Val2, llvm::Value *Amt) { + std::vector ArgTypes = { + Val->getType(), + }; + std::vector Args = { + Val, + Val2, + Amt, + }; + + return JITState.IRBuilder->CreateIntrinsic(llvm::Intrinsic::fshr, ArgTypes, Args); + } + llvm::CallInst *CycleCounter() { + return JITState.IRBuilder->CreateIntrinsic(llvm::Intrinsic::readcyclecounter, {}, {}); + } + + void CreateDebugPrint(llvm::Value *Val) { + std::vector Args; + Args.emplace_back(JITState.IRBuilder->getInt64(reinterpret_cast(this))); + Args.emplace_back(Val); + if (Val->getType()->getIntegerBitWidth() > 64) + JITState.IRBuilder->CreateCall(JITCurrentState.DebugPrint128, Args); + else + JITState.IRBuilder->CreateCall(JITCurrentState.DebugPrint, Args); + } + + void CreateGlobalVariables(llvm::ExecutionEngine *Engine, llvm::Module *FunctionModule); + + llvm::Value 
*CastVectorToType(llvm::Value *Arg, bool Integer, uint8_t RegisterSize, uint8_t ElementSize); + llvm::Value *CastToOpaqueStructure(llvm::Value *Arg, llvm::Type *DstType); + void SetDest(IR::NodeWrapper Op, llvm::Value *Val); + llvm::Value *GetSrc(IR::NodeWrapper Src); + + DestMapType DestMap; + FEXCore::IR::IRListView const *CurrentIR; + + std::unordered_map JumpTargets; + std::unordered_map ForwardJumpTargets; + + // Target Machines + const std::string arch = "x86-64"; + const std::string cpu = "znver2"; + const llvm::Triple TargetTriple{"x86_64", "unknown", "linux", "gnu"}; + const llvm::SmallVector Attrs; + llvm::TargetMachine *LLVMTarget; +}; + +LLVMJITCore::LLVMJITCore(FEXCore::Core::InternalThreadState *Thread) + : ThreadState {Thread} + , CTX {Thread->CTX} { + + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + + JITState.ContextRef = LLVMContextCreate(); + Con = *llvm::unwrap(&JITState.ContextRef); + JITState.MainModule = new llvm::Module("Main Module", *Con); + JITState.IRBuilder = new llvm::IRBuilder<>(*Con); + JITState.MainEngineBuilder = new llvm::EngineBuilder(std::unique_ptr(JITState.MainModule)); + JITState.MainEngineBuilder->setEngineKind(llvm::EngineKind::JIT); + LLVMTarget = JITState.MainEngineBuilder->selectTarget( + TargetTriple, + arch, cpu, Attrs); + + JITState.MemManager = new LLVMMemoryManager(); + CTX->Config.LLVM_MemoryValidation = false; +#if !DESTMAP_AS_MAP + DestMap.resize(0x1000); +#endif +} + +LLVMJITCore::~LLVMJITCore() { + // MainEngineBuilder takes overship of MainModule + delete JITState.MainEngineBuilder; + delete JITState.IRBuilder; + // Causes fault when destroying MCJIT + //for (auto Module : JITState.Functions) { + // delete Module; + //} + LLVMContextDispose(JITState.ContextRef); +} + +void LLVMJITCore::ValidateMemoryInVM(uint64_t Ptr, uint8_t Size, bool Load) { + uint64_t VirtualBase = CTX->MemoryMapper.GetBaseOffset(0); + uint64_t VirtualEnd = VirtualBase + (1ULL << 36ULL); + if (Ptr < 
VirtualBase || (Ptr + Size) >= VirtualEnd) { + LogMan::Msg::A("Invalid memory load at 0x%016lx. Wasn't within virtual range [0x%016lx, 0x%015lx)", Ptr, VirtualBase, VirtualEnd); + } + LogMan::Msg::D("%s guestmem: 0x%lx", Load ? "Loading from" : "Storing", Ptr - VirtualBase); +} + +void LLVMJITCore::DebugPrint(uint64_t Val) { + LogMan::Msg::I(">>>> Value in Arg: 0x%lx, %ld", Val, Val); +} + +void LLVMJITCore::DebugPrint128(__uint128_t Val) { + LogMan::Msg::I(">>>Val: %016lx, %016lx", static_cast(Val >> 64), static_cast(Val)); +} + +template +Type LLVMJITCore::MemoryLoad_Validate(uint64_t Ptr) { + ValidateMemoryInVM(Ptr, sizeof(Type), true); + Type *TypedAddr = reinterpret_cast(Ptr); + Type Ret = TypedAddr[0]; + uint64_t Data; + memcpy(&Data, &Ret, sizeof(Data)); + LogMan::Msg::D("\tLoading: 0x%016lx", Data); + return Ret; +} + +template +void LLVMJITCore::MemoryStore_Validate(uint64_t Ptr, Type Val) { + ValidateMemoryInVM(Ptr, sizeof(Type), false); + Type *TypedAddr = reinterpret_cast(Ptr); + TypedAddr[0] = Val; + uint64_t Data; + memcpy(&Data, &Val, sizeof(Data)); + LogMan::Msg::D("\tStoring: 0x%016lx", Data); +} + +llvm::Value *LLVMJITCore::CreateMemoryLoad(llvm::Value *Ptr) { + if (CTX->Config.LLVM_MemoryValidation) { + std::vector Args; + Args.emplace_back(JITState.IRBuilder->getInt64(reinterpret_cast(this))); + Args.emplace_back(Ptr); + + unsigned PtrSize = Ptr->getType()->getPointerElementType()->getIntegerBitWidth(); + switch (PtrSize) { + case 8: return JITState.IRBuilder->CreateCall(JITCurrentState.ValidateLoad8, Args); + case 16: return JITState.IRBuilder->CreateCall(JITCurrentState.ValidateLoad16, Args); + case 32: return JITState.IRBuilder->CreateCall(JITCurrentState.ValidateLoad32, Args); + case 64: return JITState.IRBuilder->CreateCall(JITCurrentState.ValidateLoad64, Args); + case 128: return JITState.IRBuilder->CreateCall(JITCurrentState.ValidateLoad128, Args); + default: LogMan::Msg::A("Unknown Load Size: %d", PtrSize); break; + } + } + + return 
JITState.IRBuilder->CreateLoad(Ptr); +} + +void LLVMJITCore::CreateMemoryStore(llvm::Value *Ptr, llvm::Value *Val) { + if (CTX->Config.LLVM_MemoryValidation) { + std::vector Args; + Args.emplace_back(JITState.IRBuilder->getInt64(reinterpret_cast(this))); + Args.emplace_back(Ptr); + Args.emplace_back(Val); + + unsigned PtrSize = Ptr->getType()->getPointerElementType()->getIntegerBitWidth(); + switch (PtrSize) { + case 8: JITState.IRBuilder->CreateCall(JITCurrentState.ValidateStore8, Args); break; + case 16: JITState.IRBuilder->CreateCall(JITCurrentState.ValidateStore16, Args); break; + case 32: JITState.IRBuilder->CreateCall(JITCurrentState.ValidateStore32, Args); break; + case 64: JITState.IRBuilder->CreateCall(JITCurrentState.ValidateStore64, Args); break; + case 128: JITState.IRBuilder->CreateCall(JITCurrentState.ValidateStore128, Args); break; + default: LogMan::Msg::A("Unknown Store Size: %d", PtrSize); break; + } + return; + } + + JITState.IRBuilder->CreateStore(Val, Ptr); +} + + +void LLVMJITCore::CreateGlobalVariables(llvm::ExecutionEngine *Engine, llvm::Module *FunctionModule) { + using namespace llvm; + Type *voidTy = Type::getVoidTy(*Con); + Type *i8 = Type::getInt8Ty(*Con); + Type *i16 = Type::getInt16Ty(*Con); + Type *i32 = Type::getInt32Ty(*Con); + Type *i64 = Type::getInt64Ty(*Con); + Type *i128 = Type::getInt128Ty(*Con); + + // Syscall Function + { + auto FuncType = FunctionType::get(i64, + { + i64, // Technically a this pointer + i64, + ArrayType::get(i64, 7)->getPointerTo(), + }, + false); + JITCurrentState.SyscallFunction = Function::Create(FuncType, + Function::ExternalLinkage, + "Syscall", + FunctionModule); + using ClassPtrType = uint64_t (FEXCore::SyscallHandler::*)(FEXCore::Core::InternalThreadState *, FEXCore::HLE::SyscallArguments *); + union PtrCast { + ClassPtrType ClassPtr; + void* Data; + }; + PtrCast Ptr; + Ptr.ClassPtr = &FEXCore::SyscallHandler::HandleSyscall; + Engine->addGlobalMapping(JITCurrentState.SyscallFunction, Ptr.Data); + } 
+ + // CPUID Function + { + auto FuncType = FunctionType::get(voidTy, + { + ArrayType::get(i32, 4)->getPointerTo(), + i64, // Technically this is a pointer + i32, // CPUID Function + }, + false); + JITCurrentState.CPUIDFunction = Function::Create(FuncType, + Function::ExternalLinkage, + "CPUID", + FunctionModule); + using ClassPtrType = void (*)(FEXCore::CPUIDEmu::FunctionResults*, FEXCore::CPUIDEmu*, uint32_t); + union PtrCast { + ClassPtrType ClassPtr; + void* Data; + }; + PtrCast Ptr; + Ptr.ClassPtr = &CPUIDRun_Thunk; + Engine->addGlobalMapping(JITCurrentState.CPUIDFunction, Ptr.Data); + } + + // Exit VM function + { + auto FuncType = FunctionType::get(voidTy, + { + i64, // Technically this is a pointer + }, + false); + JITCurrentState.ExitVMFunction = Function::Create(FuncType, + Function::ExternalLinkage, + "ExitVM", + FunctionModule); + using ClassPtrType = void (*)(FEXCore::Core::InternalThreadState *Thread); + union PtrCast { + ClassPtrType ClassPtr; + void* Data; + }; + PtrCast Ptr; + Ptr.ClassPtr = &SetExitState_Thunk; + Engine->addGlobalMapping(JITCurrentState.ExitVMFunction, Ptr.Data); + } + + if (CTX->Config.LLVM_MemoryValidation) { + // Memory validate load 8 + { + auto FuncType = FunctionType::get(i8, + {i64, // this pointer + i8->getPointerTo()}, false); + JITCurrentState.ValidateLoad8 = Function::Create(FuncType, + Function::ExternalLinkage, + "LoadValidate8", + FunctionModule); + using ClassPtrType = uint8_t (LLVMJITCore::*)(uint64_t); + union PtrCast { + ClassPtrType ClassPtr; + void* Data; + }; + PtrCast Ptr; + Ptr.ClassPtr = &LLVMJITCore::MemoryLoad_Validate; + Engine->addGlobalMapping(JITCurrentState.ValidateLoad8, Ptr.Data); + } + // Memory validate load 16 + { + auto FuncType = FunctionType::get(i16, + {i64, // this pointer + i16->getPointerTo()}, false); + JITCurrentState.ValidateLoad16 = Function::Create(FuncType, + Function::ExternalLinkage, + "LoadValidate16", + FunctionModule); + using ClassPtrType = uint16_t (LLVMJITCore::*)(uint64_t); 
+ union PtrCast { + ClassPtrType ClassPtr; + void* Data; + }; + PtrCast Ptr; + Ptr.ClassPtr = &LLVMJITCore::MemoryLoad_Validate; + Engine->addGlobalMapping(JITCurrentState.ValidateLoad16, Ptr.Data); + } + // Memory validate load 32 + { + auto FuncType = FunctionType::get(i32, + {i64, // this pointer + i32->getPointerTo()}, false); + + JITCurrentState.ValidateLoad32 = Function::Create(FuncType, + Function::ExternalLinkage, + "LoadValidate32", + FunctionModule); + using ClassPtrType = uint32_t (LLVMJITCore::*)(uint64_t); + union PtrCast { + ClassPtrType ClassPtr; + void* Data; + }; + PtrCast Ptr; + Ptr.ClassPtr = &LLVMJITCore::MemoryLoad_Validate; + Engine->addGlobalMapping(JITCurrentState.ValidateLoad32, Ptr.Data); + } + // Memory validate load 64 + { + auto FuncType = FunctionType::get(i64, + {i64, // this pointer + i64->getPointerTo()}, false); + JITCurrentState.ValidateLoad64 = Function::Create(FuncType, + Function::ExternalLinkage, + "LoadValidate64", + FunctionModule); + using ClassPtrType = uint64_t (LLVMJITCore::*)(uint64_t); + union PtrCast { + ClassPtrType ClassPtr; + void* Data; + }; + PtrCast Ptr; + Ptr.ClassPtr = &LLVMJITCore::MemoryLoad_Validate; + Engine->addGlobalMapping(JITCurrentState.ValidateLoad64, Ptr.Data); + } + // Memory validate load 128 + { + auto FuncType = FunctionType::get(i128, + {i64, // this pointer + i128->getPointerTo()}, false); + JITCurrentState.ValidateLoad128 = Function::Create(FuncType, + Function::ExternalLinkage, + "LoadValidate128", + FunctionModule); + using ClassPtrType = __uint128_t (LLVMJITCore::*)(uint64_t); + union PtrCast { + ClassPtrType ClassPtr; + void* Data; + }; + PtrCast Ptr; + Ptr.ClassPtr = &LLVMJITCore::MemoryLoad_Validate<__uint128_t>; + Engine->addGlobalMapping(JITCurrentState.ValidateLoad128, Ptr.Data); + } + + // Memory validate Store 8 + { + auto FuncType = FunctionType::get(voidTy, + {i64, // this pointer + i8->getPointerTo(), + i8}, false); + JITCurrentState.ValidateStore8 = Function::Create(FuncType, + 
Function::ExternalLinkage, + "StoreValidate8", + FunctionModule); + using ClassPtrType = void (LLVMJITCore::*)(uint64_t, uint8_t); + union PtrCast { + ClassPtrType ClassPtr; + void* Data; + }; + PtrCast Ptr; + Ptr.ClassPtr = &LLVMJITCore::MemoryStore_Validate; + Engine->addGlobalMapping(JITCurrentState.ValidateStore8, Ptr.Data); + } + + // Memory validate Store 16 + { + auto FuncType = FunctionType::get(voidTy, + {i64, // this pointer + i16->getPointerTo(), + i16}, false); + JITCurrentState.ValidateStore16 = Function::Create(FuncType, + Function::ExternalLinkage, + "StoreValidate16", + FunctionModule); + using ClassPtrType = void (LLVMJITCore::*)(uint64_t, uint16_t); + union PtrCast { + ClassPtrType ClassPtr; + void* Data; + }; + PtrCast Ptr; + Ptr.ClassPtr = &LLVMJITCore::MemoryStore_Validate; + Engine->addGlobalMapping(JITCurrentState.ValidateStore16, Ptr.Data); + } + + // Memory validate Store 32 + { + auto FuncType = FunctionType::get(voidTy, + {i64, // this pointer + i32->getPointerTo(), + i32}, false); + JITCurrentState.ValidateStore32 = Function::Create(FuncType, + Function::ExternalLinkage, + "StoreValidate32", + FunctionModule); + using ClassPtrType = void (LLVMJITCore::*)(uint64_t, uint32_t); + union PtrCast { + ClassPtrType ClassPtr; + void* Data; + }; + PtrCast Ptr; + Ptr.ClassPtr = &LLVMJITCore::MemoryStore_Validate; + Engine->addGlobalMapping(JITCurrentState.ValidateStore32, Ptr.Data); + } + + // Memory validate Store 64 + { + auto FuncType = FunctionType::get(voidTy, + {i64, // this pointer + i64->getPointerTo(), + i64}, false); + JITCurrentState.ValidateStore64 = Function::Create(FuncType, + Function::ExternalLinkage, + "StoreValidate64", + FunctionModule); + using ClassPtrType = void (LLVMJITCore::*)(uint64_t, uint64_t); + union PtrCast { + ClassPtrType ClassPtr; + void* Data; + }; + PtrCast Ptr; + Ptr.ClassPtr = &LLVMJITCore::MemoryStore_Validate; + Engine->addGlobalMapping(JITCurrentState.ValidateStore64, Ptr.Data); + } + + // Memory validate 
Store 128 + { + auto FuncType = FunctionType::get(voidTy, + {i64, // this pointer + i128->getPointerTo(), + i128}, false); + JITCurrentState.ValidateStore128 = Function::Create(FuncType, + Function::ExternalLinkage, + "StoreValidate128", + FunctionModule); + using ClassPtrType = void (LLVMJITCore::*)(uint64_t, __uint128_t); + union PtrCast { + ClassPtrType ClassPtr; + void* Data; + }; + PtrCast Ptr; + Ptr.ClassPtr = &LLVMJITCore::MemoryStore_Validate<__uint128_t>; + Engine->addGlobalMapping(JITCurrentState.ValidateStore128, Ptr.Data); + } + } + + // Value Print + { + auto FuncType = FunctionType::get(voidTy, + {i64, // this pointer + i64}, false); + JITCurrentState.DebugPrint = Function::Create(FuncType, + Function::ExternalLinkage, + "PrintVal", + FunctionModule); + using ClassPtrType = void (LLVMJITCore::*)(uint64_t); + union PtrCast { + ClassPtrType ClassPtr; + void* Data; + }; + PtrCast Ptr; + Ptr.ClassPtr = &LLVMJITCore::DebugPrint; + Engine->addGlobalMapping(JITCurrentState.DebugPrint, Ptr.Data); + } + + // Value Print 128 + { + auto FuncType = FunctionType::get(voidTy, + {i64, // this pointer + i128}, false); + JITCurrentState.DebugPrint128 = Function::Create(FuncType, + Function::ExternalLinkage, + "PrintVal128", + FunctionModule); + using ClassPtrType = void (LLVMJITCore::*)(__uint128_t); + union PtrCast { + ClassPtrType ClassPtr; + void* Data; + }; + PtrCast Ptr; + Ptr.ClassPtr = &LLVMJITCore::DebugPrint128; + Engine->addGlobalMapping(JITCurrentState.DebugPrint128, Ptr.Data); + } + + // JIT State + { + JITCurrentState.CPUStateType = StructType::create(*Con, + { + i64, // RIP + ArrayType::get(i64, 16), // Gregs + i64, // Pad to ensure alignment + ArrayType::get(i128, 16), // XMMs + i64, i64, // GS, FS + ArrayType::get(i8, 32), //rflags + }, + "CPUStateType"); + + FunctionModule->getOrInsertGlobal("X86State::State", JITCurrentState.CPUStateType->getPointerTo()); + JITCurrentState.CPUStateVar = FunctionModule->getNamedGlobal("X86State::State"); + 
JITCurrentState.CPUStateVar->setConstant(true); + JITCurrentState.CPUStateVar->setInitializer( + ConstantInt::getIntegerValue( + JITCurrentState.CPUStateType->getPointerTo(), + APInt(64, reinterpret_cast(&ThreadState->State)))); + JITCurrentState.CPUState = JITState.IRBuilder->CreateLoad(JITCurrentState.CPUStateVar, false, "X86State::State::Local"); + } +} + +llvm::Value *LLVMJITCore::CreateContextGEP(uint64_t Offset, uint8_t Size) { + std::vector GEPValues = { + JITState.IRBuilder->getInt32(0), // First value in the pointer to CPUState + }; + + if (Offset == 0) { // RIP + if (Size != 8) return nullptr; + GEPValues.emplace_back(JITState.IRBuilder->getInt32(0)); + } + else if (Offset >= offsetof(FEXCore::Core::CPUState, gregs) && Offset < offsetof(FEXCore::Core::CPUState, xmm)) { + if (Size != 8 || Offset % 8 != 0) return nullptr; + GEPValues.emplace_back(JITState.IRBuilder->getInt32(1)); + GEPValues.emplace_back(JITState.IRBuilder->getInt32((Offset - offsetof(FEXCore::Core::CPUState, gregs)) / 8)); + } + else if (Offset >= offsetof(FEXCore::Core::CPUState, xmm) && Offset < offsetof(FEXCore::Core::CPUState, gs)) { + if (Size != 16 || Offset % 16 != 0) return nullptr; + GEPValues.emplace_back(JITState.IRBuilder->getInt32(3)); + GEPValues.emplace_back(JITState.IRBuilder->getInt32((Offset - offsetof(FEXCore::Core::CPUState, xmm)) / 16)); + } + else if (Offset == offsetof(FEXCore::Core::CPUState, gs)) { + if (Size != 8) return nullptr; + GEPValues.emplace_back(JITState.IRBuilder->getInt32(4)); + } + else if (Offset == offsetof(FEXCore::Core::CPUState, fs)) { + if (Size != 8) return nullptr; + GEPValues.emplace_back(JITState.IRBuilder->getInt32(5)); + } + else if (Offset >= offsetof(FEXCore::Core::CPUState, flags)) { + if (Size != 1) return nullptr; + GEPValues.emplace_back(JITState.IRBuilder->getInt32(6)); + GEPValues.emplace_back(JITState.IRBuilder->getInt32(Offset - offsetof(FEXCore::Core::CPUState, flags[0]))); + } + else + LogMan::Msg::A("Unknown X86State GEP: 
0x%lx", Offset); + + return JITState.IRBuilder->CreateGEP(JITCurrentState.CPUState, GEPValues, "Context::Value"); +} + +llvm::Value *LLVMJITCore::CreateContextPtr(uint64_t Offset, uint8_t Size) { + llvm::Type *i8 = llvm::Type::getInt8Ty(*Con); + llvm::Type *i16 = llvm::Type::getInt16Ty(*Con); + llvm::Type *i32 = llvm::Type::getInt32Ty(*Con); + llvm::Type *i64 = llvm::Type::getInt64Ty(*Con); + llvm::Type *i128 = llvm::Type::getInt128Ty(*Con); + + // Let's try to create our pointer with GEP + // This can only happen if we are a full value from the context and is aligned correctly + llvm::Value *GEPResult = CreateContextGEP(Offset, Size); + if (GEPResult) return GEPResult; + + llvm::Value *StateBasePtr = JITState.IRBuilder->CreatePtrToInt(JITCurrentState.CPUState, i64); + StateBasePtr = JITState.IRBuilder->CreateAdd(StateBasePtr, JITState.IRBuilder->getInt64(Offset)); + + // Convert back to pointer of correct size + switch (Size) { + case 1: return JITState.IRBuilder->CreateIntToPtr(StateBasePtr, i8->getPointerTo()); + case 2: return JITState.IRBuilder->CreateIntToPtr(StateBasePtr, i16->getPointerTo()); + case 4: return JITState.IRBuilder->CreateIntToPtr(StateBasePtr, i32->getPointerTo()); + case 8: return JITState.IRBuilder->CreateIntToPtr(StateBasePtr, i64->getPointerTo()); + case 16: return JITState.IRBuilder->CreateIntToPtr(StateBasePtr, i128->getPointerTo()); + default: LogMan::Msg::A("Unknown context pointer size: %d", Size); break; + } + return nullptr; +} + +llvm::Value *LLVMJITCore::CastVectorToType(llvm::Value *Arg, bool Integer, uint8_t RegisterSize, uint8_t ElementSize) { + uint8_t NumElements = RegisterSize / ElementSize; + llvm::Type *ElementType; + if (Integer) { + ElementType = llvm::Type::getIntNTy(*Con, ElementSize * 8); + } + else { + if (ElementSize == 4) { + ElementType = llvm::Type::getFloatTy(*Con); + } + else { + ElementType = llvm::Type::getDoubleTy(*Con); + } + } + + llvm::Type *VectorType = llvm::VectorType::get(ElementType, NumElements); + 
+ // This happens frequently + // If the source argument isn't of vector type then BitCast fails moving from Scalar->Vector domains + // Need to create a vector and insert elements in to that vector from the scalar type instead + if (!Arg->getType()->isVectorTy()) { + return JITState.IRBuilder->CreateBitCast(Arg, VectorType); + } + + return JITState.IRBuilder->CreateBitCast(Arg, VectorType); +} + +llvm::Value *LLVMJITCore::CastToOpaqueStructure(llvm::Value *Arg, llvm::Type *DstType) { + if (Arg->getType()->isVectorTy()) { + // First do a bitcast from the vector type to the same size integer + unsigned ElementSize = Arg->getType()->getVectorElementType()->getIntegerBitWidth(); + unsigned NumElements = Arg->getType()->getVectorNumElements(); + auto NewIntegerType = llvm::Type::getIntNTy(*Con, ElementSize * NumElements); + Arg = JITState.IRBuilder->CreateBitCast(Arg, NewIntegerType); + } + + return JITState.IRBuilder->CreateZExtOrTrunc(Arg, DstType->getPointerElementType()); +} + +void LLVMJITCore::SetDest(IR::NodeWrapper Op, llvm::Value *Val) { + DestMap[Op.NodeOffset] = Val; +} + +llvm::Value *LLVMJITCore::GetSrc(IR::NodeWrapper Src) { +#if DESTMAP_AS_MAP + LogMan::Throw::A(DestMap.find(Src.NodeOffset) != DestMap.end(), "Op had Src but wasn't added to the dest map"); +#endif + + auto DstPtr = DestMap[Src.NodeOffset]; + LogMan::Throw::A(DstPtr != nullptr, "Destmap had slot but wasn't allocated memory"); + return DstPtr; +} + +void LLVMJITCore::HandleIR(FEXCore::IR::IRListView const *IR, IR::NodeWrapperIterator *Node) { + using namespace llvm; + + uintptr_t ListBegin = CurrentIR->GetListData(); + uintptr_t DataBegin = CurrentIR->GetData(); + + IR::NodeWrapper *WrapperOp = (*Node)(); + IR::OrderedNode *RealNode = reinterpret_cast(WrapperOp->GetPtr(ListBegin)); + FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin); + uint8_t OpSize = IROp->Size; + + switch (IROp->Op) { + case FEXCore::IR::IROps::OP_BEGINBLOCK: { + auto ForwardIt = 
ForwardJumpTargets.find(WrapperOp->NodeOffset); + if (ForwardIt != ForwardJumpTargets.end()) { + // This block has already been created for us, just move over to it + JITState.IRBuilder->SetInsertPoint(ForwardIt->second); + } + else { + auto Block = BasicBlock::Create(*Con, "BeginBlock", Func); + JITCurrentState.Blocks.emplace_back(Block); + + // Blocks can be jump targets + JumpTargets[WrapperOp->NodeOffset] = Block; + + // We need to do a jump from previous block to this block + // This ensures block fallthrough works + // Although if the previously block already had a terminator then skip the jump + if (!JITCurrentState.CurrentBlockHasTerm) { + JITState.IRBuilder->CreateBr(Block); + } + JITState.IRBuilder->SetInsertPoint(Block); + JITCurrentState.CurrentBlock = Block; + JITCurrentState.CurrentBlockHasTerm = false; + } + } + break; + case IR::OP_ENDBLOCK: { + auto Op = IROp->C(); + + if (Op->RIPIncrement) { + auto DownCountValue = JITState.IRBuilder->CreateGEP(JITCurrentState.CPUState, + { + JITState.IRBuilder->getInt32(0), + JITState.IRBuilder->getInt32(0), + }, + "RIPIncrement"); + auto LoadRIP = JITState.IRBuilder->CreateLoad(DownCountValue); + auto NewValue = JITState.IRBuilder->CreateAdd(LoadRIP, JITState.IRBuilder->getInt64(Op->RIPIncrement)); + JITState.IRBuilder->CreateStore(NewValue, DownCountValue); + } + + // If we hit an end block that isn't at the end of the stream that means we need to early exit + // Just set ourselves to the end regardless + if (CTX->Config.Multiblock) { + // Fall through to the next block + // Just in case some additional garbage needs to fall through + auto Block = BasicBlock::Create(*Con, "EndBlock_Fallthrough", Func); + JITCurrentState.Blocks.emplace_back(Block); + + if (!JITCurrentState.CurrentBlockHasTerm) { + JITState.IRBuilder->CreateBr(Block); + } + + JITState.IRBuilder->SetInsertPoint(Block); + JITCurrentState.CurrentBlock = Block; + JITCurrentState.CurrentBlockHasTerm = false; + + } + else { + if 
(!JITCurrentState.CurrentBlockHasTerm) { + JITState.IRBuilder->CreateBr(JITCurrentState.ExitBlock); + auto Block = BasicBlock::Create(*Con, "EndBlock_Fallthrough", Func); + JITCurrentState.Blocks.emplace_back(Block); + + JITState.IRBuilder->SetInsertPoint(Block); + JITCurrentState.CurrentBlock = Block; + JITCurrentState.CurrentBlockHasTerm = false; + } + } + break; + } + case IR::OP_BREAK: { + std::vector Args; + // We need to pull this argument from the ExecuteCodeFunction + Args.emplace_back(Func->args().begin()); + + JITState.IRBuilder->CreateCall(JITCurrentState.ExitVMFunction, Args); + JITState.IRBuilder->CreateBr(JITCurrentState.ExitBlock); + + // Just in case some additional garbage needs to fall through + auto Block = BasicBlock::Create(*Con, "Break_Fallthrough", Func); + JITCurrentState.Blocks.emplace_back(Block); + + JITState.IRBuilder->SetInsertPoint(Block); + JITCurrentState.CurrentBlock = Block; + JITCurrentState.CurrentBlockHasTerm = false; + break; + } + case IR::OP_EXITFUNCTION: + case IR::OP_ENDFUNCTION: { + JITState.IRBuilder->CreateBr(JITCurrentState.ExitBlock); + JITCurrentState.CurrentBlockHasTerm = true; + break; + } + case IR::OP_JUMP: { + auto Op = IROp->C(); + auto JumpTarget = Op->Header.Args[0].NodeOffset; + llvm::BasicBlock *Target; + auto ForwardIt = ForwardJumpTargets.find(JumpTarget); + if (ForwardIt == ForwardJumpTargets.end()) { + // If the target doesn't yet exist then create it now + Target = BasicBlock::Create(*Con, "ForwardJump_Target", Func); + JITCurrentState.Blocks.emplace_back(Target); + ForwardJumpTargets[JumpTarget] = Target; + } + else { + // If we have the branch created already then we can just jump to it + Target = ForwardIt->second; + } + JITState.IRBuilder->CreateBr(Target); + JITCurrentState.CurrentBlockHasTerm = true; + break; + } + case IR::OP_CONDJUMP: { + auto Op = IROp->C(); + auto Cond = GetSrc(Op->Header.Args[0]); + auto JumpTarget = Op->Header.Args[1].NodeOffset; + + auto Comp = 
JITState.IRBuilder->CreateICmpNE(Cond, JITState.IRBuilder->getInt64(0)); + if (JumpTarget < WrapperOp->NodeOffset) { + // Backwards branch means the target block is already created + auto Block = BasicBlock::Create(*Con, "CondJump_FalseBlock", Func); + JITCurrentState.Blocks.emplace_back(Block); + + JITState.IRBuilder->CreateCondBr(Comp, JumpTargets[JumpTarget], Block); + JITState.IRBuilder->SetInsertPoint(Block); + JITCurrentState.CurrentBlock = Block; + JITCurrentState.CurrentBlockHasTerm = false; + } + else { + // If we are forward jumping then we need to create two new blocks + // One for continuing execution and another for the true conditional path + // If the target already exists in the forward block map then we just use that + llvm::BasicBlock *TrueBlock; + auto ForwardIt = ForwardJumpTargets.find(JumpTarget); + if (ForwardIt == ForwardJumpTargets.end()) { + // Add the True path to our forward block map so when we hit it in the future we can just use it + auto Block = BasicBlock::Create(*Con, "CondJump_TrueBlock", Func); + JITCurrentState.Blocks.emplace_back(Block); + + ForwardJumpTargets[JumpTarget] = Block; + TrueBlock = Block; + } + else { + TrueBlock = ForwardIt->second; + } + auto Block = BasicBlock::Create(*Con, "CondJump_FalseBlock", Func); + JITCurrentState.Blocks.emplace_back(Block); + + JITState.IRBuilder->CreateCondBr(Comp, TrueBlock, Block); + JITState.IRBuilder->SetInsertPoint(Block); + JITCurrentState.CurrentBlock = Block; + JITCurrentState.CurrentBlockHasTerm = false; + } + break; + } + case IR::OP_MOV: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + SetDest(*WrapperOp, Src); + break; + } + case IR::OP_SELECT: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + auto ArgTrue = GetSrc(Op->Header.Args[2]); + auto ArgFalse = GetSrc(Op->Header.Args[3]); + + Src2 = JITState.IRBuilder->CreateZExtOrTrunc(Src2, Src1->getType()); + ArgFalse = 
JITState.IRBuilder->CreateZExtOrTrunc(ArgFalse, ArgTrue->getType()); + + Value *Cmp{}; + switch (Op->Cond) { + case FEXCore::IR::COND_EQ: + Cmp = JITState.IRBuilder->CreateICmpEQ(Src1, Src2); + break; + case FEXCore::IR::COND_NEQ: + Cmp = JITState.IRBuilder->CreateICmpNE(Src1, Src2); + break; + case FEXCore::IR::COND_GE: + Cmp = JITState.IRBuilder->CreateICmpUGE(Src1, Src2); + break; + case FEXCore::IR::COND_LT: + Cmp = JITState.IRBuilder->CreateICmpULT(Src1, Src2); + break; + case FEXCore::IR::COND_GT: + Cmp = JITState.IRBuilder->CreateICmpUGT(Src1, Src2); + break; + case FEXCore::IR::COND_LE: + Cmp = JITState.IRBuilder->CreateICmpULE(Src1, Src2); + break; + default: LogMan::Msg::A("Unknown Select Op Type: %d", Op->Cond); break; + } + + auto Result = JITState.IRBuilder->CreateSelect(Cmp, ArgTrue, ArgFalse); + SetDest(*WrapperOp, Result); + break; + } + case FEXCore::IR::IROps::OP_CONSTANT: { + auto Op = IROp->C(); + auto Result = JITState.IRBuilder->getInt64(Op->Constant); + SetDest(*WrapperOp, Result); + break; + } + case FEXCore::IR::IROps::OP_SYSCALL: { + auto Op = IROp->C(); + + std::vector Args; + Args.emplace_back(JITState.IRBuilder->getInt64(reinterpret_cast(&CTX->SyscallHandler))); + // We need to pull this argument from the ExecuteCodeFunction + Args.emplace_back(Func->args().begin()); + + auto LLVMArgs = JITState.IRBuilder->CreateAlloca(ArrayType::get(Type::getInt64Ty(*Con), 7)); + for (unsigned i = 0; i < 7; ++i) { + auto Location = JITState.IRBuilder->CreateGEP(LLVMArgs, + { + JITState.IRBuilder->getInt32(0), + JITState.IRBuilder->getInt32(i), + }, + "Arg"); + auto Src = GetSrc(Op->Header.Args[i]); + JITState.IRBuilder->CreateStore(Src, Location); + } + Args.emplace_back(LLVMArgs); + + auto Result = JITState.IRBuilder->CreateCall(JITCurrentState.SyscallFunction, Args); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_CPUID: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + std::vector Args{}; + + auto ReturnType = 
ArrayType::get(Type::getInt32Ty(*Con), 4); + auto LLVMArgs = JITState.IRBuilder->CreateAlloca(ReturnType); + Args.emplace_back(LLVMArgs); + Args.emplace_back(JITState.IRBuilder->getInt64(reinterpret_cast(&CTX->CPUID))); + Args.emplace_back(Src); + JITState.IRBuilder->CreateCall(JITCurrentState.CPUIDFunction, Args); + auto Result = JITState.IRBuilder->CreateLoad(ReturnType, LLVMArgs); + SetDest(*WrapperOp, Result); + break; + } + // The IR's current representation of vectors is actually an array + case IR::OP_EXTRACTELEMENT: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + std::vector Idxs = {Op->Idx}; + auto Result = JITState.IRBuilder->CreateExtractValue(Src, Idxs); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_LOADCONTEXT: { + auto Op = IROp->C(); + auto Value = CreateContextPtr(Op->Offset, Op->Size); + llvm::Value *Load; + if ((Op->Offset % Op->Size) == 0) + Load = JITState.IRBuilder->CreateAlignedLoad(Value, Op->Size); + else + Load = JITState.IRBuilder->CreateLoad(Value); + SetDest(*WrapperOp, Load); + break; + } + case IR::OP_STORECONTEXT: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + auto Value = CreateContextPtr(Op->Offset, Op->Size); + Src = CastToOpaqueStructure(Src, Value->getType()); + + if ((Op->Offset % Op->Size) == 0) + JITState.IRBuilder->CreateAlignedStore(Src, Value, Op->Size); + else + JITState.IRBuilder->CreateStore(Src, Value); + break; + } + case IR::OP_LOADFLAG: { + auto Op = IROp->C(); + auto Value = CreateContextPtr(offsetof(FEXCore::Core::CPUState, flags) + Op->Flag, 1); + auto Load = JITState.IRBuilder->CreateLoad(Value); + SetDest(*WrapperOp, Load); + break; + } + case IR::OP_STOREFLAG: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + auto Value = CreateContextPtr(offsetof(FEXCore::Core::CPUState, flags) + Op->Flag, 1); + Src = JITState.IRBuilder->CreateZExtOrTrunc(Src, Type::getInt8Ty(*Con)); + Src = JITState.IRBuilder->CreateAnd(Src, 
JITState.IRBuilder->getInt8(1)); + + JITState.IRBuilder->CreateStore(Src, Value); + break; + } + case IR::OP_ADD: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + Src2 = JITState.IRBuilder->CreateZExtOrTrunc(Src2, Src1->getType()); + + auto Result = JITState.IRBuilder->CreateAdd(Src1, Src2); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_SUB: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + Src2 = JITState.IRBuilder->CreateZExtOrTrunc(Src2, Src1->getType()); + + auto Result = JITState.IRBuilder->CreateSub(Src1, Src2); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_XOR: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + Src2 = JITState.IRBuilder->CreateZExtOrTrunc(Src2, Src1->getType()); + + auto Result = JITState.IRBuilder->CreateXor(Src1, Src2); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_BFE: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + LogMan::Throw::A(OpSize <= 16, "OpSize is too large for BFE: %d", OpSize); + + auto BitWidth = Src->getType()->getIntegerBitWidth(); + if (OpSize == 16) { + LogMan::Throw::A(Op->Width <= 64, "Can't extract width of %d", Op->Width); + + // Generate our 128bit mask + auto SourceMask = JITState.IRBuilder->CreateShl(JITState.IRBuilder->getIntN(BitWidth, 1), JITState.IRBuilder->getIntN(BitWidth, Op->Width)); + SourceMask = JITState.IRBuilder->CreateSub(SourceMask, JITState.IRBuilder->getIntN(BitWidth, 1)); + + // Shift the source in to the correct location + auto Result = JITState.IRBuilder->CreateLShr(Src, JITState.IRBuilder->getIntN(BitWidth, Op->lsb)); + // Mask what we want + Result = JITState.IRBuilder->CreateAnd(Result, SourceMask); + SetDest(*WrapperOp, Result); + } + else { + uint64_t SourceMask = (1ULL << Op->Width) - 1; + if (Op->Width == 64) + SourceMask = ~0ULL; + + auto Result = 
JITState.IRBuilder->CreateLShr(Src, JITState.IRBuilder->getIntN(BitWidth, Op->lsb)); + Result = JITState.IRBuilder->CreateAnd(Result, + JITState.IRBuilder->getIntN(BitWidth, SourceMask)); + SetDest(*WrapperOp, Result); + } + break; + } + case IR::OP_BFI: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + uint64_t SourceMask = (1ULL << Op->Width) - 1; + if (Op->Width == 64) + SourceMask = ~0ULL; + uint64_t DestMask = ~(SourceMask << Op->lsb); + + auto BitWidth = Src1->getType()->getIntegerBitWidth(); + Src2 = JITState.IRBuilder->CreateZExtOrTrunc(Src2, Src1->getType()); + auto MaskedDest = JITState.IRBuilder->CreateAnd(Src1, JITState.IRBuilder->getIntN(BitWidth, DestMask)); + auto MaskedSrc = JITState.IRBuilder->CreateAnd(Src2, JITState.IRBuilder->getIntN(BitWidth, SourceMask)); + MaskedSrc = JITState.IRBuilder->CreateShl(MaskedSrc, JITState.IRBuilder->getIntN(BitWidth, Op->lsb)); + + auto Result = JITState.IRBuilder->CreateOr(MaskedDest, MaskedSrc); + + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_LSHR: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + // Our IR assumes defined behaviour for shifting all the bits out of the value + // So we need to ZEXT to the next size up and then trunc + auto OriginalType = Src1->getType(); + auto BiggerType = Type::getIntNTy(*Con, 128); + Src1 = JITState.IRBuilder->CreateZExt(Src1, BiggerType); + Src2 = JITState.IRBuilder->CreateZExtOrTrunc(Src2, BiggerType); + + auto Result = JITState.IRBuilder->CreateLShr(Src1, Src2); + Result = JITState.IRBuilder->CreateTrunc(Result, OriginalType); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_ASHR: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + // Our IR assumes defined behaviour for shifting all the bits out of the value + // So we need to ZEXT to the next size up and then trunc + 
auto OriginalType = Src1->getType(); + auto BiggerType = Type::getIntNTy(*Con, 128); + Src1 = JITState.IRBuilder->CreateSExt(Src1, BiggerType); + Src2 = JITState.IRBuilder->CreateZExtOrTrunc(Src2, BiggerType); + + auto Result = JITState.IRBuilder->CreateAShr(Src1, Src2); + Result = JITState.IRBuilder->CreateTrunc(Result, OriginalType); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_LSHL: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + // Our IR assumes defined behaviour for shifting all the bits out of the value + // So we need to ZEXT to the next size up and then trunc + auto OriginalType = Src1->getType(); + auto BiggerType = Type::getIntNTy(*Con, 128); + Src1 = JITState.IRBuilder->CreateZExt(Src1, BiggerType); + Src2 = JITState.IRBuilder->CreateZExtOrTrunc(Src2, BiggerType); + + auto Result = JITState.IRBuilder->CreateShl(Src1, Src2); + Result = JITState.IRBuilder->CreateTrunc(Result, OriginalType); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_AND: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + Src2 = JITState.IRBuilder->CreateZExtOrTrunc(Src2, Src1->getType()); + + auto Result = JITState.IRBuilder->CreateAnd(Src1, Src2); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_UMUL: + case IR::OP_MUL: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + Src2 = JITState.IRBuilder->CreateZExtOrTrunc(Src2, Src1->getType()); + + auto Result = JITState.IRBuilder->CreateMul(Src1, Src2); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_ROL: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + Src2 = JITState.IRBuilder->CreateZExtOrTrunc(Src2, Src1->getType()); + + auto Result = FSHL(Src1, Src1, Src2); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_ROR: { + auto Op = IROp->C(); 
+ auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + Src2 = JITState.IRBuilder->CreateZExtOrTrunc(Src2, Src1->getType()); + auto Result = FSHR(Src1, Src1, Src2); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_PRINT: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + if (Src->getType()->getIntegerBitWidth() < 64) { + Src = JITState.IRBuilder->CreateZExtOrTrunc(Src, Type::getInt64Ty(*Con)); + } + CreateDebugPrint(Src); + break; + } + + case IR::OP_CYCLECOUNTER: { +#ifdef DEBUG_CYCLES + SetDest(*WrapperOp, JITState.IRBuilder->getInt64(0)); +#else + SetDest(*WrapperOp, CycleCounter()); +#endif + break; + } + + case IR::OP_POPCOUNT: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + + SetDest(*WrapperOp, Popcount(Src)); + break; + } + case IR::OP_FINDLSB: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + + unsigned SrcBitWidth = Src->getType()->getIntegerBitWidth(); + llvm::Value *Result = CTTZ(Src); + + // Need to compare source to zero, since we are expecting -1 on zero, llvm CTTZ returns undef on zero + auto Comp = JITState.IRBuilder->CreateICmpEQ(Src, JITState.IRBuilder->getIntN(SrcBitWidth, 0)); + Result = JITState.IRBuilder->CreateSelect(Comp, JITState.IRBuilder->getIntN(SrcBitWidth, ~0ULL), Result); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_FINDMSB: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + + unsigned SrcBitWidth = Src->getType()->getIntegerBitWidth(); + llvm::Value *Result = CTLZ(Src); + + Result = JITState.IRBuilder->CreateSub(JITState.IRBuilder->getIntN(SrcBitWidth, SrcBitWidth), Result); + SetDest(*WrapperOp, Result); + break; + } + + case IR::OP_SEXT: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + llvm::Type *SourceType = Type::getIntNTy(*Con, Op->SrcSize); + llvm::Type *TargetType = Type::getIntNTy(*Con, OpSize * 8); + + auto Result = JITState.IRBuilder->CreateSExtOrTrunc(Src, SourceType); + Result 
= JITState.IRBuilder->CreateSExt(Result, TargetType); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_ZEXT: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + llvm::Type *SourceType = Type::getIntNTy(*Con, Op->SrcSize); + llvm::Type *TargetType = Type::getIntNTy(*Con, OpSize * 8); + + auto Result = JITState.IRBuilder->CreateZExtOrTrunc(Src, SourceType); + Result = JITState.IRBuilder->CreateZExt(Result, TargetType); + SetDest(*WrapperOp, Result); + break; + } + + case IR::OP_OR: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + Src2 = JITState.IRBuilder->CreateZExtOrTrunc(Src2, Src1->getType()); + + auto Result = JITState.IRBuilder->CreateOr(Src1, Src2); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_UDIV: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + auto Divisor = GetSrc(Op->Header.Args[1]); + + Divisor = JITState.IRBuilder->CreateZExtOrTrunc(Divisor, Src->getType()); + + auto Result = JITState.IRBuilder->CreateUDiv(Src, Divisor); + SetDest(*WrapperOp, Result); + + break; + } + case IR::OP_DIV: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + auto Divisor = GetSrc(Op->Header.Args[1]); + + Divisor = JITState.IRBuilder->CreateZExtOrTrunc(Divisor, Src->getType()); + + auto Result = JITState.IRBuilder->CreateSDiv(Src, Divisor); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_UREM: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + auto Divisor = GetSrc(Op->Header.Args[1]); + + Divisor = JITState.IRBuilder->CreateZExtOrTrunc(Divisor, Src->getType()); + + auto Result = JITState.IRBuilder->CreateURem(Src, Divisor); + SetDest(*WrapperOp, Result); + + break; + } + case IR::OP_REM: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + auto Divisor = GetSrc(Op->Header.Args[1]); + + Divisor = JITState.IRBuilder->CreateZExtOrTrunc(Divisor, Src->getType()); + + auto Result = 
JITState.IRBuilder->CreateSRem(Src, Divisor); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_LUDIV: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit divide from x86-64 + auto SrcLow = GetSrc(Op->Header.Args[0]); + auto SrcHigh = GetSrc(Op->Header.Args[1]); + auto Divisor = GetSrc(Op->Header.Args[2]); + + Type *iNormal = Type::getIntNTy(*Con, OpSize * 8); + Type *iLarge = Type::getIntNTy(*Con, OpSize * 8 * 2); + + // Zero extend all values to large size + SrcLow = JITState.IRBuilder->CreateZExt(SrcLow, iLarge); + SrcHigh = JITState.IRBuilder->CreateZExt(SrcHigh, iLarge); + Divisor = JITState.IRBuilder->CreateZExt(Divisor, iLarge); + // Combine the split values + SrcHigh = JITState.IRBuilder->CreateShl(SrcHigh, JITState.IRBuilder->getIntN(OpSize * 8 * 2, OpSize * 8)); + auto Dividend = JITState.IRBuilder->CreateOr(SrcHigh, SrcLow); + + // Now do the divide + auto Result = JITState.IRBuilder->CreateUDiv(Dividend, Divisor); + + // Now truncate back down origina size and store + Result = JITState.IRBuilder->CreateTrunc(Result, iNormal); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_LDIV: { + auto Op = IROp->C(); + // Each source is OpSize in size + // So you can have up to a 128bit divide from x86-64 + auto SrcLow = GetSrc(Op->Header.Args[0]); + auto SrcHigh = GetSrc(Op->Header.Args[1]); + auto Divisor = GetSrc(Op->Header.Args[2]); + + Type *iNormal = Type::getIntNTy(*Con, OpSize * 8); + Type *iLarge = Type::getIntNTy(*Con, OpSize * 8 * 2); + + // Zero extend all values to large size + SrcLow = JITState.IRBuilder->CreateZExt(SrcLow, iLarge); + SrcHigh = JITState.IRBuilder->CreateZExt(SrcHigh, iLarge); + Divisor = JITState.IRBuilder->CreateSExt(Divisor, iLarge); + // Combine the split values + SrcHigh = JITState.IRBuilder->CreateShl(SrcHigh, JITState.IRBuilder->getIntN(OpSize * 8 * 2, OpSize * 8)); + auto Dividend = JITState.IRBuilder->CreateOr(SrcHigh, SrcLow); + + // Now do the divide + auto Result = 
JITState.IRBuilder->CreateSDiv(Dividend, Divisor); + + // Now truncate back down origina size and store + Result = JITState.IRBuilder->CreateTrunc(Result, iNormal); + SetDest(*WrapperOp, Result); + break; + } + + case IR::OP_LUREM: { + auto Op = IROp->C(); + // Each source is OpOpSize in size + // So you can have up to a 128bit divide from x86-64 + auto SrcLow = GetSrc(Op->Header.Args[0]); + auto SrcHigh = GetSrc(Op->Header.Args[1]); + auto Divisor = GetSrc(Op->Header.Args[2]); + + Type *iNormal = Type::getIntNTy(*Con, OpSize * 8); + Type *iLarge = Type::getIntNTy(*Con, OpSize * 8 * 2); + + // Zero extend all values to large size + SrcLow = JITState.IRBuilder->CreateZExt(SrcLow, iLarge); + SrcHigh = JITState.IRBuilder->CreateZExt(SrcHigh, iLarge); + Divisor = JITState.IRBuilder->CreateZExt(Divisor, iLarge); + // Combine the split values + SrcHigh = JITState.IRBuilder->CreateShl(SrcHigh, JITState.IRBuilder->getIntN(OpSize * 8 * 2, OpSize * 8)); + auto Dividend = JITState.IRBuilder->CreateOr(SrcHigh, SrcLow); + + // Now do the remainder + auto Result = JITState.IRBuilder->CreateURem(Dividend, Divisor); + + // Now truncate back down origina size and store + Result = JITState.IRBuilder->CreateTrunc(Result, iNormal); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_LREM: { + auto Op = IROp->C(); + // Each source is OpOpSize in size + // So you can have up to a 128bit divide from x86-64 + auto SrcLow = GetSrc(Op->Header.Args[0]); + auto SrcHigh = GetSrc(Op->Header.Args[1]); + auto Divisor = GetSrc(Op->Header.Args[2]); + + Type *iNormal = Type::getIntNTy(*Con, OpSize * 8); + Type *iLarge = Type::getIntNTy(*Con, OpSize * 8 * 2); + + // Zero extend all values to large size + SrcLow = JITState.IRBuilder->CreateZExt(SrcLow, iLarge); + SrcHigh = JITState.IRBuilder->CreateZExt(SrcHigh, iLarge); + Divisor = JITState.IRBuilder->CreateSExt(Divisor, iLarge); + // Combine the split values + SrcHigh = JITState.IRBuilder->CreateShl(SrcHigh, JITState.IRBuilder->getIntN(OpSize 
* 8 * 2, OpSize * 8)); + auto Dividend = JITState.IRBuilder->CreateOr(SrcHigh, SrcLow); + + // Now do the remainder + auto Result = JITState.IRBuilder->CreateSRem(Dividend, Divisor); + + // Now truncate back down origina size and store + Result = JITState.IRBuilder->CreateTrunc(Result, iNormal); + SetDest(*WrapperOp, Result); + break; + } + + case IR::OP_UMULH: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + Type *iNormal = Type::getIntNTy(*Con, OpSize * 8); + Type *iLarge = Type::getIntNTy(*Con, OpSize * 8 * 2); + + // Zero extend all values to larger value + Src1 = JITState.IRBuilder->CreateZExt(Src1, iLarge); + Src2 = JITState.IRBuilder->CreateZExt(Src2, iLarge); + + // Do the large multiply + auto Result = JITState.IRBuilder->CreateMul(Src1, Src2); + Result = JITState.IRBuilder->CreateLShr(Result, JITState.IRBuilder->getIntN(OpSize * 8 * 2, OpSize * 8)); + // Now truncate back down to origianl size and store + Result = JITState.IRBuilder->CreateTrunc(Result, iNormal); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_MULH: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + Type *iNormal = Type::getIntNTy(*Con, OpSize * 8); + Type *iLarge = Type::getIntNTy(*Con, OpSize * 8 * 2); + + // Sign extend all values to larger value + Src1 = JITState.IRBuilder->CreateSExt(Src1, iLarge); + Src2 = JITState.IRBuilder->CreateSExt(Src2, iLarge); + + // Do the large multiply + auto Result = JITState.IRBuilder->CreateMul(Src1, Src2); + Result = JITState.IRBuilder->CreateLShr(Result, JITState.IRBuilder->getIntN(OpSize * 8 * 2, OpSize * 8)); + + // Now truncate back down to origianl size and store + Result = JITState.IRBuilder->CreateTrunc(Result, iNormal); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_REV: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + SetDest(*WrapperOp, BSwap(Src)); + break; + } + case 
IR::OP_CREATEVECTOR2: { + auto Op = IROp->C(); + LogMan::Throw::A(OpSize <= 16, "Can't handle a vector of size: %d", OpSize); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + // Cast to the type we want + Value *Undef = UndefValue::get(VectorType::get(Src1->getType(), 2)); + + // Src1 = CastToOpaqueStructure(Src1, ElementType); + // Src2 = CastToOpaqueStructure(Src2, ElementType); + + Undef = JITState.IRBuilder->CreateInsertElement(Undef, Src1, JITState.IRBuilder->getInt32(0)); + Undef = JITState.IRBuilder->CreateInsertElement(Undef, Src2, JITState.IRBuilder->getInt32(1)); + SetDest(*WrapperOp, Undef); + break; + } + case IR::OP_SPLATVECTOR2: { + auto Op = IROp->C(); + LogMan::Throw::A(OpSize <= 16, "Can't handle a vector of size: %d", OpSize); + auto Src = GetSrc(Op->Header.Args[0]); + + auto Result = JITState.IRBuilder->CreateVectorSplat(2, Src); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_SPLATVECTOR3: { + auto Op = IROp->C(); + LogMan::Throw::A(OpSize <= 16, "Can't handle a vector of size: %d", OpSize); + auto Src = GetSrc(Op->Header.Args[0]); + + auto Result = JITState.IRBuilder->CreateVectorSplat(3, Src); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_SPLATVECTOR4: { + auto Op = IROp->C(); + LogMan::Throw::A(OpSize <= 16, "Can't handle a vector of size: %d", OpSize); + auto Src = GetSrc(Op->Header.Args[0]); + + auto Result = JITState.IRBuilder->CreateVectorSplat(4, Src); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_VOR: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + auto Result = JITState.IRBuilder->CreateOr(Src1, Src2); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_VXOR: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + auto Result = JITState.IRBuilder->CreateXor(Src1, Src2); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_VADD: { + 
auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + // Cast to the type we want + Src1 = CastVectorToType(Src1, true, Op->RegisterSize, Op->ElementSize); + Src2 = CastVectorToType(Src2, true, Op->RegisterSize, Op->ElementSize); + + auto Result = JITState.IRBuilder->CreateAdd(Src1, Src2); + + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_VSUB: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + // Cast to the type we want + Src1 = CastVectorToType(Src1, true, Op->RegisterSize, Op->ElementSize); + Src2 = CastVectorToType(Src2, true, Op->RegisterSize, Op->ElementSize); + + auto Result = JITState.IRBuilder->CreateSub(Src1, Src2); + + SetDest(*WrapperOp, Result); + break; + } + + case IR::OP_VUSHL: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + // Cast to the type we want + Src1 = CastVectorToType(Src1, true, Op->RegisterSize, Op->ElementSize); + Src2 = CastVectorToType(Src2, true, Op->RegisterSize, Op->ElementSize); + + // Now we will do a lshr -> + auto Result = JITState.IRBuilder->CreateShl(Src1, Src2); + + SetDest(*WrapperOp, Result); + break; + } + + case IR::OP_VUSHLS: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + // Cast to the type we want + Src1 = CastVectorToType(Src1, true, Op->RegisterSize, Op->ElementSize); + + Src2 = JITState.IRBuilder->CreateZExtOrTrunc(Src2, Src1->getType()->getScalarType()); + Src2 = JITState.IRBuilder->CreateVectorSplat(Op->RegisterSize / Op->ElementSize, Src2); + + // Now we will do a lshr -> + auto Result = JITState.IRBuilder->CreateShl(Src1, Src2); + + SetDest(*WrapperOp, Result); + break; + } + + case IR::OP_VUSHR: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + // Cast to the type we want + Src1 = 
CastVectorToType(Src1, true, Op->RegisterSize, Op->ElementSize); + Src2 = JITState.IRBuilder->CreateVectorSplat(Op->RegisterSize / Op->ElementSize, Src2); + + // Now we will do a lshr -> + auto Result = JITState.IRBuilder->CreateLShr(Src1, Src2); + + SetDest(*WrapperOp, Result); + break; + } + + case IR::OP_VCMPEQ: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + // Cast to the type we want + Src1 = CastVectorToType(Src1, true, Op->RegisterSize, Op->ElementSize); + Src2 = CastVectorToType(Src2, true, Op->RegisterSize, Op->ElementSize); + + // Do an icmpeq, this will return a vector of + auto Result = JITState.IRBuilder->CreateICmpEQ(Src1, Src2); + + // Now we will do a sext -> + Result = JITState.IRBuilder->CreateSExt(Result, Src1->getType()); + + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_VCMPGT: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + // Cast to the type we want + Src1 = CastVectorToType(Src1, true, Op->RegisterSize, Op->ElementSize); + Src2 = CastVectorToType(Src2, true, Op->RegisterSize, Op->ElementSize); + + // Do an icmpeq, this will return a vector of + auto Result = JITState.IRBuilder->CreateICmpSGT(Src1, Src2); + + // Now we will do a sext -> + Result = JITState.IRBuilder->CreateSExt(Result, Src1->getType()); + + SetDest(*WrapperOp, Result); + break; + } + + case IR::OP_VZIP2: + case IR::OP_VZIP: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + // Cast to the type we want + Src1 = CastVectorToType(Src1, true, Op->RegisterSize, Op->ElementSize); + Src2 = CastVectorToType(Src2, true, Op->RegisterSize, Op->ElementSize); + + unsigned NumElements = Op->RegisterSize / Op->ElementSize; + unsigned BaseElement = IROp->Op == IR::OP_VZIP2 ? 
NumElements / 2 : 0; + std::vector VectorMask; + for (unsigned i = 0; i < NumElements; ++i) { + unsigned shfl = i % 2 ? (NumElements + (i >> 1)) : (i >> 1); + shfl += BaseElement; + VectorMask.emplace_back(shfl); + } + + auto VectorMaskConstant = ConstantDataVector::get(*Con, VectorMask); + + auto Result = JITState.IRBuilder->CreateShuffleVector(Src1, Src2, VectorMaskConstant); + + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_VINSELEMENT: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + + // Cast to the type we want + Src1 = CastVectorToType(Src1, true, Op->RegisterSize, Op->ElementSize); + Src2 = CastVectorToType(Src2, true, Op->RegisterSize, Op->ElementSize); + + // Extract our source index + auto Source = JITState.IRBuilder->CreateExtractElement(Src2, JITState.IRBuilder->getInt32(Op->SrcIdx)); + auto Result = JITState.IRBuilder->CreateInsertElement(Src1, Source, JITState.IRBuilder->getInt32(Op->DestIdx)); + SetDest(*WrapperOp, Result); + break; + } + + case IR::OP_CAS: { + auto Op = IROp->C(); + auto Src1 = GetSrc(Op->Header.Args[0]); + auto Src2 = GetSrc(Op->Header.Args[1]); + auto MemSrc = GetSrc(Op->Header.Args[2]); + + MemSrc = JITState.IRBuilder->CreateAdd(MemSrc, JITState.IRBuilder->getInt64(CTX->MemoryMapper.GetBaseOffset(0))); + // Cast the pointer type correctly + MemSrc = JITState.IRBuilder->CreateIntToPtr(MemSrc, Type::getIntNTy(*Con, OpSize * 8)->getPointerTo()); + + Src1 = JITState.IRBuilder->CreateZExtOrTrunc(Src1, MemSrc->getType()->getPointerElementType()); + Src2 = JITState.IRBuilder->CreateZExtOrTrunc(Src2, MemSrc->getType()->getPointerElementType()); + + llvm::Value *Result = JITState.IRBuilder->CreateAtomicCmpXchg(MemSrc, Src1, Src2, llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering::SequentiallyConsistent); + + // Result is a { , i1 } So we need to extract it first + // Behaves exactly like std::atomic::compare_exchange_strong(Desired (Src1), Src2) ? 
Src1 : Desired + Result = JITState.IRBuilder->CreateExtractValue(Result, {0}); + SetDest(*WrapperOp, Result); + break; + } + + case IR::OP_LOADMEM: { + auto Op = IROp->C(); + auto Src = GetSrc(Op->Header.Args[0]); + + Src = JITState.IRBuilder->CreateAdd(Src, JITState.IRBuilder->getInt64(CTX->MemoryMapper.GetBaseOffset(0))); + // Cast the pointer type correctly + Src = JITState.IRBuilder->CreateIntToPtr(Src, Type::getIntNTy(*Con, Op->Size * 8)->getPointerTo()); + auto Result = CreateMemoryLoad(Src); + SetDest(*WrapperOp, Result); + break; + } + case IR::OP_STOREMEM: { + auto Op = IROp->C(); + + auto Dst = GetSrc(Op->Header.Args[0]); + auto Src = GetSrc(Op->Header.Args[1]); + + Dst = JITState.IRBuilder->CreateAdd(Dst, JITState.IRBuilder->getInt64(CTX->MemoryMapper.GetBaseOffset(0))); + auto Type = Type::getIntNTy(*Con, Op->Size * 8); + Src = JITState.IRBuilder->CreateZExtOrTrunc(Src, Type); + Dst = JITState.IRBuilder->CreateIntToPtr(Dst, Type->getPointerTo()); + CreateMemoryStore(Dst, Src); + break; + } + default: + LogMan::Msg::A("Unknown IR Op: %d(%s)", IROp->Op, FEXCore::IR::GetName(IROp->Op).data()); + break; + } + ++*Node; +} + +void* FEXCore::CPU::LLVMJITCore::CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData) { + using namespace llvm; + + JumpTargets.clear(); + ForwardJumpTargets.clear(); + JITCurrentState.Blocks.clear(); + + CurrentIR = IR; + +#if DESTMAP_AS_MAP + DestMap.clear(); +#else + uintptr_t ListSize = CurrentIR->GetListSize(); + if (ListSize > DestMap.size()) { + DestMap.resize(std::max(DestMap.size() * 2, ListSize)); + } +#endif + + std::ostringstream FunctionName; + FunctionName << "Function_0x"; + FunctionName << std::hex << ThreadState->State.State.rip; + + auto FunctionModule = new llvm::Module("Module", *Con); + auto EngineBuilder = llvm::EngineBuilder(std::unique_ptr(FunctionModule)); + EngineBuilder.setEngineKind(llvm::EngineKind::JIT); + 
EngineBuilder.setMCJITMemoryManager(std::unique_ptr(JITState.MemManager)); + + auto Engine = EngineBuilder.create(LLVMTarget); + + Type *i64 = Type::getInt64Ty(*Con); + auto FunctionType = FunctionType::get(Type::getVoidTy(*Con), + { + i64, + }, false); + Func = Function::Create(FunctionType, + Function::ExternalLinkage, + FunctionName.str(), + FunctionModule); + Func->setCallingConv(CallingConv::C); + + { + auto Entry = BasicBlock::Create(*Con, "Entry", Func); + JITCurrentState.Blocks.emplace_back(Entry); + JITState.IRBuilder->SetInsertPoint(Entry); + JITCurrentState.CurrentBlock = Entry; + + CreateGlobalVariables(Engine, FunctionModule); + + auto Builder = JITState.IRBuilder; + + // Let's create the exit block quick + JITCurrentState.ExitBlock = BasicBlock::Create(*Con, "ExitBlock", Func); + JITCurrentState.Blocks.emplace_back(JITCurrentState.ExitBlock); + + JITState.IRBuilder->SetInsertPoint(JITCurrentState.ExitBlock); + Builder->CreateRetVoid(); + + JITState.IRBuilder->SetInsertPoint(Entry); + JITCurrentState.CurrentBlock = Entry; + JITCurrentState.Blocks.emplace_back(JITCurrentState.CurrentBlock); + JITCurrentState.CurrentBlockHasTerm = false; + + IR::NodeWrapperIterator Begin = CurrentIR->begin(); + IR::NodeWrapperIterator End = CurrentIR->end(); + + while (Begin != End) { + HandleIR(CurrentIR, &Begin); + } + } + + for (auto &Block : JITCurrentState.Blocks) { + // If the block is empty then let is just jump to the exit block + if (Block->empty()) { + JITState.IRBuilder->SetInsertPoint(Block); + JITState.IRBuilder->CreateBr(JITCurrentState.ExitBlock); + } + } + + llvm::ModulePassManager FPM; + llvm::ModuleAnalysisManager FAM; + llvm::PassBuilder passBuilder(LLVMTarget); + + passBuilder.registerModuleAnalyses(FAM); + passBuilder.buildModuleSimplificationPipeline( + llvm::PassBuilder::OptimizationLevel::O3, + llvm::PassBuilder::ThinLTOPhase::None); + + raw_ostream &Out = outs(); + + //if (CTX->Config.LLVM_PrinterPass) + if (ThreadState->State.State.rip == 
0x4021b0) + { + FPM.addPass(PrintModulePass(Out)); + } + + if (CTX->Config.LLVM_IRValidation) + { + verifyModule(*FunctionModule, &Out); + } + + FPM.run(*FunctionModule, FAM); + Engine->finalizeObject(); + + JITState.Functions.emplace_back(Engine); + + DebugData->HostCodeSize = JITState.MemManager->GetLastCodeAllocation(); + void *FunctionPtr = reinterpret_cast(Engine->getFunctionAddress(FunctionName.str())); + + return FunctionPtr; +} + +FEXCore::CPU::CPUBackend *CreateLLVMCore(FEXCore::Core::InternalThreadState *Thread) { + return new LLVMJITCore(Thread); +} + +} diff --git a/Source/Interface/Core/LLVMJIT/LLVMCore.h b/Source/Interface/Core/LLVMJIT/LLVMCore.h new file mode 100644 index 000000000..ecacdbf9a --- /dev/null +++ b/Source/Interface/Core/LLVMJIT/LLVMCore.h @@ -0,0 +1,11 @@ +#pragma once + +namespace FEXCore::Core { + struct InternalThreadState; +} + +namespace FEXCore::CPU { +class CPUBackend; + +FEXCore::CPU::CPUBackend *CreateLLVMCore(FEXCore::Core::InternalThreadState *Thread); +} diff --git a/Source/Interface/Core/LLVMJIT/LLVMMemoryManager.cpp b/Source/Interface/Core/LLVMJIT/LLVMMemoryManager.cpp new file mode 100644 index 000000000..9884f6c53 --- /dev/null +++ b/Source/Interface/Core/LLVMJIT/LLVMMemoryManager.cpp @@ -0,0 +1,62 @@ +#include "LogManager.h" +#include "Common/MathUtils.h" +#include "Interface/Core/LLVMJIT/LLVMMemoryManager.h" + +#include + +namespace FEXCore::CPU { + +LLVMMemoryManager::LLVMMemoryManager() { + CodeMemory = reinterpret_cast(mmap(nullptr, CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + LogMan::Throw::A(CodeMemory != -1ULL, "Failed to allocate code memory"); +} + +LLVMMemoryManager::~LLVMMemoryManager() { + munmap(reinterpret_cast(CodeMemory), CODE_SIZE); + CodeMemory = 0; +} + +llvm::JITSymbol LLVMMemoryManager::findSymbol(const std::string &Name) { + return llvm::JITSymbol(getSymbolAddress(Name), llvm::JITSymbolFlags::Exported); +} + +uint8_t 
*LLVMMemoryManager::allocateCodeSection(uintptr_t Size, unsigned Alignment, + [[maybe_unused]] unsigned SectionID, + [[maybe_unused]] llvm::StringRef SectionName) { + size_t Base = AlignUp(AllocateOffset, Alignment); + size_t NewEnd = Base + Size; + + if (NewEnd >= CODE_SIZE) { + LogMan::Msg::A("Tried allocating code and code cache is full!"); + return nullptr; + } + + AllocateOffset = NewEnd; + LastCodeSize = Size; + return reinterpret_cast(CodeMemory + Base); +} + +uint8_t *LLVMMemoryManager::allocateDataSection(uintptr_t Size, unsigned Alignment, + [[maybe_unused]] unsigned SectionID, + [[maybe_unused]] llvm::StringRef SectionName, + [[maybe_unused]] bool IsReadOnly) { + + // Put data section right after code section + size_t Base = AlignUp(AllocateOffset, Alignment); + size_t NewEnd = Base + Size; + + if (NewEnd >= CODE_SIZE) { + LogMan::Msg::A("Tried allocating code and code cache is full!"); + return nullptr; + } + + AllocateOffset = NewEnd; + return reinterpret_cast(CodeMemory + Base); +} + +bool LLVMMemoryManager::finalizeMemory([[maybe_unused]] std::string *ErrMsg) { + return true; +} + +} + diff --git a/Source/Interface/Core/LLVMJIT/LLVMMemoryManager.h b/Source/Interface/Core/LLVMJIT/LLVMMemoryManager.h new file mode 100644 index 000000000..b2496712c --- /dev/null +++ b/Source/Interface/Core/LLVMJIT/LLVMMemoryManager.h @@ -0,0 +1,34 @@ +#pragma once +#include + +namespace FEXCore::CPU { + +class LLVMMemoryManager final : public llvm::RTDyldMemoryManager { +public: + LLVMMemoryManager(); + ~LLVMMemoryManager(); +// uint64_t getSymbolAddress(const std::string &Name) override; + + llvm::JITSymbol findSymbol(const std::string &Name) override; + + uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, + llvm::StringRef SectionName) override; + + uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, + llvm::StringRef SectionName, + bool IsReadOnly) override; + + bool finalizeMemory(std::string 
*ErrMsg) override; + + size_t GetLastCodeAllocation() { return LastCodeSize; } + +private: + constexpr static size_t CODE_SIZE = 128 * 1024 * 1024; + uintptr_t CodeMemory {}; + size_t AllocateOffset {}; + + size_t LastCodeSize {}; +}; +} diff --git a/Source/Interface/Core/OpcodeDispatcher.cpp b/Source/Interface/Core/OpcodeDispatcher.cpp new file mode 100644 index 000000000..ea3a6c835 --- /dev/null +++ b/Source/Interface/Core/OpcodeDispatcher.cpp @@ -0,0 +1,3473 @@ + + +#include "Interface/Core/OpcodeDispatcher.h" +#include +#include +#include +#include + +#include + +namespace FEXCore::IR { + +auto OpToIndex = [](uint8_t Op) constexpr -> uint8_t { + switch (Op) { + // Group 1 + case 0x80: return 0; + case 0x81: return 1; + case 0x82: return 2; + case 0x83: return 3; + // Group 2 + case 0xC0: return 0; + case 0xC1: return 1; + case 0xD0: return 2; + case 0xD1: return 3; + case 0xD2: return 4; + case 0xD3: return 5; + // Group 3 + case 0xF6: return 0; + case 0xF7: return 1; + // Group 4 + case 0xFE: return 0; + // Group 5 + case 0xFF: return 0; + // Group 11 + case 0xC6: return 0; + case 0xC7: return 1; + } + return 0; +}; + +#define OpcodeArgs [[maybe_unused]] FEXCore::X86Tables::DecodedOp Op + +void OpDispatchBuilder::SyscallOp(OpcodeArgs) { + constexpr size_t SyscallArgs = 7; + constexpr std::array GPRIndexes = { + FEXCore::X86State::REG_RAX, + FEXCore::X86State::REG_RDI, + FEXCore::X86State::REG_RSI, + FEXCore::X86State::REG_RDX, + FEXCore::X86State::REG_R10, + FEXCore::X86State::REG_R8, + FEXCore::X86State::REG_R9, + }; + + auto SyscallOp = _Syscall( + _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs) + GPRIndexes[0] * 8), + _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs) + GPRIndexes[1] * 8), + _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs) + GPRIndexes[2] * 8), + _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs) + GPRIndexes[3] * 8), + _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs) + GPRIndexes[4] * 8), + 
_LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs) + GPRIndexes[5] * 8), + _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs) + GPRIndexes[6] * 8)); + + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), SyscallOp); +} + +void OpDispatchBuilder::LEAOp(OpcodeArgs) { + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags, false); + StoreResult(Op, Src); +} + +void OpDispatchBuilder::NOPOp(OpcodeArgs) { +} + +void OpDispatchBuilder::RETOp(OpcodeArgs) { + auto Constant = _Constant(8); + + auto OldSP = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSP])); + + auto NewRIP = _LoadMem(8, OldSP); + + OrderedNode *NewSP; + if (Op->OP == 0xC2) { + auto Offset = LoadSource(Op, Op->Src1, Op->Flags); + NewSP = _Add(_Add(OldSP, Constant), Offset); + } + else { + NewSP = _Add(OldSP, Constant); + } + + // Store the new stack pointer + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSP]), NewSP); + + // Store the new RIP + _StoreContext(8, offsetof(FEXCore::Core::CPUState, rip), NewRIP); + _EndFunction(); + Information.HadUnconditionalExit = true; +} + +void OpDispatchBuilder::SecondaryALUOp(OpcodeArgs) { + FEXCore::IR::IROps IROp; +#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg)) + switch (Op->OP) { + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 0): + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 0): + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 0): + IROp = FEXCore::IR::IROps::OP_ADD; + break; + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 1): + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 1): + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 1): + IROp = FEXCore::IR::IROps::OP_OR; + break; + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 4): + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 4): + 
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 4): + IROp = FEXCore::IR::IROps::OP_AND; + break; + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 5): + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 5): + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 5): + IROp = FEXCore::IR::IROps::OP_SUB; + break; + case OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 5): + case OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 5): + IROp = FEXCore::IR::IROps::OP_MUL; + break; + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 6): + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 6): + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 6): + IROp = FEXCore::IR::IROps::OP_XOR; + break; + default: + IROp = FEXCore::IR::IROps::OP_LAST; + LogMan::Msg::A("Unknown ALU Op: 0x%x", Op->OP); + break; + }; +#undef OPD + // X86 basic ALU ops just do the operation between the destination and a single source + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + auto ALUOp = _Add(Dest, Src); + // Overwrite our IR's op type + ALUOp.first->Header.Op = IROp; + + StoreResult(Op, ALUOp); + + // Flags set + { + auto Size = GetSrcSize(Op) * 8; + switch (IROp) { + case FEXCore::IR::IROps::OP_ADD: + GenerateFlags_ADD(Op, _Bfe(Size, 0, ALUOp), _Bfe(Size, 0, Dest), _Bfe(Size, 0, Src)); + break; + case FEXCore::IR::IROps::OP_SUB: + GenerateFlags_SUB(Op, _Bfe(Size, 0, ALUOp), _Bfe(Size, 0, Dest), _Bfe(Size, 0, Src)); + break; + case FEXCore::IR::IROps::OP_MUL: + GenerateFlags_MUL(Op, _Bfe(Size, 0, ALUOp), _MulH(Dest, Src)); + break; + case FEXCore::IR::IROps::OP_AND: + case FEXCore::IR::IROps::OP_XOR: + case FEXCore::IR::IROps::OP_OR: { + GenerateFlags_Logical(Op, _Bfe(Size, 0, ALUOp), _Bfe(Size, 0, Dest), _Bfe(Size, 0, Src)); + break; + } + default: break; + } + } +} + +void OpDispatchBuilder::ADCOp(OpcodeArgs) { + OrderedNode *Src = 
LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC); + + auto ALUOp = _Add(_Add(Dest, Src), CF); + + StoreResult(Op, ALUOp); + GenerateFlags_ADC(Op, ALUOp, Dest, Src, CF); +} + +void OpDispatchBuilder::SBBOp(OpcodeArgs) { + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC); + + auto ALUOp = _Sub(_Sub(Dest, Src), CF); + StoreResult(Op, ALUOp); + GenerateFlags_SBB(Op, ALUOp, Dest, Src, CF); +} + +void OpDispatchBuilder::PUSHOp(OpcodeArgs) { + uint8_t Size = GetSrcSize(Op); + auto Constant = _Constant(Size); + + auto OldSP = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSP])); + + auto NewSP = _Sub(OldSP, Constant); + + // Store the new stack pointer + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSP]), NewSP); + + OrderedNode *Src; + if (Op->OP == 0x68 || Op->OP == 0x6A) { // Immediate Push + Src = LoadSource(Op, Op->Src1, Op->Flags); + } + else { + if (Op->OP == 0xFF && Size == 4) LogMan::Msg::A("Woops. Can't do 32bit for this PUSH op"); + Src = LoadSource(Op, Op->Dest, Op->Flags); + } + + // Store our value to the new stack location + _StoreMem(Size, NewSP, Src); +} + +void OpDispatchBuilder::POPOp(OpcodeArgs) { + uint8_t Size = GetSrcSize(Op); + auto Constant = _Constant(Size); + + auto OldSP = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSP])); + + auto NewGPR = _LoadMem(Size, OldSP); + + auto NewSP = _Add(OldSP, Constant); + + if (Op->OP == 0x8F && Size == 4) LogMan::Msg::A("Woops. 
Can't do 32bit for this POP op"); + + // Store the new stack pointer + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSP]), NewSP); + + // Store what we loaded from the stack + StoreResult(Op, NewGPR); +} + +void OpDispatchBuilder::LEAVEOp(OpcodeArgs) { + // First we move RBP in to RSP and then behave effectively like a pop + uint8_t Size = GetSrcSize(Op); + auto Constant = _Constant(Size); + + LogMan::Throw::A(Size == 8, "Can't handle a LEAVE op with size %d", Size); + + auto OldBP = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RBP])); + + auto NewGPR = _LoadMem(Size, OldBP); + + auto NewSP = _Add(OldBP, Constant); + + // Store the new stack pointer + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSP]), NewSP); + + // Store what we loaded to RBP + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RBP]), NewGPR); +} + +void OpDispatchBuilder::CALLOp(OpcodeArgs) { + auto ConstantPC = _Constant(Op->PC + Op->InstSize); + + OrderedNode *JMPPCOffset = LoadSource(Op, Op->Src1, Op->Flags); + + auto NewRIP = _Add(JMPPCOffset, ConstantPC); + + auto ConstantPCReturn = _Constant(Op->PC + Op->InstSize); + + auto ConstantSize = _Constant(8); + auto OldSP = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSP])); + + auto NewSP = _Sub(OldSP, ConstantSize); + + // Store the new stack pointer + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSP]), NewSP); + + _StoreMem(8, NewSP, ConstantPCReturn); + + // Store the RIP + _StoreContext(8, offsetof(FEXCore::Core::CPUState, rip), NewRIP); + _ExitFunction(); // If we get here then leave the function now + + // Fracking RIPSetter check ending the block causes issues + // Split the block and leave early to work around the bug + _EndBlock(0); + // Make sure to start a new block after ending this one + _BeginBlock(); + 
Information.HadUnconditionalExit = true; +} + +void OpDispatchBuilder::CALLAbsoluteOp(OpcodeArgs) { + OrderedNode *JMPPCOffset = LoadSource(Op, Op->Src1, Op->Flags); + + auto ConstantPCReturn = _Constant(Op->PC + Op->InstSize); + + auto ConstantSize = _Constant(8); + auto OldSP = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSP])); + + auto NewSP = _Sub(OldSP, ConstantSize); + + // Store the new stack pointer + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSP]), NewSP); + + _StoreMem(8, NewSP, ConstantPCReturn); + + // Store the RIP + _StoreContext(8, offsetof(FEXCore::Core::CPUState, rip), JMPPCOffset); + _ExitFunction(); // If we get here then leave the function now + + // Fracking RIPSetter check ending the block causes issues + // Split the block and leave early to work around the bug + _EndBlock(0); + // Make sure to start a new block after ending this one + _BeginBlock(); + Information.HadUnconditionalExit = true; + +} + +void OpDispatchBuilder::CondJUMPOp(OpcodeArgs) { + enum CompareType { + COMPARE_ZERO, + COMPARE_NOTZERO, + COMPARE_EQUALMASK, + COMPARE_OTHER, + }; + uint32_t FLAGMask; + CompareType Type = COMPARE_OTHER; + + auto ZeroConst = _Constant(0); + auto OneConst = _Constant(1); + IRPair SrcCond; + + switch (Op->OP) { + case 0x70: + case 0x80: + FLAGMask = 1 << FEXCore::X86State::RFLAG_OF_LOC; + Type = COMPARE_NOTZERO; + break; + case 0x71: + case 0x81: + FLAGMask = 1 << FEXCore::X86State::RFLAG_OF_LOC; + Type = COMPARE_ZERO; + break; + case 0x72: + case 0x82: + FLAGMask = 1 << FEXCore::X86State::RFLAG_CF_LOC; + Type = COMPARE_NOTZERO; + break; + case 0x73: + case 0x83: + FLAGMask = 1 << FEXCore::X86State::RFLAG_CF_LOC; + Type = COMPARE_ZERO; + break; + case 0x74: + case 0x84: + FLAGMask = 1 << FEXCore::X86State::RFLAG_ZF_LOC; + Type = COMPARE_NOTZERO; + break; + case 0x75: + case 0x85: + FLAGMask = 1 << FEXCore::X86State::RFLAG_ZF_LOC; + Type = COMPARE_ZERO; + break; + case 0x76: 
+ case 0x86: { + auto Flag1 = GetRFLAG(FEXCore::X86State::RFLAG_ZF_LOC); + auto Flag2 = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC); + auto Check = _Or(Flag1, Flag2); + SrcCond = _Select(FEXCore::IR::COND_EQ, + Check, OneConst, ZeroConst, OneConst); + break; + } + case 0x77: + case 0x87: { + auto Flag1 = GetRFLAG(FEXCore::X86State::RFLAG_ZF_LOC); + auto Flag2 = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC); + auto Check = _Or(Flag1, _Lshl(Flag2, _Constant(1))); + SrcCond = _Select(FEXCore::IR::COND_EQ, + Check, ZeroConst, ZeroConst, OneConst); + break; + } + case 0x78: + case 0x88: + FLAGMask = 1 << FEXCore::X86State::RFLAG_SF_LOC; + Type = COMPARE_NOTZERO; + break; + case 0x79: + case 0x89: + FLAGMask = 1 << FEXCore::X86State::RFLAG_SF_LOC; + Type = COMPARE_ZERO; + break; + case 0x7A: + case 0x8A: + FLAGMask = 1 << FEXCore::X86State::RFLAG_PF_LOC; + Type = COMPARE_NOTZERO; + break; + case 0x7B: + case 0x8B: + FLAGMask = 1 << FEXCore::X86State::RFLAG_PF_LOC; + Type = COMPARE_ZERO; + break; + case 0x7C: // SF <> OF + case 0x8C: { + auto Flag1 = GetRFLAG(FEXCore::X86State::RFLAG_SF_LOC); + auto Flag2 = GetRFLAG(FEXCore::X86State::RFLAG_OF_LOC); + SrcCond = _Select(FEXCore::IR::COND_NEQ, + Flag1, Flag2, ZeroConst, OneConst); + break; + } + case 0x7D: // SF = OF + case 0x8D: { + auto Flag1 = GetRFLAG(FEXCore::X86State::RFLAG_SF_LOC); + auto Flag2 = GetRFLAG(FEXCore::X86State::RFLAG_OF_LOC); + SrcCond = _Select(FEXCore::IR::COND_EQ, + Flag1, Flag2, ZeroConst, OneConst); + break; + } + case 0x7E: // ZF = 1 || SF <> OF + case 0x8E: { + auto Flag1 = GetRFLAG(FEXCore::X86State::RFLAG_ZF_LOC); + auto Flag2 = GetRFLAG(FEXCore::X86State::RFLAG_SF_LOC); + auto Flag3 = GetRFLAG(FEXCore::X86State::RFLAG_OF_LOC); + + auto Select1 = _Select(FEXCore::IR::COND_EQ, + Flag1, OneConst, OneConst, ZeroConst); + + auto Select2 = _Select(FEXCore::IR::COND_NEQ, + Flag2, Flag3, OneConst, ZeroConst); + + auto Check = _Or(Select1, Select2); + SrcCond = _Select(FEXCore::IR::COND_EQ, + Check, OneConst, 
ZeroConst, OneConst); + break; + } + case 0x7F: // ZF = 0 && SF = OF + case 0x8F: { + auto Flag1 = GetRFLAG(FEXCore::X86State::RFLAG_ZF_LOC); + auto Flag2 = GetRFLAG(FEXCore::X86State::RFLAG_SF_LOC); + auto Flag3 = GetRFLAG(FEXCore::X86State::RFLAG_OF_LOC); + + auto Select1 = _Select(FEXCore::IR::COND_EQ, + Flag1, ZeroConst, OneConst, ZeroConst); + + auto Select2 = _Select(FEXCore::IR::COND_EQ, + Flag2, Flag3, OneConst, ZeroConst); + + auto Check = _And(Select1, Select2); + SrcCond = _Select(FEXCore::IR::COND_EQ, + Check, OneConst, ZeroConst, OneConst); + break; + } + default: LogMan::Msg::A("Unknown Jmp Op: 0x%x\n", Op->OP); return; + } + + if (Type != COMPARE_OTHER) { + auto MaskConst = _Constant(FLAGMask); + + auto RFLAG = GetPackedRFLAG(false); + + auto AndOp = _And(RFLAG, MaskConst); + + switch (Type) { + case COMPARE_ZERO: { + SrcCond = _Select(FEXCore::IR::COND_EQ, + AndOp, ZeroConst, ZeroConst, OneConst); + break; + } + case COMPARE_NOTZERO: { + SrcCond = _Select(FEXCore::IR::COND_NEQ, + AndOp, ZeroConst, ZeroConst, OneConst); + break; + } + case COMPARE_EQUALMASK: { + SrcCond = _Select(FEXCore::IR::COND_EQ, + AndOp, MaskConst, ZeroConst, OneConst); + break; + } + case COMPARE_OTHER: break; + } + } + + // The conditions of the previous conditional branches are inverted from what you expect on the x86 side + // This inversion exists because our condjump needs to jump over code that sets the RIP to the target conditionally + // XXX: Reenable +#if 0 + if (ConfigMultiblock()) { + auto CondJump = _CondJump(); + CondJump.first->Header.NumArgs = 1; + CondJump.first->Cond = SrcCond; + + _EndBlock(0); + // Make sure to start a new block after ending this one + _BeginBlock(); + + uint64_t Target = Op->PC + Op->InstSize + Op->Src1.TypeLiteral.Literal; + if (false && Target > Op->PC) { + // If we are forward jumping: Add the IR Op to the fixup list + auto it = Arguments.Fixups.find(Target); + if (it == Arguments.Fixups.end()) { + std::vector empty; + it = 
Arguments.Fixups.emplace(std::make_pair(Target, empty)).first; + } + it->second.emplace_back(IRArguments::Fixup{&CondJump.first->Header}); + return; + } + else if (false && Target <= Op->PC) { + // If we are jumping backwards then we should have a jump target available in our jump targets list + auto it = Arguments.JumpTargets.find(Target); + if (it != Arguments.JumpTargets.end()) { + CondJump.first->Location = it->second; + return; + } + } + } +#endif + // Fallback + { + // XXX: Test + GetPackedRFLAG(false); + + auto CondJump = _CondJump(); + CondJump.first->Header.NumArgs = 1; + CondJump.first->Header.Args[0] = SrcCond.Node->Wrapped(ListData.Begin()); + + auto RIPOffset = LoadSource(Op, Op->Src1, Op->Flags); + auto RIPTargetConst = _Constant(Op->PC + Op->InstSize); + + auto NewRIP = _Add(RIPOffset, RIPTargetConst); + + // Store the new RIP + _StoreContext(8, offsetof(FEXCore::Core::CPUState, rip), NewRIP); + _ExitFunction(); + + _EndBlock(0); + + // Make sure to start a new block after ending this one + auto JumpTarget = _BeginBlock(); + // This very explicitly avoids the isDest path for Ops. 
We want the actual destination here + CondJump.first->Header.Args[1] = JumpTarget.Node->Wrapped(ListData.Begin()); + } +} + +void OpDispatchBuilder::JUMPOp(OpcodeArgs) { + // This is just an unconditional relative literal jump + // XXX: Reenable +#if 0 + if (ConfigMultiblock()) { + uint64_t Target = Op->PC + Op->InstSize + Op->Src1.TypeLiteral.Literal; + if (false && Target > Op->PC) { + // If we are forward jumping: Add the IR Op to the fixup list + auto it = Arguments.Fixups.find(Target); + if (it == Arguments.Fixups.end()) { + std::vector empty; + it = Arguments.Fixups.emplace(std::make_pair(Target, empty)).first; + } + auto Jump = _Jump(); + it->second.emplace_back(IRArguments::Fixup{&Jump.first->Header}); + return; + } + else if (Target <= Op->PC) { + // If we are jumping backwards then we should have a jump target available in our jump targets list + auto it = Arguments.JumpTargets.find(Target); + + if (it != Arguments.JumpTargets.end()) { + auto Jump = _Jump(); + Jump.first->Location = it->second; + return; + } + } + } +#endif + + // Fallback + { + // This source is a literal + auto RIPOffset = LoadSource(Op, Op->Src1, Op->Flags); + + auto RIPTargetConst = _Constant(Op->PC + Op->InstSize); + + auto NewRIP = _Add(RIPOffset, RIPTargetConst); + + // Store the new RIP + _StoreContext(8, offsetof(FEXCore::Core::CPUState, rip), NewRIP); + _ExitFunction(); + + _EndBlock(0); + + // Make sure to start a new block after ending this one + _BeginBlock(); + Information.HadUnconditionalExit = true; + } +} + +void OpDispatchBuilder::JUMPAbsoluteOp(OpcodeArgs) { + // This is just an unconditional jump + // This uses ModRM to determine its location + // No way to use this effectively in multiblock + auto RIPOffset = LoadSource(Op, Op->Src1, Op->Flags); + + // Store the new RIP + _StoreContext(8, offsetof(FEXCore::Core::CPUState, rip), RIPOffset); + _ExitFunction(); + + _EndBlock(0); + + // Make sure to start a new block after ending this one + _BeginBlock(); + 
Information.HadUnconditionalExit = true; + +} + +void OpDispatchBuilder::SETccOp(OpcodeArgs) { + enum CompareType { + COMPARE_ZERO, + COMPARE_NOTZERO, + COMPARE_EQUALMASK, + COMPARE_OTHER, + }; + uint32_t FLAGMask; + CompareType Type = COMPARE_OTHER; + OrderedNode *SrcCond; + + auto ZeroConst = _Constant(0); + auto OneConst = _Constant(1); + + switch (Op->OP) { + case 0x90: + FLAGMask = 1 << FEXCore::X86State::RFLAG_OF_LOC; + Type = COMPARE_NOTZERO; + break; + case 0x91: + FLAGMask = 1 << FEXCore::X86State::RFLAG_OF_LOC; + Type = COMPARE_ZERO; + break; + case 0x92: + FLAGMask = 1 << FEXCore::X86State::RFLAG_CF_LOC; + Type = COMPARE_NOTZERO; + break; + case 0x93: + FLAGMask = 1 << FEXCore::X86State::RFLAG_CF_LOC; + Type = COMPARE_ZERO; + break; + case 0x94: + FLAGMask = 1 << FEXCore::X86State::RFLAG_ZF_LOC; + Type = COMPARE_NOTZERO; + break; + case 0x95: + FLAGMask = 1 << FEXCore::X86State::RFLAG_ZF_LOC; + Type = COMPARE_ZERO; + break; + case 0x96: + FLAGMask = (1 << FEXCore::X86State::RFLAG_ZF_LOC) | (1 << FEXCore::X86State::RFLAG_CF_LOC); + Type = COMPARE_NOTZERO; + break; + case 0x97: + FLAGMask = (1 << FEXCore::X86State::RFLAG_ZF_LOC) | (1 << FEXCore::X86State::RFLAG_CF_LOC); + Type = COMPARE_ZERO; + break; + case 0x98: + FLAGMask = 1 << FEXCore::X86State::RFLAG_SF_LOC; + Type = COMPARE_NOTZERO; + break; + case 0x99: + FLAGMask = 1 << FEXCore::X86State::RFLAG_SF_LOC; + Type = COMPARE_ZERO; + break; + case 0x9A: + FLAGMask = 1 << FEXCore::X86State::RFLAG_PF_LOC; + Type = COMPARE_NOTZERO; + break; + case 0x9B: + FLAGMask = 1 << FEXCore::X86State::RFLAG_PF_LOC; + Type = COMPARE_ZERO; + break; + case 0x9D: { // SF = OF + auto Flag1 = GetRFLAG(FEXCore::X86State::RFLAG_SF_LOC); + auto Flag2 = GetRFLAG(FEXCore::X86State::RFLAG_OF_LOC); + SrcCond = _Select(FEXCore::IR::COND_EQ, + Flag1, Flag2, OneConst, ZeroConst); + break; + } + case 0x9C: { // SF <> OF + auto Flag1 = GetRFLAG(FEXCore::X86State::RFLAG_SF_LOC); + auto Flag2 = GetRFLAG(FEXCore::X86State::RFLAG_OF_LOC); + 
SrcCond = _Select(FEXCore::IR::COND_NEQ, + Flag1, Flag2, OneConst, ZeroConst); + break; + } + case 0x9E: { // ZF = 1 || SF <> OF + auto Flag1 = GetRFLAG(FEXCore::X86State::RFLAG_ZF_LOC); + auto Flag2 = GetRFLAG(FEXCore::X86State::RFLAG_SF_LOC); + auto Flag3 = GetRFLAG(FEXCore::X86State::RFLAG_OF_LOC); + + auto Select1 = _Select(FEXCore::IR::COND_EQ, + Flag1, OneConst, OneConst, ZeroConst); + + auto Select2 = _Select(FEXCore::IR::COND_NEQ, + Flag2, Flag3, OneConst, ZeroConst); + SrcCond = _Or(Select1, Select2); + break; + } + case 0x9F: { // ZF = 0 && SF = OF + auto Flag1 = GetRFLAG(FEXCore::X86State::RFLAG_ZF_LOC); + auto Flag2 = GetRFLAG(FEXCore::X86State::RFLAG_SF_LOC); + auto Flag3 = GetRFLAG(FEXCore::X86State::RFLAG_OF_LOC); + + auto Select1 = _Select(FEXCore::IR::COND_EQ, + Flag1, ZeroConst, OneConst, ZeroConst); + + auto Select2 = _Select(FEXCore::IR::COND_EQ, + Flag2, Flag3, OneConst, ZeroConst); + SrcCond = _And(Select1, Select2); + break; + } + default: + LogMan::Msg::A("Unhandled SetCC op: 0x%x", Op->OP); + break; + } + + if (Type != COMPARE_OTHER) { + auto MaskConst = _Constant(FLAGMask); + + auto RFLAG = GetPackedRFLAG(false); + + auto AndOp = _And(RFLAG, MaskConst); + + switch (Type) { + case COMPARE_ZERO: { + SrcCond = _Select(FEXCore::IR::COND_EQ, + AndOp, ZeroConst, OneConst, ZeroConst); + break; + } + case COMPARE_NOTZERO: { + SrcCond = _Select(FEXCore::IR::COND_NEQ, + AndOp, ZeroConst, OneConst, ZeroConst); + break; + } + case COMPARE_EQUALMASK: { + SrcCond = _Select(FEXCore::IR::COND_EQ, + AndOp, MaskConst, OneConst, ZeroConst); + break; + } + case COMPARE_OTHER: break; + } + } + + StoreResult(Op, SrcCond); +} + + +void OpDispatchBuilder::TESTOp(OpcodeArgs) { + // TEST is an instruction that does an AND between the sources + // Result isn't stored in result, only writes to flags + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + auto ALUOp = _And(Dest, Src); + auto Size = 
GetSrcSize(Op) * 8; + GenerateFlags_Logical(Op, _Bfe(Size, 0, ALUOp), _Bfe(Size, 0, Dest), _Bfe(Size, 0, Src)); +} + +void OpDispatchBuilder::MOVSXDOp(OpcodeArgs) { + // This instruction is a bit special + // if SrcSize == 2 + // Then lower 16 bits of destination is written without changing the upper 48 bits + // else /* Size == 4 */ + // if REX_WIDENING: + // Sext(32, Src) + // else + // Zext(32, Src) + // + uint8_t Size = std::min(static_cast(4), GetSrcSize(Op)); + + OrderedNode *Src = LoadSource_WithOpSize(Op, Op->Src1, Size, Op->Flags); + if (Size == 2) { + // This'll make sure to insert in to the lower 16bits without modifying upper bits + StoreResult_WithOpSize(Op, Op->Dest, Src, Size); + } + else if (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REX_WIDENING) { + // With REX.W then Sext + Src = _Sext(Size * 8, Src); + StoreResult(Op, Src); + } + else { + // Without REX.W then Zext + Src = _Zext(Size * 8, Src); + StoreResult(Op, Src); + } +} + +void OpDispatchBuilder::MOVSXOp(OpcodeArgs) { + // This will ZExt the loaded size + // We want to Sext it + uint8_t Size = GetSrcSize(Op); + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + Src = _Sext(Size * 8, Src); + StoreResult(Op, Op->Dest, Src); +} + +void OpDispatchBuilder::MOVZXOp(OpcodeArgs) { + uint8_t Size = GetSrcSize(Op); + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + // Just make sure this is zero extended + Src = _Zext(Size * 8, Src); + StoreResult(Op, Src); +} + +void OpDispatchBuilder::CMPOp(OpcodeArgs) { + // CMP is an instruction that does a SUB between the sources + // Result isn't stored in result, only writes to flags + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + auto Size = GetDstSize(Op) * 8; + auto ALUOp = _Sub(Dest, Src); + GenerateFlags_SUB(Op, _Bfe(Size, 0, ALUOp), _Bfe(Size, 0, Dest), _Bfe(Size, 0, Src)); +} + +void OpDispatchBuilder::CQOOp(OpcodeArgs) { + OrderedNode *Src = LoadSource(Op, 
Op->Src1, Op->Flags); + auto BfeOp = _Bfe(1, GetSrcSize(Op) * 8 - 1, Src); + auto ZeroConst = _Constant(0); + auto MaxConst = _Constant(~0ULL); + auto SelectOp = _Select(FEXCore::IR::COND_EQ, BfeOp, ZeroConst, ZeroConst, MaxConst); + + StoreResult(Op, SelectOp); +} + +void OpDispatchBuilder::XCHGOp(OpcodeArgs) { + // Load both the source and the destination + if (Op->OP == 0x90 && + GetSrcSize(Op) >= 4 && + Op->Src1.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR && + Op->Src1.TypeGPR.GPR == FEXCore::X86State::REG_RAX && + Op->Dest.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR && + Op->Dest.TypeGPR.GPR == FEXCore::X86State::REG_RAX) { + // This is one heck of a sucky special case + // If we are the 0x90 XCHG opcode (Meaning source is GPR RAX) + // and destination register is ALSO RAX + // and in this very specific case we are 32bit or above + // Then this is a no-op + // This is because 0x90 without a prefix is technically `xchg eax, eax` + // But this would result in a zext on 64bit, which would ruin the no-op nature of the instruction + // So x86-64 spec mandates this special case that even though it is a 32bit instruction and + // is supposed to zext the result, it is a true no-op + return; + } + + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + // Swap the contents + // Order matters here since we don't want to swap context contents for one that effects the other + StoreResult(Op, Op->Dest, Src); + StoreResult(Op, Op->Src1, Dest); +} + +void OpDispatchBuilder::CDQOp(OpcodeArgs) { + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + // Op size is destination size + // Therefore sext OpSize/2 + uint8_t Size = GetSrcSize(Op); + Src = _Sext(Size * 8, Src); + if (Size == 4) + Src = _Zext(Size * 2 * 8, Src); + + StoreResult(Op, Src); +} + +void OpDispatchBuilder::SAHFOp(OpcodeArgs) { + OrderedNode *Src = _LoadContext(1, offsetof(FEXCore::Core::CPUState, 
gregs[FEXCore::X86State::REG_RAX]) + 1); + + // Clear bits that aren't supposed to be set + Src = _And(Src, _Constant(~0b101000)); + + // Set the bit that is always set here + Src = _Or(Src, _Constant(0b10)); + + // Store the lower 8 bits in to RFLAGS + SetPackedRFLAG(true, Src); +} +void OpDispatchBuilder::LAHFOp(OpcodeArgs) { + // Load the lower 8 bits of the Rflags register + auto RFLAG = GetPackedRFLAG(true); + + // Store the lower 8 bits of the rflags register in to AH + _StoreContext(1, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]) + 1, RFLAG); +} + +void OpDispatchBuilder::FLAGControlOp(OpcodeArgs) { + enum OpType { + OP_CLEAR, + OP_SET, + OP_COMPLEMENT, + }; + OpType Type; + uint64_t Flag; + switch (Op->OP) { + case 0xF5: // CMC + Flag= FEXCore::X86State::RFLAG_CF_LOC; + Type = OP_COMPLEMENT; + break; + case 0xF8: // CLC + Flag= FEXCore::X86State::RFLAG_CF_LOC; + Type = OP_CLEAR; + break; + case 0xF9: // STC + Flag= FEXCore::X86State::RFLAG_CF_LOC; + Type = OP_SET; + break; + case 0xFC: // CLD + Flag= FEXCore::X86State::RFLAG_DF_LOC; + Type = OP_CLEAR; + break; + case 0xFD: // STD + Flag= FEXCore::X86State::RFLAG_DF_LOC; + Type = OP_SET; + break; + } + + OrderedNode *Result{}; + switch (Type) { + case OP_CLEAR: { + Result = _Constant(0); + break; + } + case OP_SET: { + Result = _Constant(1); + break; + } + case OP_COMPLEMENT: { + auto RFLAG = GetRFLAG(Flag); + Result = _Xor(RFLAG, _Constant(1)); + break; + } + } + + SetRFLAG(Result, Flag); +} + +void OpDispatchBuilder::MOVSegOp(OpcodeArgs) { + // In x86-64 mode the accesses to the segment registers end up being constant zero moves + // Aside from FS/GS + LogMan::Msg::A("Wanting reg: %d\n", Op->Src1.TypeGPR.GPR); + // StoreResult(Op, Src); +} + +void OpDispatchBuilder::MOVOffsetOp(OpcodeArgs) { + OrderedNode *Src; + const FEXCore::X86Tables::DecodedOperand *Dest; + + switch (Op->OP) { + case 0xA0: + case 0xA1: + // Source is memory(literal) + // Dest is GPR + Src = LoadSource(Op, 
Op->Src1, Op->Flags, true); + Dest = &Op->Dest; + break; + case 0xA2: + case 0xA3: + // Source is GPR + // Dest is memory(literal) + Src = LoadSource(Op, Op->Src1, Op->Flags); + Dest = &Op->Src2; + break; + } + StoreResult(Op, *Dest, Src); +} + +void OpDispatchBuilder::CMOVOp(OpcodeArgs) { + enum CompareType { + COMPARE_ZERO, + COMPARE_NOTZERO, + COMPARE_EQUALMASK, + COMPARE_OTHER, + }; + uint32_t FLAGMask; + CompareType Type = COMPARE_OTHER; + OrderedNode *SrcCond; + + auto ZeroConst = _Constant(0); + auto OneConst = _Constant(1); + + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + switch (Op->OP) { + case 0x40: + FLAGMask = 1 << FEXCore::X86State::RFLAG_OF_LOC; + Type = COMPARE_NOTZERO; + break; + case 0x41: + FLAGMask = 1 << FEXCore::X86State::RFLAG_OF_LOC; + Type = COMPARE_ZERO; + break; + case 0x42: + FLAGMask = 1 << FEXCore::X86State::RFLAG_CF_LOC; + Type = COMPARE_NOTZERO; + break; + case 0x43: + FLAGMask = 1 << FEXCore::X86State::RFLAG_CF_LOC; + Type = COMPARE_ZERO; + break; + case 0x44: + FLAGMask = 1 << FEXCore::X86State::RFLAG_ZF_LOC; + Type = COMPARE_NOTZERO; + break; + case 0x45: + FLAGMask = 1 << FEXCore::X86State::RFLAG_ZF_LOC; + Type = COMPARE_ZERO; + break; + case 0x46: + FLAGMask = (1 << FEXCore::X86State::RFLAG_ZF_LOC) | (1 << FEXCore::X86State::RFLAG_CF_LOC); + Type = COMPARE_NOTZERO; + break; + case 0x47: + FLAGMask = (1 << FEXCore::X86State::RFLAG_ZF_LOC) | (1 << FEXCore::X86State::RFLAG_CF_LOC); + Type = COMPARE_ZERO; + break; + case 0x48: + FLAGMask = 1 << FEXCore::X86State::RFLAG_SF_LOC; + Type = COMPARE_NOTZERO; + break; + case 0x49: + FLAGMask = 1 << FEXCore::X86State::RFLAG_SF_LOC; + Type = COMPARE_ZERO; + break; + case 0x4A: + FLAGMask = 1 << FEXCore::X86State::RFLAG_PF_LOC; + Type = COMPARE_NOTZERO; + break; + case 0x4B: + FLAGMask = 1 << FEXCore::X86State::RFLAG_PF_LOC; + Type = COMPARE_ZERO; + break; + case 0x4C: { // SF <> OF + auto Flag1 = 
GetRFLAG(FEXCore::X86State::RFLAG_SF_LOC); + auto Flag2 = GetRFLAG(FEXCore::X86State::RFLAG_OF_LOC); + SrcCond = _Select(FEXCore::IR::COND_NEQ, + Flag1, Flag2, Src, Dest); + break; + } + case 0x4D: { // SF = OF + auto Flag1 = GetRFLAG(FEXCore::X86State::RFLAG_SF_LOC); + auto Flag2 = GetRFLAG(FEXCore::X86State::RFLAG_OF_LOC); + SrcCond = _Select(FEXCore::IR::COND_EQ, + Flag1, Flag2, Src, Dest); + break; + } + + case 0x4E: { // ZF = 1 || SF <> OF + auto Flag1 = GetRFLAG(FEXCore::X86State::RFLAG_ZF_LOC); + auto Flag2 = GetRFLAG(FEXCore::X86State::RFLAG_SF_LOC); + auto Flag3 = GetRFLAG(FEXCore::X86State::RFLAG_OF_LOC); + + auto Select1 = _Select(FEXCore::IR::COND_EQ, + Flag1, OneConst, OneConst, ZeroConst); + + auto Select2 = _Select(FEXCore::IR::COND_NEQ, + Flag2, Flag3, OneConst, ZeroConst); + auto Check = _Or(Select1, Select2); + + SrcCond = _Select(FEXCore::IR::COND_EQ, + Check, OneConst, Src, Dest); + break; + } + + case 0x4F: { // ZF = 0 && SSF = OF + auto Flag1 = GetRFLAG(FEXCore::X86State::RFLAG_ZF_LOC); + auto Flag2 = GetRFLAG(FEXCore::X86State::RFLAG_SF_LOC); + auto Flag3 = GetRFLAG(FEXCore::X86State::RFLAG_OF_LOC); + + auto Select1 = _Select(FEXCore::IR::COND_EQ, + Flag1, ZeroConst, OneConst, ZeroConst); + + auto Select2 = _Select(FEXCore::IR::COND_EQ, + Flag2, Flag3, OneConst, ZeroConst); + auto Check = _And(Select1, Select2); + + SrcCond = _Select(FEXCore::IR::COND_EQ, + Check, OneConst, Src, Dest); + break; + } + default: + LogMan::Msg::A("Unhandled CMOV op: 0x%x", Op->OP); + break; + } + + if (Type != COMPARE_OTHER) { + auto MaskConst = _Constant(FLAGMask); + + auto RFLAG = GetPackedRFLAG(false); + + auto AndOp = _And(RFLAG, MaskConst); + + switch (Type) { + case COMPARE_ZERO: { + SrcCond = _Select(FEXCore::IR::COND_EQ, + AndOp, ZeroConst, Src, Dest); + break; + } + case COMPARE_NOTZERO: { + SrcCond = _Select(FEXCore::IR::COND_NEQ, + AndOp, ZeroConst, Src, Dest); + break; + } + case COMPARE_EQUALMASK: { + SrcCond = _Select(FEXCore::IR::COND_EQ, + AndOp, 
MaskConst, Src, Dest); + break; + } + + case COMPARE_OTHER: break; + } + } + + StoreResult(Op, SrcCond); +} + +void OpDispatchBuilder::CPUIDOp(OpcodeArgs) { + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + auto Res = _CPUID(Src); + + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), _ExtractElement(Res, 0)); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RBX]), _ExtractElement(Res, 1)); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX]), _ExtractElement(Res, 2)); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RCX]), _ExtractElement(Res, 3)); +} +void OpDispatchBuilder::SHLOp(OpcodeArgs) { + bool SHL1Bit = false; +#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg)) + switch (Op->OP) { + case OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 4): + case OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 4): + SHL1Bit = true; + break; + } +#undef OPD + OrderedNode *Src; + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + if (SHL1Bit) { + Src = _Constant(1); + } + else { + Src = LoadSource(Op, Op->Src1, Op->Flags); + } + auto Size = GetSrcSize(Op) * 8; + + // x86 masks the shift by 0x3F or 0x1F depending on size of op + if (Size == 64) + Src = _And(Src, _Constant(0x3F)); + else + Src = _And(Src, _Constant(0x1F)); + + auto ALUOp = _Lshl(Dest, Src); + + StoreResult(Op, ALUOp); + + // XXX: This isn't correct + GenerateFlags_Logical(Op, _Bfe(Size, 0, ALUOp), _Bfe(Size, 0, Dest), _Bfe(Size, 0, Src)); +} + +void OpDispatchBuilder::SHROp(OpcodeArgs) { + bool SHR1Bit = false; +#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg)) + switch (Op->OP) { + case OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 5): + case OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 5): + SHR1Bit = true; + 
break; + } +#undef OPD + + OrderedNode *Src; + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + if (SHR1Bit) { + Src = _Constant(1); + } + else { + Src = LoadSource(Op, Op->Src1, Op->Flags); + } + + auto Size = GetSrcSize(Op) * 8; + + // x86 masks the shift by 0x3F or 0x1F depending on size of op + if (Size == 64) + Src = _And(Src, _Constant(0x3F)); + else + Src = _And(Src, _Constant(0x1F)); + + auto ALUOp = _Lshr(Dest, Src); + + StoreResult(Op, ALUOp); + + // XXX: This isn't correct + GenerateFlags_Logical(Op, _Bfe(Size, 0, ALUOp), _Bfe(Size, 0, Dest), _Bfe(Size, 0, Src)); + + if (SHR1Bit) { + SetRFLAG(_Bfe(1, Size - 1, Src)); + } +} + +void OpDispatchBuilder::ASHROp(OpcodeArgs) { + bool SHR1Bit = false; +#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg)) + switch (Op->OP) { + case OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 7): + case OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 7): + SHR1Bit = true; + break; + } +#undef OPD + + OrderedNode *Src; + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + auto Size = GetSrcSize(Op) * 8; + + if (SHR1Bit) { + Src = _Constant(Size, 1); + } + else { + Src = LoadSource(Op, Op->Src1, Op->Flags); + } + + + // x86 masks the shift by 0x3F or 0x1F depending on size of op + if (Size == 64) + Src = _And(Src, _Constant(Size, 0x3F)); + else + Src = _And(Src, _Constant(Size, 0x1F)); + + auto ALUOp = _Ashr(Dest, Src); + + StoreResult(Op, ALUOp); + + // XXX: This isn't correct + GenerateFlags_Logical(Op, _Bfe(Size, 0, ALUOp), _Bfe(Size, 0, Dest), _Bfe(Size, 0, Src)); + + if (SHR1Bit) { + SetRFLAG(_Bfe(1, Size - 1, Src)); + } +} + +void OpDispatchBuilder::ROROp(OpcodeArgs) { + bool Is1Bit = false; +#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg)) + switch (Op->OP) { + case OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 1): + case OPD(FEXCore::X86Tables::TYPE_GROUP_2, 
OpToIndex(0xD1), 1): + Is1Bit = true; + break; + } +#undef OPD + + OrderedNode *Src; + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + auto Size = GetSrcSize(Op) * 8; + if (Is1Bit) { + Src = _Constant(Size, 1); + } + else { + Src = LoadSource(Op, Op->Src1, Op->Flags); + } + + // x86 masks the shift by 0x3F or 0x1F depending on size of op + if (Size == 64) + Src = _And(Src, _Constant(Size, 0x3F)); + else + Src = _And(Src, _Constant(Size, 0x1F)); + + auto ALUOp = _Ror(Dest, Src); + + StoreResult(Op, ALUOp); + + // XXX: This is incorrect + GenerateFlags_Rotate(Op, _Bfe(Size, 0, ALUOp), _Bfe(Size, 0, Dest), _Bfe(Size, 0, Src)); + + if (Is1Bit) { + SetRFLAG(_Bfe(1, Size - 1, Src)); + } +} +void OpDispatchBuilder::ROLOp(OpcodeArgs) { + bool Is1Bit = false; +#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg)) + switch (Op->OP) { + case OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 0): + case OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 0): + Is1Bit = true; + break; + default: break; + } +#undef OPD + + OrderedNode *Src; + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + auto Size = GetSrcSize(Op) * 8; + + if (Is1Bit) { + Src = _Constant(Size, 1); + } + else { + Src = LoadSource(Op, Op->Src1, Op->Flags); + } + + // x86 masks the shift by 0x3F or 0x1F depending on size of op + if (Size == 64) + Src = _And(Src, _Constant(Size, 0x3F)); + else + Src = _And(Src, _Constant(Size, 0x1F)); + + auto ALUOp = _Rol(Dest, Src); + + StoreResult(Op, ALUOp); + + // XXX: This is incorrect + GenerateFlags_Rotate(Op, _Bfe(Size, 0, ALUOp), _Bfe(Size, 0, Dest), _Bfe(Size, 0, Src)); + + if (Is1Bit) { + SetRFLAG(_Bfe(1, Size - 1, Src)); + } +} + +void OpDispatchBuilder::BTOp(OpcodeArgs) { + OrderedNode *Result; + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + if (Op->Dest.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR) { + OrderedNode *Dest = LoadSource(Op, Op->Dest, 
Op->Flags); + Result = _Lshr(Dest, Src); + } + else { + // Load the address to the memory location + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags, false); + uint32_t Size = GetSrcSize(Op); + uint32_t Mask = Size * 8 - 1; + OrderedNode *SizeMask = _Constant(Mask); + OrderedNode *AddressShift = _Constant(32 - __builtin_clz(Mask)); + + // Get the bit selection from the src + OrderedNode *BitSelect = _And(Src, SizeMask); + + // First shift out the selection bits + Src = _Lshr(Src, AddressShift); + + // Now multiply by operand size to get correct indexing + if (Size != 1) { + Src = _Lshl(Src, _Constant(Size - 1)); + } + + // Get the address offset by shifting out the size of the op (To shift out the bit selection) + // Then use that to index in to the memory location by size of op + + // Now add the addresses together and load the memory + OrderedNode *MemoryLocation = _Add(Dest, Src); + Result = _LoadMem(Size, MemoryLocation); + + // Now shift in to the correct bit location + Result = _Lshr(Result, BitSelect); + } + SetRFLAG(Result); +} + +void OpDispatchBuilder::IMUL1SrcOp(OpcodeArgs) { + OrderedNode *Src1 = LoadSource(Op, Op->Dest, Op->Flags); + OrderedNode *Src2 = LoadSource(Op, Op->Src1, Op->Flags); + + auto Dest = _Mul(Src1, Src2); + StoreResult(Op, Dest); + GenerateFlags_MUL(Op, Dest, _MulH(Src1, Src2)); +} + +void OpDispatchBuilder::IMUL2SrcOp(OpcodeArgs) { + OrderedNode *Src1 = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Src2 = LoadSource(Op, Op->Src2, Op->Flags); + + auto Dest = _Mul(Src1, Src2); + StoreResult(Op, Dest); + GenerateFlags_MUL(Op, Dest, _MulH(Src1, Src2)); +} + +void OpDispatchBuilder::IMULOp(OpcodeArgs) { + uint8_t Size = GetSrcSize(Op); + OrderedNode *Src1 = LoadSource(Op, Op->Dest, Op->Flags); + OrderedNode *Src2 = _LoadContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX])); + + if (Size != 8) { + Src1 = _Sext(Size * 8, Src1); + Src2 = _Sext(Size * 8, Src2); + } + + OrderedNode *Result = 
_Mul(Src1, Src2); + OrderedNode *ResultHigh{}; + if (Size == 1) { + // Result is stored in AX + _StoreContext(2, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), Result); + ResultHigh = _Bfe(8, 8, Result); + ResultHigh = _Sext(Size * 8, ResultHigh); + } + else if (Size == 2) { + // 16bits stored in AX + // 16bits stored in DX + _StoreContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), Result); + ResultHigh = _Bfe(16, 16, Result); + ResultHigh = _Sext(Size * 8, ResultHigh); + _StoreContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX]), ResultHigh); + } + else if (Size == 4) { + // 32bits stored in EAX + // 32bits stored in EDX + // Make sure they get Zext correctly + OrderedNode *ResultLow = _Bfe(32, 0, Result); + ResultLow = _Zext(Size * 8, ResultLow); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), ResultLow); + ResultHigh = _Bfe(32, 32, Result); + ResultHigh = _Zext(Size * 8, ResultHigh); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX]), ResultHigh); + } + else if (Size == 8) { + // 64bits stored in RAX + // 64bits stored in RDX + ResultHigh = _MulH(Src1, Src2); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), Result); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX]), ResultHigh); + } + + GenerateFlags_MUL(Op, Result, ResultHigh); +} + +void OpDispatchBuilder::MULOp(OpcodeArgs) { + uint8_t Size = GetSrcSize(Op); + OrderedNode *Src1 = LoadSource(Op, Op->Dest, Op->Flags); + OrderedNode *Src2 = _LoadContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX])); + if (Size != 8) { + Src1 = _Zext(Size * 8, Src1); + Src2 = _Zext(Size * 8, Src2); + } + OrderedNode *Result = _UMul(Src1, Src2); + OrderedNode *ResultHigh{}; + + if (Size == 1) { + // Result is stored in AX + _StoreContext(2, 
offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), Result); + ResultHigh = _Bfe(8, 8, Result); + } + else if (Size == 2) { + // 16bits stored in AX + // 16bits stored in DX + _StoreContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), Result); + ResultHigh = _Bfe(16, 16, Result); + _StoreContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX]), ResultHigh); + } + else if (Size == 4) { + // 32bits stored in EAX + // 32bits stored in EDX + // Make sure they get Zext correctly + OrderedNode *ResultLow = _Bfe(32, 0, Result); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), ResultLow); + ResultHigh = _Bfe(32, 32, Result); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX]), ResultHigh); + + } + else if (Size == 8) { + // 64bits stored in RAX + // 64bits stored in RDX + ResultHigh = _UMulH(Src1, Src2); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), Result); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX]), ResultHigh); + } + + GenerateFlags_UMUL(Op, ResultHigh); +} + +void OpDispatchBuilder::NOTOp(OpcodeArgs) { + uint8_t Size = GetSrcSize(Op); + OrderedNode *MaskConst{}; + if (Size == 8) { + MaskConst = _Constant(~0ULL); + } + else { + MaskConst = _Constant((1ULL << (Size * 8)) - 1); + } + + OrderedNode *Src = LoadSource(Op, Op->Dest, Op->Flags); + Src = _Xor(Src, MaskConst); + StoreResult(Op, Src); +} + +void OpDispatchBuilder::RDTSCOp(OpcodeArgs) { + auto Counter = _CycleCounter(); + auto CounterLow = _Bfe(32, 0, Counter); + auto CounterHigh = _Bfe(32, 32, Counter); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), CounterLow); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX]), CounterHigh); +} + +void OpDispatchBuilder::INCOp(OpcodeArgs) { + if (Op->Flags & 
FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX) { + LogMan::Msg::A("Can't handle REP on this\n"); + } + + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + auto OneConst = _Constant(1); + auto ALUOp = _Add(Dest, OneConst); + + StoreResult(Op, ALUOp); + + auto Size = GetSrcSize(Op) * 8; + GenerateFlags_ADD(Op, _Bfe(Size, 0, ALUOp), _Bfe(Size, 0, Dest), _Bfe(Size, 0, OneConst)); +} + +void OpDispatchBuilder::DECOp(OpcodeArgs) { + if (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX) { + LogMan::Msg::A("Can't handle REP on this\n"); + } + + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + auto OneConst = _Constant(1); + auto ALUOp = _Sub(Dest, OneConst); + + StoreResult(Op, ALUOp); + + auto Size = GetSrcSize(Op) * 8; + GenerateFlags_SUB(Op, _Bfe(Size, 0, ALUOp), _Bfe(Size, 0, Dest), _Bfe(Size, 0, OneConst)); +} + +void OpDispatchBuilder::STOSOp(OpcodeArgs) { + if (!(Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX)) { + LogMan::Msg::A("Can't handle REP not existing on STOS\n"); + } + auto Size = GetSrcSize(Op); + + auto ZeroConst = _Constant(0); + auto OneConst = _Constant(1); + + auto SizeConst = _Constant(Size); + auto NegSizeConst = _Constant(-Size); + + auto DF = GetRFLAG(FEXCore::X86State::RFLAG_DF_LOC); + auto PtrDir = _Select(FEXCore::IR::COND_EQ, + DF, ZeroConst, + SizeConst, NegSizeConst); + + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + + auto JumpStart = _Jump(); + _EndBlock(0); + + // Make sure to start a new block after ending this one + auto LoopStart = _BeginBlock(); + JumpStart.first->Header.Args[0] = LoopStart.Node->Wrapped(ListData.Begin()); + + OrderedNode *Counter = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RCX])); + + OrderedNode *Dest = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDI])); + + // Store to memory where RDI points + _StoreMem(Size, Dest, Src); + + // Can we end the block? 
+ auto CanLeaveCond = _Select(FEXCore::IR::COND_EQ, + Counter, ZeroConst, + OneConst, ZeroConst); + auto CondJump = _CondJump(); + CondJump.first->Header.NumArgs = 1; + CondJump.first->Header.Args[0] = CanLeaveCond.Node->Wrapped(ListData.Begin()); + + // Decrement counter + Counter = _Sub(Counter, OneConst); + + // Store the counter so we don't have to deal with PHI here + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RCX]), Counter); + + // Offset the pointer + Dest = _Add(Dest, PtrDir); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDI]), Dest); + + // Jump back to the start, we have more work to do + _Jump(LoopStart); + _EndBlock(0); + // Make sure to start a new block after ending this one + auto LoopEnd = _BeginBlock(); + CondJump.first->Header.Args[1] = LoopEnd.Node->Wrapped(ListData.Begin()); +} + +void OpDispatchBuilder::MOVSOp(OpcodeArgs) { + if (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX) { + LogMan::Msg::A("Can't handle REP\n"); + } + + _Break(0, 0); +} +void OpDispatchBuilder::CMPSOp(OpcodeArgs) { + if (!(Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX)) { + LogMan::Msg::A("Can't only handle REP\n"); + } + + auto Size = GetSrcSize(Op); + + auto ZeroConst = _Constant(0); + auto OneConst = _Constant(1); + + auto SizeConst = _Constant(Size); + auto NegSizeConst = _Constant(-Size); + + auto DF = GetRFLAG(FEXCore::X86State::RFLAG_DF_LOC); + auto PtrDir = _Select(FEXCore::IR::COND_EQ, + DF, ZeroConst, + SizeConst, NegSizeConst); + + auto JumpStart = _Jump(); + _EndBlock(0); + // Make sure to start a new block after ending this one + auto LoopStart = _BeginBlock(); + JumpStart.first->Header.Args[0] = LoopStart.Node->Wrapped(ListData.Begin()); + + OrderedNode *Counter = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RCX])); + OrderedNode *Dest_RDI = _LoadContext(8, offsetof(FEXCore::Core::CPUState, 
gregs[FEXCore::X86State::REG_RDI])); + OrderedNode *Dest_RSI = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSI])); + + auto Src1 = _LoadMem(Size, Dest_RDI); + auto Src2 = _LoadMem(Size, Dest_RSI); + + auto ALUOp = _Sub(Src1, Src2); + GenerateFlags_SUB(Op, ALUOp, Src1, Src2); + + // Can we end the block? + auto CanLeaveCond = _Select(FEXCore::IR::COND_EQ, + Counter, ZeroConst, + OneConst, ZeroConst); + auto CondJump = _CondJump(); + CondJump.first->Header.NumArgs = 1; + CondJump.first->Header.Args[0] = CanLeaveCond.Node->Wrapped(ListData.Begin()); + + // Decrement counter + Counter = _Sub(Counter, OneConst); + + // Store the counter so we don't have to deal with PHI here + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RCX]), Counter); + + // Offset the pointer + Dest_RDI = _Add(Dest_RDI, PtrDir); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDI]), Dest_RDI); + + // Offset second pointer + Dest_RSI = _Add(Dest_RSI, PtrDir); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSI]), Dest_RSI); + + // Jump back to the start, we have more work to do + _Jump(LoopStart); + _EndBlock(0); + // Make sure to start a new block after ending this one + auto LoopEnd = _BeginBlock(); + CondJump.first->Header.Args[1] = LoopEnd.Node->Wrapped(ListData.Begin()); + +} + +void OpDispatchBuilder::BSWAPOp(OpcodeArgs) { + OrderedNode *Dest; + if (GetSrcSize(Op) == 2) { + // BSWAP of 16bit is undef. 
ZEN+ causes the lower 16bits to get zero'd + Dest = _Constant(0); + } + else { + Dest = LoadSource(Op, Op->Dest, Op->Flags); + Dest = _Rev(Dest); + } + StoreResult(Op, Dest); +} + +void OpDispatchBuilder::NEGOp(OpcodeArgs) { + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + auto ZeroConst = _Constant(0); + auto ALUOp = _Sub(ZeroConst, Dest); + + StoreResult(Op, ALUOp); + + auto Size = GetSrcSize(Op) * 8; + + GenerateFlags_SUB(Op, _Bfe(Size, 0, ALUOp), _Bfe(Size, 0, ZeroConst), _Bfe(Size, 0, Dest)); +} + +void OpDispatchBuilder::DIVOp(OpcodeArgs) { + // This loads the divisor + OrderedNode *Divisor = LoadSource(Op, Op->Dest, Op->Flags); + + auto Size = GetSrcSize(Op); + + if (Size == 1) { + OrderedNode *Src1 = _LoadContext(2, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX])); + + auto UDivOp = _UDiv(Src1, Divisor); + auto URemOp = _URem(Src1, Divisor); + + _StoreContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), UDivOp); + _StoreContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]) + 1, URemOp); + } + else if (Size == 2) { + OrderedNode *Src1 = _LoadContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX])); + OrderedNode *Src2 = _LoadContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX])); + auto UDivOp = _LUDiv(Src1, Src2, Divisor); + auto URemOp = _LURem(Src1, Src2, Divisor); + + _StoreContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), UDivOp); + _StoreContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX]), URemOp); + } + else if (Size == 4) { + OrderedNode *Src1 = _LoadContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX])); + OrderedNode *Src2 = _LoadContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX])); + + auto UDivOp = _LUDiv(Src1, Src2, Divisor); + auto URemOp = _LURem(Src1, Src2, Divisor); + + 
_StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), _Zext(32, UDivOp)); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX]), _Zext(32, URemOp)); + } + else if (Size == 8) { + OrderedNode *Src1 = _LoadContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX])); + OrderedNode *Src2 = _LoadContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX])); + + auto UDivOp = _LUDiv(Src1, Src2, Divisor); + auto URemOp = _LURem(Src1, Src2, Divisor); + + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), UDivOp); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX]), URemOp); + } +} + +void OpDispatchBuilder::IDIVOp(OpcodeArgs) { + // This loads the divisor + OrderedNode *Divisor = LoadSource(Op, Op->Dest, Op->Flags); + + auto Size = GetSrcSize(Op); + + if (Size == 1) { + OrderedNode *Src1 = _LoadContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX])); + + auto UDivOp = _Div(Src1, Divisor); + auto URemOp = _Rem(Src1, Divisor); + + _StoreContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), UDivOp); + _StoreContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]) + 1, URemOp); + } + else if (Size == 2) { + OrderedNode *Src1 = _LoadContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX])); + OrderedNode *Src2 = _LoadContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX])); + auto UDivOp = _LDiv(Src1, Src2, Divisor); + auto URemOp = _LRem(Src1, Src2, Divisor); + + _StoreContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), UDivOp); + _StoreContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX]), URemOp); + } + else if (Size == 4) { + OrderedNode *Src1 = _LoadContext(Size, offsetof(FEXCore::Core::CPUState, 
gregs[FEXCore::X86State::REG_RAX])); + OrderedNode *Src2 = _LoadContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX])); + + auto UDivOp = _LDiv(Src1, Src2, Divisor); + auto URemOp = _LRem(Src1, Src2, Divisor); + + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), _Zext(32, UDivOp)); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX]), _Zext(32, URemOp)); + } + else if (Size == 8) { + OrderedNode *Src1 = _LoadContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX])); + OrderedNode *Src2 = _LoadContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX])); + + auto UDivOp = _LDiv(Src1, Src2, Divisor); + auto URemOp = _LRem(Src1, Src2, Divisor); + + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), UDivOp); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX]), URemOp); + } +} + +void OpDispatchBuilder::BSFOp(OpcodeArgs) { + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + + // Find the LSB of this source + auto Result = _FindLSB(Src); + + auto ZeroConst = _Constant(0); + auto OneConst = _Constant(1); + + // If Src was zero then the destination doesn't get modified + auto SelectOp = _Select(FEXCore::IR::COND_EQ, + Src, ZeroConst, + Dest, Result); + + // ZF is set to 1 if the source was zero + auto ZFSelectOp = _Select(FEXCore::IR::COND_EQ, + Src, ZeroConst, + OneConst, ZeroConst); + + StoreResult(Op, SelectOp); + SetRFLAG(ZFSelectOp); +} + +void OpDispatchBuilder::BSROp(OpcodeArgs) { + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + + // Find the MSB of this source + auto Result = _FindMSB(Src); + + auto ZeroConst = _Constant(0); + auto OneConst = _Constant(1); + + // If Src was zero then the destination doesn't get 
modified + auto SelectOp = _Select(FEXCore::IR::COND_EQ, + Src, ZeroConst, + Dest, Result); + + // ZF is set to 1 if the source was zero + auto ZFSelectOp = _Select(FEXCore::IR::COND_EQ, + Src, ZeroConst, + OneConst, ZeroConst); + + StoreResult(Op, SelectOp); + SetRFLAG(ZFSelectOp); +} + +void OpDispatchBuilder::MOVUPSOp(OpcodeArgs) { + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + StoreResult(Op, Src); +} + +void OpDispatchBuilder::MOVLHPSOp(OpcodeArgs) { + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + auto Result = _VInsElement(16, 8, 1, 0, Dest, Src); + StoreResult(Op, Result); +} + +void OpDispatchBuilder::MOVHPDOp(OpcodeArgs) { + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + // This instruction is a bit special that if the destination is a register then it'll ZEXT the 64bit source to 128bit + if (Op->Dest.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR) { + // If the destination is a GPR then the source is memory + // xmm1[127:64] = src + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + auto Result = _VInsElement(16, 8, 1, 0, Dest, Src); + StoreResult(Op, Result); + } + else { + // In this case memory is the destination and the high bits of the XMM are source + // Mem64 = xmm1[127:64] + auto Result = _VInsElement(16, 8, 0, 1, Src, Src); + StoreResult(Op, Result); + } +} + +void OpDispatchBuilder::PADDQOp(OpcodeArgs) { + auto Size = GetSrcSize(Op); + uint8_t ElementSize = 8; + switch (Op->OP) { + case 0xD4: ElementSize = 8; break; + case 0xFC: ElementSize = 1; break; + case 0xFE: ElementSize = 4; break; + default: LogMan::Msg::A("Unknown PADD op: 0x%04x", Op->OP); break; + } + + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + auto ALUOp = _VAdd(Size, ElementSize, Dest, Src); + StoreResult(Op, ALUOp); +} + +void OpDispatchBuilder::PSUBQOp(OpcodeArgs) { + auto Size = 
GetSrcSize(Op); + uint8_t ElementSize = 8; + switch (Op->OP) { + case 0xF8: ElementSize = 1; break; + case 0xF9: ElementSize = 2; break; + case 0xFA: ElementSize = 4; break; + case 0xFB: ElementSize = 8; break; + default: LogMan::Msg::A("Unknown PSUB op: 0x%04x", Op->OP); break; + } + + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + auto ALUOp = _VSub(Size, ElementSize, Dest, Src); + StoreResult(Op, ALUOp); +} + +template +void OpDispatchBuilder::PMINUOp(OpcodeArgs) { + auto Size = GetSrcSize(Op); + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + auto ALUOp = _VUMin(Size, ElementSize, Dest, Src); + StoreResult(Op, ALUOp); +} + +void OpDispatchBuilder::PMINSWOp(OpcodeArgs) { + auto Size = GetSrcSize(Op); + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + auto ALUOp = _VSMin(Size, 2, Dest, Src); + StoreResult(Op, ALUOp); +} + +void OpDispatchBuilder::VectorALUOp(OpcodeArgs) { + FEXCore::IR::IROps IROp; + switch (Op->OP) { + case 0xEB: + IROp = FEXCore::IR::IROps::OP_VOR; + break; + case 0xEF: + IROp = FEXCore::IR::IROps::OP_VXOR; + break; + default: + IROp = FEXCore::IR::IROps::OP_LAST; + LogMan::Msg::A("Unknown ALU Op"); + break; + } + + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + auto ALUOp = _Add(Dest, Src); + // Overwrite our IR's op type + ALUOp.first->Header.Op = IROp; + + StoreResult(Op, ALUOp); +} + +void OpDispatchBuilder::MOVQOp(OpcodeArgs) { + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + // This instruction is a bit special that if the destination is a register then it'll ZEXT the 64bit source to 128bit + if (Op->Dest.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR) { + _StoreContext(8, offsetof(FEXCore::Core::CPUState, xmm[Op->Dest.TypeGPR.GPR - 
FEXCore::X86State::REG_XMM_0][0]), Src); + auto Const = _Constant(0); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, xmm[Op->Dest.TypeGPR.GPR - FEXCore::X86State::REG_XMM_0][1]), Const); + } + else { + // This is simple, just store the result + StoreResult(Op, Src); + } +} + +void OpDispatchBuilder::PMOVMSKBOp(OpcodeArgs) { + auto Size = GetSrcSize(Op); + OrderedNode *CurrentVal = _Constant(0); + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + + for (unsigned i = 0; i < Size; ++i) { + // Extract the top bit of the element + OrderedNode *Tmp = _Bfe(1, ((i + 1) * 8) - 1, Src); + // Shift it to the correct location + Tmp = _Lshl(Tmp, _Constant(i)); + + // Or it with the current value + CurrentVal = _Or(CurrentVal, Tmp); + } + StoreResult(Op, CurrentVal); +} + +void OpDispatchBuilder::PUNPCKLOp(OpcodeArgs) { + auto Size = GetSrcSize(Op); + uint8_t ElementSize = 8; + switch (Op->OP) { + case 0x60: ElementSize = 1; break; + case 0x61: ElementSize = 2; break; + case 0x62: ElementSize = 4; break; + } + + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + + auto ALUOp = _VZip(Size, ElementSize, Dest, Src); + StoreResult(Op, ALUOp); +} + +void OpDispatchBuilder::PUNPCKHOp(OpcodeArgs) { + auto Size = GetSrcSize(Op); + uint8_t ElementSize = 8; + switch (Op->OP) { + case 0x68: ElementSize = 1; break; + case 0x69: ElementSize = 2; break; + case 0x6A: ElementSize = 4; break; + case 0x6D: ElementSize = 8; break; + } + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + + auto ALUOp = _VZip2(Size, ElementSize, Dest, Src); + StoreResult(Op, ALUOp); +} + +template +void OpDispatchBuilder::PSHUFDOp(OpcodeArgs) { + LogMan::Throw::A(ElementSize != 0, "What. 
No element size?"); + auto Size = GetSrcSize(Op); + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + uint8_t Shuffle = Op->Src2.TypeLiteral.Literal; + + uint8_t NumElements = Size / ElementSize; + if (ElementSize == 2) { + NumElements /= 2; + } + /* 16-bit variants (presumably PSHUFLW/PSHUFHW) shuffle only one 64-bit half of the register; BaseElement below selects which half via the Low template flag — confirm against upstream. */ + uint8_t BaseElement = Low ? 0 : NumElements; + + auto Dest = Src; + for (uint8_t Element = 0; Element < NumElements; ++Element) { + Dest = _VInsElement(Size, ElementSize, BaseElement + Element, BaseElement + (Shuffle & 0b11), Dest, Src); + Shuffle >>= 2; + } + + StoreResult(Op, Dest); +} + + /* SHUFPS/SHUFPD-style two-source shuffle. NOTE(review): only ONE selector bit is consumed per element (Shuffle & 0b1) and the source is picked as Srcs[Element]. For 4-element (32-bit) SHUFPS the ISA uses TWO selector bits per element and selects Src1 for elements 0-1 / Src2 for elements 2-3, so Element >= 2 would index past a two-entry Srcs array (the array's size argument was stripped in extraction). This looks correct only for the 2-element (64-bit) case — verify against the Intel SDM and upstream. */ template +void OpDispatchBuilder::SHUFOp(OpcodeArgs) { + LogMan::Throw::A(ElementSize != 0, "What. No element size?"); + auto Size = GetSrcSize(Op); + OrderedNode *Src1 = LoadSource(Op, Op->Dest, Op->Flags); + OrderedNode *Src2 = LoadSource(Op, Op->Src1, Op->Flags); + uint8_t Shuffle = Op->Src2.TypeLiteral.Literal; + + uint8_t NumElements = Size / ElementSize; + + auto Dest = Src1; + std::array Srcs = { + Src1, Src2 + }; + + // [63:0] = Src1[Selection] + // [127:64] = Src2[Selection] + for (uint8_t Element = 0; Element < NumElements; ++Element) { + Dest = _VInsElement(Size, ElementSize, Element, Shuffle & 0b1, Dest, Srcs[Element]); + Shuffle >>= 1; + } + + StoreResult(Op, Dest); +} + /* PCMPEQ{B,W,D}: per-element compare-for-equality; element size decoded from the opcode byte (0x74/0x75/0x76 -> 1/2/4). */ void OpDispatchBuilder::PCMPEQOp(OpcodeArgs) { + auto Size = GetSrcSize(Op); + uint8_t ElementSize = 4; + switch (Op->OP) { + case 0x74: ElementSize = 1; break; + case 0x75: ElementSize = 2; break; + case 0x76: ElementSize = 4; break; + default: LogMan::Msg::A("Unknown ElementSize"); break; + } + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + // This maps 1:1 to an AArch64 NEON Op + auto ALUOp = _VCMPEQ(Size, ElementSize, Dest, Src); + StoreResult(Op, ALUOp); +} + + /* PCMPGT: per-element signed greater-than; element size supplied by the (stripped) template parameter. */ template +void OpDispatchBuilder::PCMPGTOp(OpcodeArgs) { + auto Size = GetSrcSize(Op); + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + // This
maps 1:1 to an AArch64 NEON Op + auto ALUOp = _VCMPGT(Size, ElementSize, Dest, Src); + StoreResult(Op, ALUOp); +} + +void OpDispatchBuilder::MOVDOp(OpcodeArgs) { + OrderedNode *Src = LoadSource(Op, Op->Src1,Op->Flags); + if (Op->Dest.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR && + Op->Dest.TypeGPR.GPR >= FEXCore::X86State::REG_XMM_0) { + // When destination is XMM then it is zext to 128bit + uint64_t SrcSize = GetSrcSize(Op) * 8; + while (SrcSize != 128) { + Src = _Zext(SrcSize, Src); + SrcSize *= 2; + } + } + StoreResult(Op, Op->Dest, Src); +} + +void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) { +// CMPXCHG ModRM, reg, {RAX} +// MemData = *ModRM.dest +// if (RAX == MemData) +// modRM.dest = reg; +// ZF = 1 +// else +// ZF = 0 +// RAX = MemData +// +// CASL Xs, Xt, Xn +// MemData = *Xn +// if (MemData == Xs) +// *Xn = Xt +// Xs = MemData + if (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX || + Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX) { + LogMan::Msg::A("We don't support CMPXCHG to FS/GS segment"); + } + + auto Size = GetSrcSize(Op); + // If this is a memory location then we want the pointer to it + OrderedNode *Src1 = LoadSource(Op, Op->Dest, Op->Flags, false); + + // This is our source register + OrderedNode *Src2 = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Src3 = _LoadContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX])); + // 0x80014000 + // 0x80064000 + // 0x80064000 + + auto ZeroConst = _Constant(0); + auto OneConst = _Constant(1); + if (Op->Dest.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR) { + // If our destination is a GPR then this behaves differently + // RAX = RAX == Op1 ? RAX : Op1 + // AKA if they match then don't touch RAX value + // Otherwise set it to the rm operand + OrderedNode *RAXResult = _Select(FEXCore::IR::COND_EQ, + Src1, Src3, + Src3, Src1); + + // Op1 = RAX == Op1 ? 
Op2 : Op1 + // If they match then set the rm operand to the input + // else don't set the rm operand + OrderedNode *DestResult = _Select(FEXCore::IR::COND_EQ, + Src1, Src3, + Src2, Src1); + + // ZF = RAX == Op1 ? 1 : 0 + // Result of compare + OrderedNode *ZFResult = _Select(FEXCore::IR::COND_EQ, + Src1, Src3, + OneConst, ZeroConst); + + // Set ZF + SetRFLAG(ZFResult); + if (Size < 4) { + _StoreContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), RAXResult); + } + else { + if (Size == 4) { + RAXResult = _Zext(32, RAXResult); + } + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), RAXResult); + } + + // Store in to GPR Dest + // Have to make sure this is after the result store in RAX for when Dest == RAX + StoreResult(Op, DestResult); + } + else { + // DataSrc = *Src1 + // if (DataSrc == Src3) { *Src1 == Src2; } Src2 = DataSrc + // This will write to memory! Careful! + // Third operand must be a calculated guest memory address + OrderedNode *CASResult = _CAS(Src3, Src2, Src1); + + // If our CASResult(OldMem value) is equal to our comparison + // Then we managed to set the memory + OrderedNode *ZFResult = _Select(FEXCore::IR::COND_EQ, + CASResult, Src3, + OneConst, ZeroConst); + + // RAX gets the result of the CAS op + if (Size < 4) { + _StoreContext(Size, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), CASResult); + } + else { + if (Size == 4) { + CASResult = _Zext(32, CASResult); + } + _StoreContext(8, offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]), CASResult); + } + + // Set ZF + SetRFLAG(ZFResult); + } +} + +void OpDispatchBuilder::BeginBlock() { + _BeginBlock(); +} + +void OpDispatchBuilder::EndBlock(uint64_t RIPIncrement) { + _EndBlock(RIPIncrement); +} + +void OpDispatchBuilder::ExitFunction() { + _ExitFunction(); +} + +uint8_t OpDispatchBuilder::GetDstSize(FEXCore::X86Tables::DecodedOp Op) { + constexpr std::array Sizes = { + 0, // Invalid DEF + 1, 
+ 2, + 4, + 8, + 16, + 32 + }; + + uint32_t DstSizeFlag = FEXCore::X86Tables::DecodeFlags::GetSizeDstFlags(Op->Flags); + uint8_t Size = Sizes[DstSizeFlag]; + LogMan::Throw::A(Size != 0, "Invalid destination size for op"); + return Size; +} + + /* Returns the SOURCE operand size in bytes, decoded from the instruction's size flags via a small lookup table (index 0 is the invalid sentinel). NOTE(review): the throw message below says "destination" — looks copy-pasted from GetDstSize and should read "source". */ uint8_t OpDispatchBuilder::GetSrcSize(FEXCore::X86Tables::DecodedOp Op) { + constexpr std::array Sizes = { + 0, // Invalid DEF + 1, + 2, + 4, + 8, + 16, + 32 + }; + + uint32_t SrcSizeFlag = FEXCore::X86Tables::DecodeFlags::GetSizeSrcFlags(Op->Flags); + uint8_t Size = Sizes[SrcSizeFlag]; + LogMan::Throw::A(Size != 0, "Invalid destination size for op"); + return Size; +} + + /* Materializes a decoded operand as an IR value of OpSize bytes. Literal/GPR operands yield the value directly; GPR_DIRECT/GPR_INDIRECT/RIP_RELATIVE/SIB compute an address (LoadableType) which is dereferenced at the end when LoadData is set (or ForceLoad). FS/GS segment bases are added before the load. */ OrderedNode *OpDispatchBuilder::LoadSource_WithOpSize(FEXCore::X86Tables::DecodedOp const& Op, FEXCore::X86Tables::DecodedOperand const& Operand, uint8_t OpSize, uint32_t Flags, bool LoadData, bool ForceLoad) { + LogMan::Throw::A(Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR || + Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_LITERAL || + Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR_DIRECT || + Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR_INDIRECT || + Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_RIP_RELATIVE || + Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_SIB + , "Unsupported Src type"); + + OrderedNode *Src {nullptr}; + bool LoadableType = false; + if (Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_LITERAL) { + Src = _Constant(Operand.TypeLiteral.Size * 8, Operand.TypeLiteral.Literal); + } + else if (Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR) { + if (Operand.TypeGPR.GPR >= FEXCore::X86State::REG_XMM_0) { + Src = _LoadContext(OpSize, offsetof(FEXCore::Core::CPUState, xmm[Operand.TypeGPR.GPR - FEXCore::X86State::REG_XMM_0][Operand.TypeGPR.HighBits ?
1 : 0])); + } + else { + Src = _LoadContext(OpSize, offsetof(FEXCore::Core::CPUState, gregs[Operand.TypeGPR.GPR]) + (Operand.TypeGPR.HighBits ? 1 : 0)); + } + } + else if (Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR_DIRECT) { + Src = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[Operand.TypeGPR.GPR])); + LoadableType = true; + } + else if (Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR_INDIRECT) { + auto GPR = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[Operand.TypeGPRIndirect.GPR])); + auto Constant = _Constant(Operand.TypeGPRIndirect.Displacement); + + Src = _Add(GPR, Constant); + + LoadableType = true; + } + else if (Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_RIP_RELATIVE) { + Src = _Constant(Operand.TypeRIPLiteral.Literal + Op->PC + Op->InstSize); + LoadableType = true; + } + else if (Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_SIB) { + OrderedNode *Tmp {}; + if (Operand.TypeSIB.Index != FEXCore::X86State::REG_INVALID) { + Tmp = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[Operand.TypeSIB.Index])); + + if (Operand.TypeSIB.Scale != 1) { + auto Constant = _Constant(Operand.TypeSIB.Scale); + Tmp = _Mul(Tmp, Constant); + } + } + + if (Operand.TypeSIB.Base != FEXCore::X86State::REG_INVALID) { + auto GPR = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[Operand.TypeSIB.Base])); + if (Tmp != nullptr) { + Tmp = _Add(Tmp, GPR); + } + else { + Tmp = GPR; + } + } + + if (Operand.TypeSIB.Offset) { + if (Tmp != nullptr) { + Src = _Add(Tmp, _Constant(Operand.TypeSIB.Offset)); + } + else { + Src = _Constant(Operand.TypeSIB.Offset); + } + } + else { + if (Tmp != nullptr) { + Src = Tmp; + } + else { + Src = _Constant(0); + } + } + + LoadableType = true; + } + else { + LogMan::Msg::A("Unknown Src Type: %d\n", Operand.TypeNone.Type); + } + + if ((LoadableType && LoadData) || ForceLoad) { + if (Flags & 
FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX) { + Src = _Add(Src, _LoadContext(8, offsetof(FEXCore::Core::CPUState, fs))); + } + else if (Flags & FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX) { + Src = _Add(Src, _LoadContext(8, offsetof(FEXCore::Core::CPUState, gs))); + } + + Src = _LoadMem(Src, OpSize); + } + return Src; +} + +OrderedNode *OpDispatchBuilder::LoadSource(FEXCore::X86Tables::DecodedOp const& Op, FEXCore::X86Tables::DecodedOperand const& Operand, uint32_t Flags, bool LoadData, bool ForceLoad) { + uint8_t OpSize = GetSrcSize(Op); + return LoadSource_WithOpSize(Op, Operand, OpSize, Flags, LoadData, ForceLoad); +} + +void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::X86Tables::DecodedOp Op, FEXCore::X86Tables::DecodedOperand const& Operand, OrderedNode *const Src, uint8_t OpSize) { + LogMan::Throw::A((Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR || + Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_LITERAL || + Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR_DIRECT || + Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR_INDIRECT || + Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_RIP_RELATIVE || + Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_SIB + ), "Unsupported Dest type"); + + // 8Bit and 16bit destination types store their result without effecting the upper bits + // 32bit ops ZEXT the result to 64bit + OrderedNode *MemStoreDst {nullptr}; + bool MemStore = false; + + if (Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_LITERAL) { + MemStoreDst = _Constant(Operand.TypeLiteral.Size * 8, Operand.TypeLiteral.Literal); + MemStore = true; // Literals are ONLY hardcoded memory destinations + } + else if (Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR) { + if (Operand.TypeGPR.GPR >= FEXCore::X86State::REG_XMM_0) { + _StoreContext(Src, OpSize, offsetof(FEXCore::Core::CPUState, 
xmm[Operand.TypeGPR.GPR - FEXCore::X86State::REG_XMM_0][Operand.TypeGPR.HighBits ? 1 : 0])); + } + else { + if (OpSize == 4) { + LogMan::Throw::A(!Operand.TypeGPR.HighBits, "Can't handle 32bit store to high 8bit register"); + auto ZextOp = _Zext(Src, 32); + + _StoreContext(ZextOp, 8, offsetof(FEXCore::Core::CPUState, gregs[Operand.TypeGPR.GPR])); + } + else { + _StoreContext(Src, std::min(static_cast(8), OpSize), offsetof(FEXCore::Core::CPUState, gregs[Operand.TypeGPR.GPR]) + (Operand.TypeGPR.HighBits ? 1 : 0)); + } + } + } + else if (Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR_DIRECT) { + MemStoreDst = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[Operand.TypeGPR.GPR])); + MemStore = true; + } + else if (Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR_INDIRECT) { + auto GPR = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[Operand.TypeGPRIndirect.GPR])); + auto Constant = _Constant(Operand.TypeGPRIndirect.Displacement); + + MemStoreDst = _Add(GPR, Constant); + MemStore = true; + } + else if (Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_RIP_RELATIVE) { + MemStoreDst = _Constant(Operand.TypeRIPLiteral.Literal + Op->PC + Op->InstSize); + MemStore = true; + } + else if (Operand.TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_SIB) { + OrderedNode *Tmp {}; + if (Operand.TypeSIB.Index != FEXCore::X86State::REG_INVALID) { + Tmp = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[Operand.TypeSIB.Index])); + + if (Operand.TypeSIB.Scale != 1) { + auto Constant = _Constant(Operand.TypeSIB.Scale); + Tmp = _Mul(Tmp, Constant); + } + } + + if (Operand.TypeSIB.Base != FEXCore::X86State::REG_INVALID) { + auto GPR = _LoadContext(8, offsetof(FEXCore::Core::CPUState, gregs[Operand.TypeSIB.Base])); + + if (Tmp != nullptr) { + Tmp = _Add(Tmp, GPR); + } + else { + Tmp = GPR; + } + } + + if (Operand.TypeSIB.Offset) { + if (Tmp != nullptr) { + MemStoreDst = _Add(Tmp, 
_Constant(Operand.TypeSIB.Offset)); + } + else { + MemStoreDst = _Constant(Operand.TypeSIB.Offset); + } + } + else { + if (Tmp != nullptr) { + MemStoreDst = Tmp; + } + else { + MemStoreDst = _Constant(0); + } + } + + MemStore = true; + } + + if (MemStore) { + if (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX) { + MemStoreDst = _Add(MemStoreDst, _LoadContext(8, offsetof(FEXCore::Core::CPUState, fs))); + } + else if (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX) { + MemStoreDst = _Add(MemStoreDst, _LoadContext(8, offsetof(FEXCore::Core::CPUState, gs))); + } + + _StoreMem(OpSize, MemStoreDst, Src); + } +} + +void OpDispatchBuilder::StoreResult(FEXCore::X86Tables::DecodedOp Op, FEXCore::X86Tables::DecodedOperand const& Operand, OrderedNode *const Src) { + return StoreResult_WithOpSize(Op, Operand, Src, GetDstSize(Op)); +} + +void OpDispatchBuilder::StoreResult(FEXCore::X86Tables::DecodedOp Op, OrderedNode *const Src) { + StoreResult(Op, Op->Dest, Src); +} + +void OpDispatchBuilder::TestFunction() { + printf("Doing Test Function\n"); + _BeginBlock(); + auto Load1 = _LoadContext(8, 0); + auto Load2 = _LoadContext(8, 0); + //auto Res = Load1 Load2; + auto Res = _Add(Load1, Load2); + _StoreContext(Res, 8, 0); + + std::stringstream out; + auto IR = ViewIR(); + FEXCore::IR::Dump(&out, &IR); + + printf("List Data Size: %ld\n", ListData.Size()); + printf("IR:\n%s\n@@@@@\n", out.str().c_str()); +} + +OpDispatchBuilder::OpDispatchBuilder() + : Data {8 * 1024 * 1024} + , ListData {8 * 1024 * 1024} { + ResetWorkingList(); +} + +void OpDispatchBuilder::ResetWorkingList() { + Data.Reset(); + ListData.Reset(); + CurrentWriteCursor = nullptr; + // This is necessary since we do "null" pointer checks + ListData.Allocate(sizeof(OrderedNode)); + DecodeFailure = false; + Information.HadUnconditionalExit = false; + ShouldDump = false; +} + +template +void OpDispatchBuilder::SetRFLAG(OrderedNode *Value) { + _StoreFlag(Value, BitOffset); +} +void 
OpDispatchBuilder::SetRFLAG(OrderedNode *Value, unsigned BitOffset) { + _StoreFlag(Value, BitOffset); +} + + /* Reads back a single flag bit from the per-flag storage. */ OrderedNode *OpDispatchBuilder::GetRFLAG(unsigned BitOffset) { + return _LoadFlag(BitOffset); +} + /* EFLAGS bit positions handled by the pack/unpack helpers below. The first five entries (CF, PF, AF, ZF, SF) are the ones selected when Lower8 is set. The array's element type/size arguments were stripped in extraction. */ constexpr std::array FlagOffsets = { + FEXCore::X86State::RFLAG_CF_LOC, + FEXCore::X86State::RFLAG_PF_LOC, + FEXCore::X86State::RFLAG_AF_LOC, + FEXCore::X86State::RFLAG_ZF_LOC, + FEXCore::X86State::RFLAG_SF_LOC, + FEXCore::X86State::RFLAG_TF_LOC, + FEXCore::X86State::RFLAG_IF_LOC, + FEXCore::X86State::RFLAG_DF_LOC, + FEXCore::X86State::RFLAG_OF_LOC, + FEXCore::X86State::RFLAG_IOPL_LOC, + FEXCore::X86State::RFLAG_NT_LOC, + FEXCore::X86State::RFLAG_RF_LOC, + FEXCore::X86State::RFLAG_VM_LOC, + FEXCore::X86State::RFLAG_AC_LOC, + FEXCore::X86State::RFLAG_VIF_LOC, + FEXCore::X86State::RFLAG_VIP_LOC, + FEXCore::X86State::RFLAG_ID_LOC, +}; + + /* Unpacks a packed EFLAGS-layout value: isolates each flag's bit from Src at its EFLAGS position and stores it into the individual flag slot. Lower8 restricts the update to the low-byte flags (first 5 offsets), as used by SAHF-style writes. */ void OpDispatchBuilder::SetPackedRFLAG(bool Lower8, OrderedNode *Src) { + uint8_t NumFlags = FlagOffsets.size(); + if (Lower8) { + NumFlags = 5; + } + auto OneConst = _Constant(1); + for (int i = 0; i < NumFlags; ++i) { + auto Tmp = _And(_Lshr(Src, _Constant(FlagOffsets[i])), OneConst); + SetRFLAG(Tmp, FlagOffsets[i]); + } +} + + /* Repacks the individual flag slots into EFLAGS layout. The accumulator starts at constant 2 because EFLAGS bit 1 is reserved and always reads as set. */ OrderedNode *OpDispatchBuilder::GetPackedRFLAG(bool Lower8) { + OrderedNode *Original = _Constant(2); + uint8_t NumFlags = FlagOffsets.size(); + if (Lower8) { + NumFlags = 5; + } + + for (int i = 0; i < NumFlags; ++i) { + OrderedNode *Flag = _LoadFlag(FlagOffsets[i]); + Flag = _Zext(32, Flag); + Flag = _Lshl(Flag, _Constant(FlagOffsets[i])); + Original = _Or(Original, Flag); + } + return Original; +} + + /* Emits IR computing AF/SF/PF/ZF/CF/OF for ADC from the result, both sources, and the incoming carry (continues past this hunk). */ void OpDispatchBuilder::GenerateFlags_ADC(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF) { + auto ZeroConst = _Constant(0); + auto OneConst = _Constant(1); + + auto Size = GetSrcSize(Op) * 8; + // AF + { + OrderedNode *AFRes = _Xor(_Xor(Src1, Src2), Res); + AFRes = _Bfe(1, 4, AFRes); + SetRFLAG(AFRes); + } + + // SF + { + auto ThirtyOneConst =
_Constant(Size - 1); + + auto LshrOp = _Lshr(Res, ThirtyOneConst); + SetRFLAG(LshrOp); + } + + // PF + { + auto PopCountOp = _Popcount(_And(Res, _Constant(0xFF))); + + auto XorOp = _Xor(PopCountOp, OneConst); + SetRFLAG(XorOp); + } + + // ZF + { + auto Dst8 = _Bfe(Size, 0, Res); + + auto SelectOp = _Select(FEXCore::IR::COND_EQ, + Dst8, ZeroConst, OneConst, ZeroConst); + SetRFLAG(SelectOp); + } + + // CF + // Unsigned + { + auto Dst8 = _Bfe(Size, 0, Res); + auto Src8 = _Bfe(Size, 0, Src2); + + auto SelectOpLT = _Select(FEXCore::IR::COND_LT, Dst8, Src8, OneConst, ZeroConst); + auto SelectOpLE = _Select(FEXCore::IR::COND_LE, Dst8, Src8, OneConst, ZeroConst); + auto SelectCF = _Select(FEXCore::IR::COND_EQ, CF, OneConst, SelectOpLE, SelectOpLT); + SetRFLAG(SelectCF); + } + + // OF + // Signed + { + auto NegOne = _Constant(~0ULL); + auto XorOp1 = _Xor(_Xor(Src1, Src2), NegOne); + auto XorOp2 = _Xor(Res, Src1); + OrderedNode *AndOp1 = _And(XorOp1, XorOp2); + + switch (Size) { + case 8: + AndOp1 = _Bfe(1, 7, AndOp1); + break; + case 16: + AndOp1 = _Bfe(1, 15, AndOp1); + break; + case 32: + AndOp1 = _Bfe(1, 31, AndOp1); + break; + case 64: + AndOp1 = _Bfe(1, 63, AndOp1); + break; + default: LogMan::Msg::A("Unknown BFESize: %d", Size); break; + } + SetRFLAG(AndOp1); + } +} + +void OpDispatchBuilder::GenerateFlags_SBB(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF) { + auto ZeroConst = _Constant(0); + auto OneConst = _Constant(1); + + // AF + { + OrderedNode *AFRes = _Xor(_Xor(Src1, Src2), Res); + AFRes = _Bfe(1, 4, AFRes); + SetRFLAG(AFRes); + } + + // SF + { + auto ThirtyOneConst = _Constant(GetSrcSize(Op) * 8 - 1); + + auto LshrOp = _Lshr(Res, ThirtyOneConst); + SetRFLAG(LshrOp); + } + + // PF + { + auto PopCountOp = _Popcount(_And(Res, _Constant(0xFF))); + + auto XorOp = _Xor(PopCountOp, OneConst); + SetRFLAG(XorOp); + } + + // ZF + { + auto SelectOp = _Select(FEXCore::IR::COND_EQ, + Res, ZeroConst, OneConst, 
ZeroConst); + SetRFLAG(SelectOp); + } + + // CF + // Unsigned + { + auto Dst8 = _Bfe(GetSrcSize(Op) * 8, 0, Res); + auto Src8_1 = _Bfe(GetSrcSize(Op) * 8, 0, Src1); + + auto SelectOpLT = _Select(FEXCore::IR::COND_GT, Dst8, Src8_1, OneConst, ZeroConst); + auto SelectOpLE = _Select(FEXCore::IR::COND_GE, Dst8, Src8_1, OneConst, ZeroConst); + auto SelectCF = _Select(FEXCore::IR::COND_EQ, CF, OneConst, SelectOpLE, SelectOpLT); + SetRFLAG(SelectCF); + } + + // OF + // Signed + { + auto XorOp1 = _Xor(Src1, Src2); + auto XorOp2 = _Xor(Res, Src1); + OrderedNode *AndOp1 = _And(XorOp1, XorOp2); + + switch (GetSrcSize(Op)) { + case 1: + AndOp1 = _Bfe(1, 7, AndOp1); + break; + case 2: + AndOp1 = _Bfe(1, 15, AndOp1); + break; + case 4: + AndOp1 = _Bfe(1, 31, AndOp1); + break; + case 8: + AndOp1 = _Bfe(1, 63, AndOp1); + break; + default: LogMan::Msg::A("Unknown BFESize: %d", GetSrcSize(Op)); break; + } + SetRFLAG(AndOp1); + } +} + +void OpDispatchBuilder::GenerateFlags_SUB(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { + auto ZeroConst = _Constant(0); + auto OneConst = _Constant(1); + // AF + { + OrderedNode *AFRes = _Xor(_Xor(Src1, Src2), Res); + AFRes = _Bfe(1, 4, AFRes); + SetRFLAG(AFRes); + } + + // SF + { + auto SignBitConst = _Constant(GetSrcSize(Op) * 8 - 1); + + auto LshrOp = _Lshr(Res, SignBitConst); + SetRFLAG(LshrOp); + } + + // PF + { + auto EightBitMask = _Constant(0xFF); + auto PopCountOp = _Popcount(_And(Res, EightBitMask)); + auto XorOp = _Xor(PopCountOp, OneConst); + SetRFLAG(XorOp); + } + + // ZF + { + auto Bfe8 = _Bfe(GetSrcSize(Op) * 8, 0, Res); + auto SelectOp = _Select(FEXCore::IR::COND_EQ, + Bfe8, ZeroConst, OneConst, ZeroConst); + SetRFLAG(SelectOp); + } + + // CF + { + auto SelectOp = _Select(FEXCore::IR::COND_LT, + Src1, Src2, OneConst, ZeroConst); + + SetRFLAG(SelectOp); + } + // OF + { + auto XorOp1 = _Xor(Src1, Src2); + auto XorOp2 = _Xor(Res, Src1); + OrderedNode *FinalAnd = _And(XorOp1, XorOp2); + + 
switch (GetSrcSize(Op)) { + case 1: + FinalAnd = _Bfe(1, 7, FinalAnd); + break; + case 2: + FinalAnd = _Bfe(1, 15, FinalAnd); + break; + case 4: + FinalAnd = _Bfe(1, 31, FinalAnd); + break; + case 8: + FinalAnd = _Bfe(1, 63, FinalAnd); + break; + default: LogMan::Msg::A("Unknown BFESize: %d", GetSrcSize(Op)); break; + } + SetRFLAG(FinalAnd); + } +} + +void OpDispatchBuilder::GenerateFlags_ADD(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { + auto ZeroConst = _Constant(0); + auto OneConst = _Constant(1); + + // AF + { + OrderedNode *AFRes = _Xor(_Xor(Src1, Src2), Res); + AFRes = _Bfe(1, 4, AFRes); + SetRFLAG(AFRes); + } + + // SF + { + auto SignBitConst = _Constant(GetSrcSize(Op) * 8 - 1); + + auto LshrOp = _Lshr(Res, SignBitConst); + SetRFLAG(LshrOp); + } + + // PF + { + auto EightBitMask = _Constant(0xFF); + auto PopCountOp = _Popcount(_And(Res, EightBitMask)); + auto XorOp = _Xor(PopCountOp, OneConst); + SetRFLAG(XorOp); + } + + // ZF + { + auto SelectOp = _Select(FEXCore::IR::COND_EQ, + Res, ZeroConst, OneConst, ZeroConst); + SetRFLAG(SelectOp); + } + // CF + { + auto Dst8 = _Bfe(GetSrcSize(Op) * 8, 0, Res); + auto Src8 = _Bfe(GetSrcSize(Op) * 8, 0, Src2); + + auto SelectOp = _Select(FEXCore::IR::COND_LT, Dst8, Src8, OneConst, ZeroConst); + + SetRFLAG(SelectOp); + } + + // OF + { + auto NegOne = _Constant(~0ULL); + auto XorOp1 = _Xor(_Xor(Src1, Src2), NegOne); + auto XorOp2 = _Xor(Res, Src1); + + OrderedNode *AndOp1 = _And(XorOp1, XorOp2); + + switch (GetSrcSize(Op)) { + case 1: + AndOp1 = _Bfe(1, 7, AndOp1); + break; + case 2: + AndOp1 = _Bfe(1, 15, AndOp1); + break; + case 4: + AndOp1 = _Bfe(1, 31, AndOp1); + break; + case 8: + AndOp1 = _Bfe(1, 63, AndOp1); + break; + default: LogMan::Msg::A("Unknown BFESize: %d", GetSrcSize(Op)); break; + } + SetRFLAG(AndOp1); + } +} + +void OpDispatchBuilder::GenerateFlags_MUL(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *High) { + auto ZeroConst = _Constant(0); 
+ auto OneConst = _Constant(1); + auto SignBitConst = _Constant(GetSrcSize(Op) * 8 - 1); + + // PF/AF/ZF/SF + // Undefined + { + SetRFLAG(ZeroConst); + SetRFLAG(ZeroConst); + SetRFLAG(ZeroConst); + SetRFLAG(ZeroConst); + } + + // CF/OF + { + // CF and OF are set if the result of the operation can't be fit in to the destination register + // If the value can fit then the top bits will be zero + + auto SignBit = _Ashr(Res, SignBitConst); + + auto SelectOp = _Select(FEXCore::IR::COND_EQ, High, SignBit, ZeroConst, OneConst); + + SetRFLAG(SelectOp); + SetRFLAG(SelectOp); + } +} + +void OpDispatchBuilder::GenerateFlags_UMUL(FEXCore::X86Tables::DecodedOp Op, OrderedNode *High) { + auto ZeroConst = _Constant(0); + auto OneConst = _Constant(1); + + // AF/SF/PF/ZF + // Undefined + { + SetRFLAG(ZeroConst); + SetRFLAG(ZeroConst); + SetRFLAG(ZeroConst); + SetRFLAG(ZeroConst); + } + + // CF/OF + { + // CF and OF are set if the result of the operation can't be fit in to the destination register + // The result register will be all zero if it can't fit due to how multiplication behaves + + auto SelectOp = _Select(FEXCore::IR::COND_EQ, High, ZeroConst, ZeroConst, OneConst); + + SetRFLAG(SelectOp); + SetRFLAG(SelectOp); + } +} + +void OpDispatchBuilder::GenerateFlags_Logical(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { + auto ZeroConst = _Constant(0); + auto OneConst = _Constant(1); + // AF + { + // Undefined + // Set to zero anyway + SetRFLAG(ZeroConst); + } + + // SF + { + auto SignBitConst = _Constant(GetSrcSize(Op) * 8 - 1); + + auto LshrOp = _Lshr(Res, SignBitConst); + SetRFLAG(LshrOp); + } + + // PF + { + auto EightBitMask = _Constant(0xFF); + auto PopCountOp = _Popcount(_And(Res, EightBitMask)); + auto XorOp = _Xor(PopCountOp, OneConst); + SetRFLAG(XorOp); + } + + // ZF + { + auto SelectOp = _Select(FEXCore::IR::COND_EQ, + Res, ZeroConst, OneConst, ZeroConst); + SetRFLAG(SelectOp); + } + + // CF/OF + { + SetRFLAG(ZeroConst); + 
SetRFLAG(ZeroConst); + } +} +void OpDispatchBuilder::GenerateFlags_Rotate(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { + auto ZeroConst = _Constant(0); + + // CF/OF + // XXX: These are wrong + { + SetRFLAG(ZeroConst); + SetRFLAG(ZeroConst); + } +} + +void OpDispatchBuilder::UnhandledOp(OpcodeArgs) { + DecodeFailure = true; +} + +void OpDispatchBuilder::MOVOp(OpcodeArgs) { + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + StoreResult(Op, Src); +} + +void OpDispatchBuilder::ALUOp(OpcodeArgs) { + FEXCore::IR::IROps IROp; + switch (Op->OP) { + case 0x0: + case 0x1: + case 0x2: + case 0x3: + case 0x4: + case 0x5: + IROp = FEXCore::IR::IROps::OP_ADD; + break; + case 0x8: + case 0x9: + case 0xA: + case 0xB: + case 0xC: + case 0xD: + IROp = FEXCore::IR::IROps::OP_OR; + break; + case 0x20: + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + IROp = FEXCore::IR::IROps::OP_AND; + break; + case 0x28: + case 0x29: + case 0x2A: + case 0x2B: + case 0x2C: + case 0x2D: + IROp = FEXCore::IR::IROps::OP_SUB; + break; + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + IROp = FEXCore::IR::IROps::OP_XOR; + break; + default: + IROp = FEXCore::IR::IROps::OP_LAST; + LogMan::Msg::A("Unknown ALU Op: 0x%x", Op->OP); + break; + } + + // X86 basic ALU ops just do the operation between the destination and a single source + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + auto ALUOp = _Add(Dest, Src); + // Overwrite our IR's op type + ALUOp.first->Header.Op = IROp; + + StoreResult(Op, ALUOp); + + // Flags set + { + auto Size = GetSrcSize(Op) * 8; + switch (IROp) { + case FEXCore::IR::IROps::OP_ADD: + GenerateFlags_ADD(Op, _Bfe(Size, 0, ALUOp), _Bfe(Size, 0, Dest), _Bfe(Size, 0, Src)); + break; + case FEXCore::IR::IROps::OP_SUB: + GenerateFlags_SUB(Op, _Bfe(Size, 0, ALUOp), _Bfe(Size, 0, Dest), _Bfe(Size, 0, Src)); + break; + case 
FEXCore::IR::IROps::OP_MUL: + GenerateFlags_MUL(Op, _Bfe(Size, 0, ALUOp), _MulH(Dest, Src)); + break; + case FEXCore::IR::IROps::OP_AND: + case FEXCore::IR::IROps::OP_XOR: + case FEXCore::IR::IROps::OP_OR: { + GenerateFlags_Logical(Op, _Bfe(Size, 0, ALUOp), _Bfe(Size, 0, Dest), _Bfe(Size, 0, Src)); + break; + } + default: break; + } + } +} + +void OpDispatchBuilder::INTOp(OpcodeArgs) { + uint8_t Reason{}; + uint8_t Literal{}; + switch (Op->OP) { + case 0xCC: + Reason = 0; + break; + case 0xCD: + Reason = 1; + Literal = Op->Src1.TypeLiteral.Literal; + break; + case 0xCE: + Reason = 2; + break; + case 0xF1: + Reason = 3; + break; + case 0xF4: { + Reason = 4; + + // We want to set RIP to the next instruction after HLT + auto NewRIP = _Constant(Op->PC + Op->InstSize); + _StoreContext(8, offsetof(FEXCore::Core::CPUState, rip), NewRIP); + break; + } + case 0x0B: + Reason = 5; + break; + } + + if (Op->OP == 0xCE) { // Conditional to only break if Overflow == 1 + auto Flag = GetRFLAG(FEXCore::X86State::RFLAG_OF_LOC); + auto CondJump = _CondJump(); + CondJump.first->Header.NumArgs = 1; + // If condition doesn't hold then keep going + CondJump.first->Header.Args[0] = _Xor(Flag, _Constant(1)).Node->Wrapped(ListData.Begin()); + _Break(Reason, Literal); + _EndBlock(0); + + // Make sure to start a new block after ending this one + auto JumpTarget = _BeginBlock(); + CondJump.first->Header.Args[1] = JumpTarget.Node->Wrapped(ListData.Begin()); + } + else { + _Break(Reason, Literal); + } +} + +template +void OpDispatchBuilder::PSRLD(OpcodeArgs) { + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + auto Size = GetSrcSize(Op); + + auto Shift = _VUShr(Size, ElementSize, Dest, Src); + StoreResult(Op, Shift); +} + +template +void OpDispatchBuilder::PSLL(OpcodeArgs) { + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + auto Size = GetDstSize(Op); + + 
OrderedNode *Result{}; + + if (Scalar) { + Result = _VUShlS(Size, ElementSize, Dest, Src); + } + else { + Result = _VUShl(Size, ElementSize, Dest, Src); + } + + StoreResult(Op, Result); +} + + /* PSRLDQ: whole-register right shift by a byte count. NOTE(review): converts bytes to bits (_Lshl by 3) and then applies a plain _Lshr to the vector value — verify the scalar shift op is defined for 128-bit operands; upstream later replaced this with a dedicated vector op. */ void OpDispatchBuilder::PSRLDQ(OpcodeArgs) { + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Dest = LoadSource(Op, Op->Dest, Op->Flags); + + // PSRLDQ shifts by bytes + // Adjust input value by number of bytes + Src = _Lshl(Src, _Constant(3)); + + auto Shift = _Lshr(Dest, Src); + StoreResult(Op, Shift); +} + + /* MOVDDUP: duplicates the low 64-bit element of the source into both halves of the destination via _CreateVector2. */ void OpDispatchBuilder::MOVDDUPOp(OpcodeArgs) { + OrderedNode *Src = LoadSource(Op, Op->Src1, Op->Flags); + OrderedNode *Res = _CreateVector2(Src, Src); + StoreResult(Op, Res); +} + +#undef OpcodeArgs + + /* Populates the x86 decode tables with dispatch handlers; each entry is {first opcode, count of consecutive opcodes, handler}. The table's element type (a std::tuple inside std::vector) lost its angle-bracket arguments in extraction. Continues past this hunk. */ void InstallOpcodeHandlers() { + const std::vector> BaseOpTable = { + // Instructions + {0x00, 6, &OpDispatchBuilder::ALUOp}, + {0x08, 6, &OpDispatchBuilder::ALUOp}, + {0x10, 6, &OpDispatchBuilder::ADCOp}, + {0x18, 6, &OpDispatchBuilder::SBBOp}, + {0x20, 6, &OpDispatchBuilder::ALUOp}, + {0x28, 6, &OpDispatchBuilder::ALUOp}, + {0x30, 6, &OpDispatchBuilder::ALUOp}, + {0x38, 6, &OpDispatchBuilder::CMPOp}, + {0x50, 8, &OpDispatchBuilder::PUSHOp}, + {0x58, 8, &OpDispatchBuilder::POPOp}, + {0x68, 1, &OpDispatchBuilder::PUSHOp}, + {0x6A, 1, &OpDispatchBuilder::PUSHOp}, + + {0x63, 1, &OpDispatchBuilder::MOVSXDOp}, + {0x69, 1, &OpDispatchBuilder::IMUL2SrcOp}, + {0x6B, 1, &OpDispatchBuilder::IMUL2SrcOp}, + {0x70, 16, &OpDispatchBuilder::CondJUMPOp}, + {0x84, 2, &OpDispatchBuilder::TESTOp}, + {0x86, 2, &OpDispatchBuilder::XCHGOp}, + {0x88, 1, &OpDispatchBuilder::MOVOp}, + {0x89, 1, &OpDispatchBuilder::MOVOp}, + // XXX: Causes LLVM to hang?
+ {0x8A, 1, &OpDispatchBuilder::MOVOp}, + {0x8B, 1, &OpDispatchBuilder::MOVOp}, + + {0x8D, 1, &OpDispatchBuilder::LEAOp}, + {0x90, 8, &OpDispatchBuilder::XCHGOp}, + + {0x98, 1, &OpDispatchBuilder::CDQOp}, + {0x99, 1, &OpDispatchBuilder::CQOOp}, + {0x9E, 1, &OpDispatchBuilder::SAHFOp}, + {0x9F, 1, &OpDispatchBuilder::LAHFOp}, + {0xA0, 4, &OpDispatchBuilder::MOVOffsetOp}, + {0xA4, 2, &OpDispatchBuilder::MOVSOp}, + {0xA6, 2, &OpDispatchBuilder::CMPSOp}, + {0xA8, 2, &OpDispatchBuilder::TESTOp}, + {0xAA, 2, &OpDispatchBuilder::STOSOp}, + {0xB0, 8, &OpDispatchBuilder::MOVOp}, + {0xB8, 8, &OpDispatchBuilder::MOVOp}, + {0xC2, 2, &OpDispatchBuilder::RETOp}, + {0xC9, 1, &OpDispatchBuilder::LEAVEOp}, + {0xCC, 3, &OpDispatchBuilder::INTOp}, + {0xE8, 1, &OpDispatchBuilder::CALLOp}, + {0xE9, 1, &OpDispatchBuilder::JUMPOp}, + {0xEB, 1, &OpDispatchBuilder::JUMPOp}, + {0xF1, 1, &OpDispatchBuilder::INTOp}, + {0xF4, 1, &OpDispatchBuilder::INTOp}, + {0xF5, 1, &OpDispatchBuilder::FLAGControlOp}, + {0xF8, 2, &OpDispatchBuilder::FLAGControlOp}, + {0xFC, 2, &OpDispatchBuilder::FLAGControlOp}, + }; + + const std::vector> TwoByteOpTable = { + // Instructions + {0x00, 1, nullptr}, // GROUP 6 + {0x01, 1, nullptr}, // GROUP 7 + {0x05, 1, &OpDispatchBuilder::SyscallOp}, + {0x0B, 1, &OpDispatchBuilder::INTOp}, + {0x0D, 1, nullptr}, // GROUP P + {0x18, 1, nullptr}, // GROUP 16 + + {0x19, 7, &OpDispatchBuilder::NOPOp}, // NOP with ModRM + + {0x31, 1, &OpDispatchBuilder::RDTSCOp}, + + {0x40, 16, &OpDispatchBuilder::CMOVOp}, + {0x6E, 1, &OpDispatchBuilder::UnhandledOp}, // MOVD + {0x7E, 1, &OpDispatchBuilder::UnhandledOp}, // MOVD + {0x80, 16, &OpDispatchBuilder::CondJUMPOp}, // XXX: Fails to fixup some jumps + {0x90, 16, &OpDispatchBuilder::SETccOp}, // XXX: Causes some unit tests to fail due to flags being incorrect + {0xA2, 1, &OpDispatchBuilder::CPUIDOp}, + {0xA3, 1, &OpDispatchBuilder::BTOp}, // BT + {0xAF, 1, &OpDispatchBuilder::IMUL1SrcOp}, // XXX: Causes issues with LLVM JIT + {0xB0, 2, 
&OpDispatchBuilder::CMPXCHGOp}, // CMPXCHG + {0xB6, 2, &OpDispatchBuilder::MOVZXOp}, + {0xBC, 1, &OpDispatchBuilder::BSFOp}, // BSF + {0xBD, 1, &OpDispatchBuilder::BSROp}, // BSF + // XXX: Broken on LLVM? + {0xBE, 2, &OpDispatchBuilder::MOVSXOp}, + {0xC8, 8, &OpDispatchBuilder::BSWAPOp}, + + // SSE + // XXX: Broken on LLVM? + {0x10, 2, &OpDispatchBuilder::MOVUPSOp}, + {0x16, 1, &OpDispatchBuilder::MOVLHPSOp}, + {0x17, 1, &OpDispatchBuilder::MOVUPSOp}, + {0x28, 2, &OpDispatchBuilder::MOVUPSOp}, + {0xEB, 1, &OpDispatchBuilder::VectorALUOp}, + + {0x60, 3, &OpDispatchBuilder::PUNPCKLOp}, + {0x64, 1, &OpDispatchBuilder::PCMPGTOp<1>}, + {0x65, 1, &OpDispatchBuilder::PCMPGTOp<2>}, + {0x66, 1, &OpDispatchBuilder::PCMPGTOp<4>}, + {0x68, 3, &OpDispatchBuilder::UnhandledOp}, + {0x6C, 1, &OpDispatchBuilder::UnhandledOp}, + {0x71, 1, nullptr}, // GROUP 12 + {0x72, 1, nullptr}, // GROUP 13 + {0x73, 1, nullptr}, // GROUP 14 + + {0x74, 3, &OpDispatchBuilder::PCMPEQOp}, + {0xAE, 1, nullptr}, // GROUP 15 + {0xB9, 1, nullptr}, // GROUP 10 + {0xBA, 1, nullptr}, // GROUP 8 + {0xC7, 1, nullptr}, // GROUP 9 + + {0xD4, 1, &OpDispatchBuilder::PADDQOp}, + {0xD6, 1, &OpDispatchBuilder::MOVQOp}, + {0xD7, 1, &OpDispatchBuilder::PMOVMSKBOp}, + // XXX: Untested + {0xDA, 1, &OpDispatchBuilder::PMINUOp<1>}, + {0xEA, 1, &OpDispatchBuilder::PMINSWOp}, + {0xEF, 1, &OpDispatchBuilder::VectorALUOp}, + {0xF8, 4, &OpDispatchBuilder::PSUBQOp}, + {0xFE, 1, &OpDispatchBuilder::PADDQOp}, + }; + + const std::vector> PrimaryGroupOpTable = { + #define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg)) + // GROUP 1 + // XXX: Something in this group causing bad syscall when commented out + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 0), 1, &OpDispatchBuilder::SecondaryALUOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 1), 1, &OpDispatchBuilder::SecondaryALUOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 2), 1, 
&OpDispatchBuilder::ADCOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 3), 1, &OpDispatchBuilder::SBBOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 4), 1, &OpDispatchBuilder::SecondaryALUOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 5), 1, &OpDispatchBuilder::SecondaryALUOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 6), 1, &OpDispatchBuilder::SecondaryALUOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 7), 1, &OpDispatchBuilder::CMPOp}, // CMP + + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 0), 1, &OpDispatchBuilder::SecondaryALUOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 1), 1, &OpDispatchBuilder::SecondaryALUOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 2), 1, &OpDispatchBuilder::ADCOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 3), 1, &OpDispatchBuilder::SBBOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 4), 1, &OpDispatchBuilder::SecondaryALUOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 5), 1, &OpDispatchBuilder::SecondaryALUOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 6), 1, &OpDispatchBuilder::SecondaryALUOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 7), 1, &OpDispatchBuilder::CMPOp}, + + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 0), 1, &OpDispatchBuilder::SecondaryALUOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 1), 1, &OpDispatchBuilder::SecondaryALUOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 2), 1, &OpDispatchBuilder::ADCOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 3), 1, &OpDispatchBuilder::SBBOp}, // Unit tests find this setting flags incorrectly + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 4), 1, &OpDispatchBuilder::SecondaryALUOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 5), 1, &OpDispatchBuilder::SecondaryALUOp}, + 
{OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 6), 1, &OpDispatchBuilder::SecondaryALUOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 7), 1, &OpDispatchBuilder::CMPOp}, + + // GROUP 2 + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 0), 1, &OpDispatchBuilder::ROLOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 1), 1, &OpDispatchBuilder::ROROp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 4), 1, &OpDispatchBuilder::SHLOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 5), 1, &OpDispatchBuilder::SHROp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 7), 1, &OpDispatchBuilder::ASHROp}, // SAR + + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 0), 1, &OpDispatchBuilder::ROLOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 1), 1, &OpDispatchBuilder::ROROp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 4), 1, &OpDispatchBuilder::SHLOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 5), 1, &OpDispatchBuilder::SHROp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 7), 1, &OpDispatchBuilder::ASHROp}, // SAR + + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 0), 1, &OpDispatchBuilder::ROLOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 1), 1, &OpDispatchBuilder::ROROp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 4), 1, &OpDispatchBuilder::SHLOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 5), 1, &OpDispatchBuilder::SHROp}, // 1Bit SHR + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 7), 1, &OpDispatchBuilder::ASHROp}, // SAR + + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 0), 1, &OpDispatchBuilder::ROLOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 1), 1, &OpDispatchBuilder::ROROp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 4), 1, &OpDispatchBuilder::SHLOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 5), 1, 
&OpDispatchBuilder::SHROp}, // 1Bit SHR + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 7), 1, &OpDispatchBuilder::ASHROp}, // SAR + + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 0), 1, &OpDispatchBuilder::ROLOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 1), 1, &OpDispatchBuilder::ROROp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 4), 1, &OpDispatchBuilder::SHLOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 5), 1, &OpDispatchBuilder::SHROp}, // SHR by CL + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 7), 1, &OpDispatchBuilder::ASHROp}, // SAR + + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 0), 1, &OpDispatchBuilder::ROLOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 1), 1, &OpDispatchBuilder::ROROp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 4), 1, &OpDispatchBuilder::SHLOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 5), 1, &OpDispatchBuilder::SHROp}, // SHR by CL + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 7), 1, &OpDispatchBuilder::ASHROp}, // SAR + + // GROUP 3 + {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 0), 1, &OpDispatchBuilder::TESTOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 2), 1, &OpDispatchBuilder::NOTOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 3), 1, &OpDispatchBuilder::NEGOp}, // NEG + {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 4), 1, &OpDispatchBuilder::MULOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 5), 1, &OpDispatchBuilder::IMULOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 6), 1, &OpDispatchBuilder::DIVOp}, // DIV + {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 7), 1, &OpDispatchBuilder::IDIVOp}, // IDIV + + {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 0), 1, &OpDispatchBuilder::TESTOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 2), 1, &OpDispatchBuilder::NOTOp}, + 
{OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 3), 1, &OpDispatchBuilder::NEGOp}, // NEG + + {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 4), 1, &OpDispatchBuilder::MULOp}, // MUL + {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 5), 1, &OpDispatchBuilder::IMULOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 6), 1, &OpDispatchBuilder::DIVOp}, // DIV + {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 7), 1, &OpDispatchBuilder::IDIVOp}, // IDIV + + // GROUP 4 + {OPD(FEXCore::X86Tables::TYPE_GROUP_4, OpToIndex(0xFE), 0), 1, &OpDispatchBuilder::INCOp}, // INC + {OPD(FEXCore::X86Tables::TYPE_GROUP_4, OpToIndex(0xFE), 1), 1, &OpDispatchBuilder::DECOp}, // DEC + + // GROUP 5 + {OPD(FEXCore::X86Tables::TYPE_GROUP_5, OpToIndex(0xFF), 0), 1, &OpDispatchBuilder::INCOp}, // INC + {OPD(FEXCore::X86Tables::TYPE_GROUP_5, OpToIndex(0xFF), 1), 1, &OpDispatchBuilder::DECOp}, // DEC + {OPD(FEXCore::X86Tables::TYPE_GROUP_5, OpToIndex(0xFF), 2), 1, &OpDispatchBuilder::CALLAbsoluteOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_5, OpToIndex(0xFF), 4), 1, &OpDispatchBuilder::JUMPAbsoluteOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_5, OpToIndex(0xFF), 6), 1, &OpDispatchBuilder::PUSHOp}, + + // GROUP 11 + // XXX: LLVM hangs when commented out? + {OPD(FEXCore::X86Tables::TYPE_GROUP_11, OpToIndex(0xC6), 0), 1, &OpDispatchBuilder::MOVOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_11, OpToIndex(0xC7), 0), 1, &OpDispatchBuilder::MOVOp}, + #undef OPD + }; + + const std::vector> RepModOpTable = { + {0x19, 7, &OpDispatchBuilder::NOPOp}, + + {0x6F, 1, &OpDispatchBuilder::MOVUPSOp}, + // XXX: Causes LLVM to crash if commented out? 
+ {0x7E, 1, &OpDispatchBuilder::MOVQOp}, + {0x7F, 1, &OpDispatchBuilder::MOVUPSOp}, + }; + + const std::vector> RepNEModOpTable = { + {0x12, 1, &OpDispatchBuilder::MOVDDUPOp}, + {0x19, 7, &OpDispatchBuilder::NOPOp}, + {0x70, 1, &OpDispatchBuilder::PSHUFDOp<2, true>}, + }; + + const std::vector> OpSizeModOpTable = { + {0x12, 2, &OpDispatchBuilder::MOVOp}, + {0x16, 2, &OpDispatchBuilder::MOVHPDOp}, + {0x19, 7, &OpDispatchBuilder::NOPOp}, + + {0x60, 3, &OpDispatchBuilder::PUNPCKLOp}, + {0x64, 1, &OpDispatchBuilder::PCMPGTOp<1>}, + {0x65, 1, &OpDispatchBuilder::PCMPGTOp<2>}, + {0x66, 1, &OpDispatchBuilder::PCMPGTOp<4>}, + {0x68, 3, &OpDispatchBuilder::PUNPCKHOp}, + {0x6C, 1, &OpDispatchBuilder::PUNPCKLOp}, + {0x6D, 1, &OpDispatchBuilder::PUNPCKHOp}, + {0x6E, 1, &OpDispatchBuilder::MOVDOp}, + {0x6F, 1, &OpDispatchBuilder::MOVUPSOp}, + {0x70, 1, &OpDispatchBuilder::PSHUFDOp<4, true>}, + + // XXX: Causing IR interpreter some problems + {0x74, 3, &OpDispatchBuilder::PCMPEQOp}, + {0x78, 1, nullptr}, // GROUP 17 + {0x7E, 1, &OpDispatchBuilder::MOVDOp}, + {0x7F, 1, &OpDispatchBuilder::MOVUPSOp}, + {0xC6, 1, &OpDispatchBuilder::SHUFOp<8>}, + + {0xD4, 1, &OpDispatchBuilder::PADDQOp}, + // XXX: Causes LLVM to crash if commented out? 
+ {0xD6, 1, &OpDispatchBuilder::MOVQOp}, + {0xD7, 1, &OpDispatchBuilder::PMOVMSKBOp}, // PMOVMSKB + // XXX: Untested + {0xDA, 1, &OpDispatchBuilder::PMINUOp<1>}, + {0xEA, 1, &OpDispatchBuilder::PMINSWOp}, + + {0xEB, 1, &OpDispatchBuilder::VectorALUOp}, + + {0xEF, 1, &OpDispatchBuilder::VectorALUOp}, // PXOR + {0xF2, 1, &OpDispatchBuilder::PSLL<4, true>}, + {0xF3, 1, &OpDispatchBuilder::PSLL<8, true>}, + {0xF8, 4, &OpDispatchBuilder::PSUBQOp}, + {0xFE, 1, &OpDispatchBuilder::PADDQOp}, + }; + +constexpr uint16_t PF_NONE = 0; +constexpr uint16_t PF_F3 = 1; +constexpr uint16_t PF_66 = 2; +constexpr uint16_t PF_F2 = 3; +#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_6) << 5) | (prefix) << 3 | (Reg)) + const std::vector> SecondaryExtensionOpTable = { + // GROUP 8 + {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_NONE, 4), 1, &OpDispatchBuilder::BTOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_F3, 4), 1, &OpDispatchBuilder::BTOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_66, 4), 1, &OpDispatchBuilder::BTOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_F2, 4), 1, &OpDispatchBuilder::BTOp}, + + // GROUP 13 + {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 2), 1, &OpDispatchBuilder::PSRLD<4>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 6), 1, &OpDispatchBuilder::PSLL<4, true>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 2), 1, &OpDispatchBuilder::PSRLD<4>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 6), 1, &OpDispatchBuilder::PSLL<4, true>}, + + // GROUP 14 + {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_NONE, 2), 1, &OpDispatchBuilder::PSRLD<4>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_NONE, 6), 1, &OpDispatchBuilder::PSLL<8, true>}, + + {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 2), 1, &OpDispatchBuilder::PSRLD<4>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 6), 1, &OpDispatchBuilder::PSLL<8, true>}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 3), 1, &OpDispatchBuilder::PSRLDQ}, + 
{OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 7), 1, &OpDispatchBuilder::PSLL<16, true>}, + + // GROUP 16 + {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_NONE, 0), 8, &OpDispatchBuilder::NOPOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_F3, 0), 8, &OpDispatchBuilder::NOPOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_66, 0), 8, &OpDispatchBuilder::NOPOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_F2, 0), 8, &OpDispatchBuilder::NOPOp}, + }; +#undef OPD + + const std::vector> SecondaryModRMExtensionOpTable = { + }; + const std::vector> X87OpTable = { + }; + + uint64_t NumInsts{}; + auto InstallToTable = [&NumInsts](auto& FinalTable, auto& LocalTable) { + for (auto Op : LocalTable) { + auto OpNum = std::get<0>(Op); + auto Dispatcher = std::get<2>(Op); + for (uint8_t i = 0; i < std::get<1>(Op); ++i) { + LogMan::Throw::A(FinalTable[OpNum + i].OpcodeDispatcher == nullptr, "Duplicate Entry"); + FinalTable[OpNum + i].OpcodeDispatcher = Dispatcher; + if (Dispatcher) + ++NumInsts; + } + } + }; + + [[maybe_unused]] auto CheckTable = [](auto& FinalTable) { + for (size_t i = 0; i < FinalTable.size(); ++i) { + auto const &Op = FinalTable.at(i); + + if (Op.Type != X86Tables::TYPE_INST) continue; // Invalid op, we don't care + if (Op.OpcodeDispatcher == nullptr) { + LogMan::Msg::D("Op: 0x%lx %s didn't have an OpDispatcher", i, Op.Name); + } + } + }; + + InstallToTable(FEXCore::X86Tables::BaseOps, BaseOpTable); + InstallToTable(FEXCore::X86Tables::SecondBaseOps, TwoByteOpTable); + InstallToTable(FEXCore::X86Tables::PrimaryInstGroupOps, PrimaryGroupOpTable); + + InstallToTable(FEXCore::X86Tables::RepModOps, RepModOpTable); + InstallToTable(FEXCore::X86Tables::RepNEModOps, RepNEModOpTable); + InstallToTable(FEXCore::X86Tables::OpSizeModOps, OpSizeModOpTable); + InstallToTable(FEXCore::X86Tables::SecondInstGroupOps, SecondaryExtensionOpTable); + + InstallToTable(FEXCore::X86Tables::X87Ops, X87OpTable); + + // Useful for debugging + // CheckTable(FEXCore::X86Tables::BaseOps); + 
printf("We installed %ld instructions to the tables\n", NumInsts); +} + +} diff --git a/Source/Interface/Core/OpcodeDispatcher.h b/Source/Interface/Core/OpcodeDispatcher.h new file mode 100644 index 000000000..308b96dbb --- /dev/null +++ b/Source/Interface/Core/OpcodeDispatcher.h @@ -0,0 +1,317 @@ +#pragma once + +#include + +#include +#include + +#include +#include + +namespace FEXCore::IR { +class Pass; +class PassManager; + +class OpDispatchBuilder final { +friend class FEXCore::IR::Pass; +friend class FEXCore::IR::PassManager; +public: + + struct { + bool HadUnconditionalExit {false}; + } Information; + bool ShouldDump {false}; + OpDispatchBuilder(); + + IRListView ViewIR() { return IRListView(&Data, &ListData); } + IRListView *CreateIRCopy() { return new IRListView(&Data, &ListData); } + void ResetWorkingList(); + bool HadDecodeFailure() { return DecodeFailure; } + + void BeginBlock(); + void EndBlock(uint64_t RIPIncrement); + void ExitFunction(); + + // Dispatch builder functions +#define OpcodeArgs [[maybe_unused]] FEXCore::X86Tables::DecodedOp Op + void UnhandledOp(OpcodeArgs); + void MOVOp(OpcodeArgs); + void ALUOp(OpcodeArgs); + void INTOp(OpcodeArgs); + void SyscallOp(OpcodeArgs); + void LEAOp(OpcodeArgs); + void NOPOp(OpcodeArgs); + void RETOp(OpcodeArgs); + void SecondaryALUOp(OpcodeArgs); + void ADCOp(OpcodeArgs); + void SBBOp(OpcodeArgs); + void PUSHOp(OpcodeArgs); + void POPOp(OpcodeArgs); + void LEAVEOp(OpcodeArgs); + void CALLOp(OpcodeArgs); + void CALLAbsoluteOp(OpcodeArgs); + void CondJUMPOp(OpcodeArgs); + void JUMPOp(OpcodeArgs); + void JUMPAbsoluteOp(OpcodeArgs); + void TESTOp(OpcodeArgs); + void MOVSXDOp(OpcodeArgs); + void MOVSXOp(OpcodeArgs); + void MOVZXOp(OpcodeArgs); + void CMPOp(OpcodeArgs); + void SETccOp(OpcodeArgs); + void CQOOp(OpcodeArgs); + void CDQOp(OpcodeArgs); + void XCHGOp(OpcodeArgs); + void SAHFOp(OpcodeArgs); + void LAHFOp(OpcodeArgs); + void MOVSegOp(OpcodeArgs); + void FLAGControlOp(OpcodeArgs); + void 
MOVOffsetOp(OpcodeArgs); + void CMOVOp(OpcodeArgs); + void CPUIDOp(OpcodeArgs); + void SHLOp(OpcodeArgs); + void SHROp(OpcodeArgs); + void ASHROp(OpcodeArgs); + void ROROp(OpcodeArgs); + void ROLOp(OpcodeArgs); + void BTOp(OpcodeArgs); + void IMUL1SrcOp(OpcodeArgs); + void IMUL2SrcOp(OpcodeArgs); + void IMULOp(OpcodeArgs); + void STOSOp(OpcodeArgs); + void MOVSOp(OpcodeArgs); + void CMPSOp(OpcodeArgs); + void BSWAPOp(OpcodeArgs); + + void RDTSCOp(OpcodeArgs); + void INCOp(OpcodeArgs); + void DECOp(OpcodeArgs); + void NEGOp(OpcodeArgs); + void DIVOp(OpcodeArgs); + void IDIVOp(OpcodeArgs); + void BSFOp(OpcodeArgs); + void BSROp(OpcodeArgs); + void CMPXCHGOp(OpcodeArgs); + void MULOp(OpcodeArgs); + void NOTOp(OpcodeArgs); + + // SSE + void MOVUPSOp(OpcodeArgs); + void MOVLHPSOp(OpcodeArgs); + void MOVHPDOp(OpcodeArgs); + void VectorALUOp(OpcodeArgs); + void MOVQOp(OpcodeArgs); + void PADDQOp(OpcodeArgs); + void PSUBQOp(OpcodeArgs); + template + void PMINUOp(OpcodeArgs); + void PMINSWOp(OpcodeArgs); + void PMOVMSKBOp(OpcodeArgs); + void PUNPCKLOp(OpcodeArgs); + void PUNPCKHOp(OpcodeArgs); + template + void PSHUFDOp(OpcodeArgs); + void PCMPEQOp(OpcodeArgs); + template + void PCMPGTOp(OpcodeArgs); + void MOVDOp(OpcodeArgs); + template + void PSRLD(OpcodeArgs); + template + void PSLL(OpcodeArgs); + void PSRLDQ(OpcodeArgs); + void MOVDDUPOp(OpcodeArgs); + + template + void SHUFOp(OpcodeArgs); + +#undef OpcodeArgs + + /** + * @name IR allocation routines + * + * @{ */ + +// These handlers add cost to the constructor and destructor +// If it becomes an issue then blow them away +// GCC also generates some pretty atrocious code around these +// Use Clang! 
+#define IROP_ALLOCATE_HELPERS +#define IROP_DISPATCH_HELPERS +#include "IRDefines.inc" + + IRPair _Constant(uint8_t Size, uint64_t Constant) { + auto Op = AllocateOp(); + Op.first->Constant = Constant; + Op.first->Header.Size = Size / 8; + Op.first->Header.Elements = 1; + Op.first->Header.NumArgs = 0; + Op.first->Header.HasDest = true; + return Op; + } + + IRPair _Bfe(uint8_t Width, uint8_t lsb, OrderedNode *ssa0) { + return _Bfe(ssa0, Width, lsb); + } + IRPair _Bfi(uint8_t Width, uint8_t lsb, OrderedNode *ssa0, OrderedNode *ssa1) { + return _Bfi(ssa0, ssa1, Width, lsb); + } + IRPair _StoreMem(uint8_t Size, OrderedNode *ssa0, OrderedNode *ssa1) { + return _StoreMem(ssa0, ssa1, Size); + } + IRPair _LoadMem(uint8_t Size, OrderedNode *ssa0) { + return _LoadMem(ssa0, Size); + } + IRPair _StoreContext(uint8_t Size, uint32_t Offset, OrderedNode *ssa0) { + return _StoreContext(ssa0, Size, Offset); + } + IRPair _Select(uint8_t Cond, OrderedNode *ssa0, OrderedNode *ssa1, OrderedNode *ssa2, OrderedNode *ssa3) { + return _Select(ssa0, ssa1, ssa2, ssa3, Cond); + } + IRPair _Sext(uint8_t Size, OrderedNode *ssa0) { + return _Sext(ssa0, Size); + } + IRPair _Zext(uint8_t Size, OrderedNode *ssa0) { + return _Zext(ssa0, Size); + } + IRPair _VInsElement(uint8_t RegisterSize, uint8_t ElementSize, uint8_t DestIdx, uint8_t SrcIdx, OrderedNode *ssa0, OrderedNode *ssa1) { + return _VInsElement(ssa0, ssa1, RegisterSize, ElementSize, DestIdx, SrcIdx); + } + IRPair _VAdd(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) { + return _VAdd(ssa0, ssa1, RegisterSize, ElementSize); + } + IRPair _VSub(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) { + return _VSub(ssa0, ssa1, RegisterSize, ElementSize); + } + IRPair _VUMin(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) { + return _VUMin(ssa0, ssa1, RegisterSize, ElementSize); + } + IRPair _VSMin(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode 
*ssa0, OrderedNode *ssa1) { + return _VSMin(ssa0, ssa1, RegisterSize, ElementSize); + } + IRPair _VZip(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) { + return _VZip(ssa0, ssa1, RegisterSize, ElementSize); + } + IRPair _VZip2(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) { + return _VZip2(ssa0, ssa1, RegisterSize, ElementSize); + } + IRPair _VCMPEQ(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) { + return _VCMPEQ(ssa0, ssa1, RegisterSize, ElementSize); + } + IRPair _VCMPGT(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) { + return _VCMPGT(ssa0, ssa1, RegisterSize, ElementSize); + } + + IRPair _VUShl(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) { + return _VUShl(ssa0, ssa1, RegisterSize, ElementSize); + } + IRPair _VUShlS(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) { + return _VUShlS(ssa0, ssa1, RegisterSize, ElementSize); + } + + IRPair _VUShr(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) { + return _VUShr(ssa0, ssa1, RegisterSize, ElementSize); + } + /** @} */ + + bool IsValueConstant(NodeWrapper ssa, uint64_t *Constant) { + OrderedNode *RealNode = reinterpret_cast(ssa.GetPtr(ListData.Begin())); + FEXCore::IR::IROp_Header *IROp = RealNode->Op(Data.Begin()); + if (IROp->Op == OP_CONSTANT) { + auto Op = IROp->C(); + *Constant = Op->Constant; + return true; + } + return false; + } + + // This is fairly special in how it operates + // Since the node is decoupled from the backing op then we can swap out the backing op without much overhead + // This can potentially cause problems where multiple nodes are pointing to the same IROp + OrderedNode *ReplaceAllUsesWith(OrderedNode *Node, IROp_Header *Op) { + Node->Header.Value.SetOffset(Data.Begin(), reinterpret_cast(Op)); + return Node; + } + + // This is similar to the previous op 
except that we pass in a node + // This takes the op backing in the new node and replaces the node in the other node + // Again can cause problems where things are pointing to NewNode and haven't been decoupled + OrderedNode *ReplaceAllUsesWith(OrderedNode *Node, OrderedNode *NewNode) { + Node->Header.Value.NodeOffset = NewNode->Header.Value.NodeOffset; + return Node; + } + + void Unlink(OrderedNode *Node) { + Node->Unlink(ListData.Begin()); + } + + void SetPackedRFLAG(bool Lower8, OrderedNode *Src); + OrderedNode *GetPackedRFLAG(bool Lower8); + + void CopyData(OpDispatchBuilder const &rhs) { + LogMan::Throw::A(rhs.Data.BackingSize() <= Data.BackingSize(), "Trying to take ownership of data that is too large"); + LogMan::Throw::A(rhs.ListData.BackingSize() <= ListData.BackingSize(), "Trying to take ownership of data that is too large"); + Data.CopyData(rhs.Data); + ListData.CopyData(rhs.ListData); + } + +private: + void TestFunction(); + bool DecodeFailure{false}; + + OrderedNode *LoadSource(FEXCore::X86Tables::DecodedOp const& Op, FEXCore::X86Tables::DecodedOperand const& Operand, uint32_t Flags, bool LoadData = true, bool ForceLoad = false); + OrderedNode *LoadSource_WithOpSize(FEXCore::X86Tables::DecodedOp const& Op, FEXCore::X86Tables::DecodedOperand const& Operand, uint8_t OpSize, uint32_t Flags, bool LoadData = true, bool ForceLoad = false); + void StoreResult_WithOpSize(FEXCore::X86Tables::DecodedOp Op, FEXCore::X86Tables::DecodedOperand const& Operand, OrderedNode *const Src, uint8_t OpSize); + void StoreResult(FEXCore::X86Tables::DecodedOp Op, FEXCore::X86Tables::DecodedOperand const& Operand, OrderedNode *const Src); + void StoreResult(FEXCore::X86Tables::DecodedOp Op, OrderedNode *const Src); + uint8_t GetDstSize(FEXCore::X86Tables::DecodedOp Op); + uint8_t GetSrcSize(FEXCore::X86Tables::DecodedOp Op); + + template + void SetRFLAG(OrderedNode *Value); + void SetRFLAG(OrderedNode *Value, unsigned BitOffset); + OrderedNode *GetRFLAG(unsigned BitOffset); + + 
void GenerateFlags_ADC(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF); + void GenerateFlags_SBB(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF); + void GenerateFlags_SUB(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2); + void GenerateFlags_ADD(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2); + void GenerateFlags_MUL(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *High); + void GenerateFlags_UMUL(FEXCore::X86Tables::DecodedOp Op, OrderedNode *High); + void GenerateFlags_Logical(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2); + void GenerateFlags_Rotate(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2); + + OrderedNode *CreateNode(IROp_Header *Op) { + uintptr_t ListBegin = ListData.Begin(); + size_t Size = sizeof(OrderedNode); + void *Ptr = ListData.Allocate(Size); + OrderedNode *Node = new (Ptr) OrderedNode(); + Node->Header.Value.SetOffset(Data.Begin(), reinterpret_cast(Op)); + + if (CurrentWriteCursor) { + CurrentWriteCursor->append(ListBegin, Node); + } + CurrentWriteCursor = Node; + return Node; + } + + void SetWriteCursor(OrderedNode *Node) { + CurrentWriteCursor = Node; + } + + OrderedNode *GetWriteCursor() { + return CurrentWriteCursor; + } + + OrderedNode *CurrentWriteCursor = nullptr; + + // These could be combined with a little bit of work to be more efficient with memory usage. 
Isn't a big deal + IntrusiveAllocator Data; + IntrusiveAllocator ListData; + +}; + +void InstallOpcodeHandlers(); + +} + diff --git a/Source/Interface/Core/RegisterAllocation.cpp b/Source/Interface/Core/RegisterAllocation.cpp new file mode 100644 index 000000000..8563640b4 --- /dev/null +++ b/Source/Interface/Core/RegisterAllocation.cpp @@ -0,0 +1,230 @@ +#include "Common/BitSet.h" +#include "Interface/Core/RegisterAllocation.h" + +#include + +#include + +constexpr uint32_t INVALID_REG = ~0U; +constexpr uint32_t INVALID_CLASS = ~0U; + + +namespace FEXCore::RA { + + struct Register { + }; + + struct RegisterClass { + uint32_t RegisterBase; + uint32_t NumberOfRegisters{0}; + BitSet Registers; + }; + + struct RegisterNode { + uint32_t RegisterClass; + uint32_t Register; + uint32_t InterferenceCount; + uint32_t InterferenceListSize; + uint32_t *InterferenceList; + BitSet Interference; + }; + + static_assert(std::is_pod::value, "We want this to be POD"); + + struct RegisterSet { + Register *Registers; + RegisterClass *RegisterClasses; + uint32_t RegisterCount; + uint32_t ClassCount; + }; + + struct SpillStackUnit { + uint32_t Node; + uint32_t Class; + }; + + struct RegisterGraph { + RegisterSet *Set; + RegisterNode *Nodes; + uint32_t NodeCount; + uint32_t MaxNodeCount; + std::vector SpillStack; + }; + + RegisterSet *AllocateRegisterSet(uint32_t RegisterCount, uint32_t ClassCount) { + RegisterSet *Set = new RegisterSet; + + Set->RegisterCount = RegisterCount; + Set->ClassCount = ClassCount; + + Set->Registers = static_cast(calloc(RegisterCount, sizeof(Register))); + Set->RegisterClasses = static_cast(calloc(ClassCount, sizeof(RegisterClass))); + + for (uint32_t i = 0; i < ClassCount; ++i) { + Set->RegisterClasses[i].Registers.Allocate(RegisterCount); + } + + return Set; + } + + void FreeRegisterSet(RegisterSet *Set) { + for (uint32_t i = 0; i < Set->ClassCount; ++i) { + Set->RegisterClasses[i].Registers.Free(); + } + free(Set->RegisterClasses); + free(Set->Registers); + 
delete Set; + } + + void AddRegisters(RegisterSet *Set, uint32_t Class, uint32_t RegistersBase, uint32_t RegisterCount) { + for (uint32_t i = 0; i < RegisterCount; ++i) { + Set->RegisterClasses[Class].Registers.Set(RegistersBase + i); + } + Set->RegisterClasses[Class].RegisterBase = RegistersBase; + Set->RegisterClasses[Class].NumberOfRegisters += RegisterCount; + } + + RegisterGraph *AllocateRegisterGraph(RegisterSet *Set, uint32_t NodeCount) { + RegisterGraph *Graph = new RegisterGraph; + Graph->Set = Set; + Graph->NodeCount = NodeCount; + Graph->MaxNodeCount = NodeCount; + Graph->Nodes = static_cast(calloc(NodeCount, sizeof(RegisterNode))); + + // Initialize nodes + for (uint32_t i = 0; i < NodeCount; ++i) { + Graph->Nodes[i].Register = INVALID_REG; + Graph->Nodes[i].RegisterClass = INVALID_CLASS; + Graph->Nodes[i].InterferenceListSize = 32; + Graph->Nodes[i].InterferenceList = reinterpret_cast(calloc(Graph->Nodes[i].InterferenceListSize, sizeof(uint32_t))); + Graph->Nodes[i].InterferenceCount = 0; + Graph->Nodes[i].Interference.Allocate(NodeCount); + Graph->Nodes[i].Interference.Clear(NodeCount); + } + + return Graph; + } + + void ResetRegisterGraph(RegisterGraph *Graph, uint32_t NodeCount) { + if (NodeCount > Graph->MaxNodeCount) { + uint32_t OldNodeCount = Graph->MaxNodeCount; + Graph->NodeCount = NodeCount; + Graph->MaxNodeCount = NodeCount; + Graph->Nodes = static_cast(realloc(Graph->Nodes, NodeCount * sizeof(RegisterNode))); + + // Initialize nodes + for (uint32_t i = 0; i < OldNodeCount; ++i) { + Graph->Nodes[i].Register = INVALID_REG; + Graph->Nodes[i].RegisterClass = INVALID_CLASS; + Graph->Nodes[i].InterferenceCount = 0; + Graph->Nodes[i].Interference.Realloc(NodeCount); + Graph->Nodes[i].Interference.Clear(NodeCount); + } + + for (uint32_t i = OldNodeCount; i < NodeCount; ++i) { + Graph->Nodes[i].Register = INVALID_REG; + Graph->Nodes[i].RegisterClass = INVALID_CLASS; + Graph->Nodes[i].InterferenceListSize = 32; + Graph->Nodes[i].InterferenceList = 
reinterpret_cast(calloc(Graph->Nodes[i].InterferenceListSize, sizeof(uint32_t))); + Graph->Nodes[i].InterferenceCount = 0; + Graph->Nodes[i].Interference.Allocate(NodeCount); + Graph->Nodes[i].Interference.Clear(NodeCount); + } + } + else { + // We are only handling a node count of this size right now + Graph->NodeCount = NodeCount; + + // Initialize nodes + for (uint32_t i = 0; i < NodeCount; ++i) { + Graph->Nodes[i].Register = INVALID_REG; + Graph->Nodes[i].RegisterClass = INVALID_CLASS; + Graph->Nodes[i].InterferenceCount = 0; + Graph->Nodes[i].Interference.Clear(NodeCount); + } + } + } + + void FreeRegisterGraph(RegisterGraph *Graph) { + for (uint32_t i = 0; i < Graph->MaxNodeCount; ++i) { + RegisterNode *Node = &Graph->Nodes[i]; + Node->InterferenceCount = 0; + Node->InterferenceListSize = 0; + free(Node->InterferenceList); + Node->Interference.Free(); + } + + free(Graph->Nodes); + Graph->NodeCount = 0; + Graph->MaxNodeCount = 0; + delete Graph; + } + + void SetNodeClass(RegisterGraph *Graph, uint32_t Node, uint32_t Class) { + Graph->Nodes[Node].RegisterClass = Class; + } + + void AddNodeInterference(RegisterGraph *Graph, uint32_t Node1, uint32_t Node2) { + auto AddInterference = [&Graph](uint32_t Node1, uint32_t Node2) { + RegisterNode *Node = &Graph->Nodes[Node1]; + Node->Interference.Set(Node2); + if (Node->InterferenceListSize <= Node->InterferenceCount) { + Node->InterferenceListSize *= 2; + Node->InterferenceList = reinterpret_cast(realloc(Node->InterferenceList, Node->InterferenceListSize * sizeof(uint32_t))); + } + Node->InterferenceList[Node->InterferenceCount] = Node2; + ++Node->InterferenceCount; + }; + + AddInterference(Node1, Node2); + AddInterference(Node2, Node1); + } + + uint32_t GetNodeRegister(RegisterGraph *Graph, uint32_t Node) { + return Graph->Nodes[Node].Register; + } + + static bool HasInterference(RegisterGraph *Graph, RegisterNode *Node, uint32_t Register) { + for (uint32_t i = 0; i < Node->InterferenceCount; ++i) { + RegisterNode 
*IntNode = &Graph->Nodes[Node->InterferenceList[i]]; + if (IntNode->Register == Register) { + return true; + } + } + + return false; + } + + bool AllocateRegisters(RegisterGraph *Graph) { + Graph->SpillStack.clear(); + for (uint32_t i = 0; i < Graph->NodeCount; ++i) { + RegisterNode *CurrentNode = &Graph->Nodes[i]; + if (CurrentNode->RegisterClass == INVALID_CLASS) + continue; + + uint32_t Reg = ~0U; + RegisterClass *RAClass = &Graph->Set->RegisterClasses[CurrentNode->RegisterClass]; + for (uint32_t ri = 0; ri < RAClass->NumberOfRegisters; ++ri) { + if (!HasInterference(Graph, CurrentNode, RAClass->RegisterBase + ri)) { + Reg = ri; + break; + } + } + + if (Reg == ~0U) { + Graph->SpillStack.emplace_back(SpillStackUnit{i, CurrentNode->RegisterClass}); + } + else { + CurrentNode->Register = RAClass->RegisterBase + Reg; + } + } + + if (!Graph->SpillStack.empty()) { + printf("Couldn't allocate %ld registers\n", Graph->SpillStack.size()); + return false; + } + return true; + } + +} + diff --git a/Source/Interface/Core/RegisterAllocation.h b/Source/Interface/Core/RegisterAllocation.h new file mode 100644 index 000000000..d7c044d5e --- /dev/null +++ b/Source/Interface/Core/RegisterAllocation.h @@ -0,0 +1,30 @@ +#pragma once +#include +#include + +namespace FEXCore::RA { +struct RegisterSet; +struct RegisterGraph; +using CrappyBitset = std::vector; + +RegisterSet *AllocateRegisterSet(uint32_t RegisterCount, uint32_t ClassCount); +void FreeRegisterSet(RegisterSet *Set); +void AddRegisters(RegisterSet *Set, uint32_t Class, uint32_t RegistersBase, uint32_t RegisterCount); + +/** + * @name Inference graph handling + * @{ */ + +RegisterGraph *AllocateRegisterGraph(RegisterSet *Set, uint32_t NodeCount); +void FreeRegisterGraph(RegisterGraph *Graph); +void ResetRegisterGraph(RegisterGraph *Graph, uint32_t NodeCount); +void SetNodeClass(RegisterGraph *Graph, uint32_t Node, uint32_t Class); +void AddNodeInterference(RegisterGraph *Graph, uint32_t Node1, uint32_t Node2); +uint32_t 
GetNodeRegister(RegisterGraph *Graph, uint32_t Node); + +bool AllocateRegisters(RegisterGraph *Graph); + +/** @} */ + +} + diff --git a/Source/Interface/Core/X86DebugInfo.cpp b/Source/Interface/Core/X86DebugInfo.cpp new file mode 100644 index 000000000..fd410ba45 --- /dev/null +++ b/Source/Interface/Core/X86DebugInfo.cpp @@ -0,0 +1,120 @@ +#ifndef NDEBUG +#include +#include +#include + +namespace FEXCore::X86Tables::X86InstDebugInfo { +void InstallDebugInfo() { + + using namespace FEXCore::X86Tables; + auto NoFlags = Flags {0}; + + for (auto &BaseOp : BaseOps) + BaseOp.DebugInfo = NoFlags; + for (auto &BaseOp : SecondBaseOps) + BaseOp.DebugInfo = NoFlags; + for (auto &BaseOp : RepModOps) + BaseOp.DebugInfo = NoFlags; + for (auto &BaseOp : RepNEModOps) + BaseOp.DebugInfo = NoFlags; + for (auto &BaseOp : OpSizeModOps) + BaseOp.DebugInfo = NoFlags; + for (auto &BaseOp : PrimaryInstGroupOps) + BaseOp.DebugInfo = NoFlags; + for (auto &BaseOp : SecondInstGroupOps) + BaseOp.DebugInfo = NoFlags; + for (auto &BaseOp : SecondModRMTableOps) + BaseOp.DebugInfo = NoFlags; + for (auto &BaseOp : X87Ops) + BaseOp.DebugInfo = NoFlags; + for (auto &BaseOp : DDDNowOps) + BaseOp.DebugInfo = NoFlags; + for (auto &BaseOp : H0F38TableOps) + BaseOp.DebugInfo = NoFlags; + for (auto &BaseOp : H0F3ATableOps) + BaseOp.DebugInfo = NoFlags; + for (auto &BaseOp : VEXTableOps) + BaseOp.DebugInfo = NoFlags; + for (auto &BaseOp : VEXTableGroupOps) + BaseOp.DebugInfo = NoFlags; + for (auto &BaseOp : XOPTableOps) + BaseOp.DebugInfo = NoFlags; + for (auto &BaseOp : XOPTableGroupOps) + BaseOp.DebugInfo = NoFlags; + + const std::vector> BaseOpTable = { + {0x50, 8, {FLAGS_MEM_ACCESS}}, + {0x58, 8, {FLAGS_MEM_ACCESS}}, + + {0x68, 1, {FLAGS_MEM_ACCESS}}, + {0x6A, 1, {FLAGS_MEM_ACCESS}}, + + {0xAA, 4, {FLAGS_MEM_ACCESS}}, + + {0xC8, 1, {FLAGS_MEM_ACCESS}}, + + {0xCC, 2, {FLAGS_DEBUG}}, + + {0xD7, 1, {FLAGS_MEM_ACCESS}}, + + {0xF1, 1, {FLAGS_DEBUG}}, + {0xF4, 1, {FLAGS_DEBUG}}, + }; + + const std::vector> 
TwoByteOpTable = { + {0x0B, 1, {FLAGS_DEBUG}}, + {0x19, 7, {FLAGS_DEBUG}}, + {0x28, 2, {FLAGS_MEM_ALIGN_16}}, + + {0x31, 1, {FLAGS_DEBUG}}, + + {0xA2, 1, {FLAGS_DEBUG}}, + {0xA3, 1, {FLAGS_MEM_ACCESS}}, + {0xAB, 1, {FLAGS_MEM_ACCESS}}, + {0xB3, 1, {FLAGS_MEM_ACCESS}}, + {0xBB, 1, {FLAGS_MEM_ACCESS}}, + + {0xFF, 1, {FLAGS_DEBUG}}, + }; + + const std::vector> PrimaryGroupOpTable = { +#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg)) + {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 6), 2, {FLAGS_DIVIDE}}, + {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 6), 2, {FLAGS_DIVIDE}}, +#undef OPD + }; + + const std::vector> SecondaryExtensionOpTable = { +#define PF_NONE 0 +#define PF_F3 1 +#define PF_66 2 +#define PF_F2 3 +#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_6) << 5) | (prefix) << 3 | (Reg)) + {OPD(TYPE_GROUP_15, PF_NONE, 2), 1, {FLAGS_DEBUG}}, + {OPD(TYPE_GROUP_15, PF_NONE, 3), 1, {FLAGS_DEBUG}}, +#undef PF_F3 +#undef PF_66 +#undef PF_F2 +#undef OPD + }; + + auto GenerateDebugTable = [](auto& FinalTable, auto& LocalTable) { + for (auto Op : LocalTable) { + auto OpNum = std::get<0>(Op); + auto DebugInfo = std::get<2>(Op); + for (uint8_t i = 0; i < std::get<1>(Op); ++i) { + memcpy(&FinalTable.at(OpNum+i).DebugInfo, &DebugInfo, sizeof(X86InstDebugInfo::Flags)); + } + } + }; + + GenerateDebugTable(BaseOps, BaseOpTable); + GenerateDebugTable(SecondBaseOps, TwoByteOpTable); + GenerateDebugTable(PrimaryInstGroupOps, PrimaryGroupOpTable); + + GenerateDebugTable(SecondInstGroupOps, SecondaryExtensionOpTable); + + printf("Installing debug info\n"); +} +} +#endif diff --git a/Source/Interface/Core/X86Tables.cpp b/Source/Interface/Core/X86Tables.cpp new file mode 100644 index 000000000..4c02dd0e8 --- /dev/null +++ b/Source/Interface/Core/X86Tables.cpp @@ -0,0 +1,2642 @@ +#include "LogManager.h" + +#include +#include +#include +#include +#include + +namespace FEXCore::X86Tables { + +std::array BaseOps; +std::array 
SecondBaseOps; + +std::array RepModOps; +std::array RepNEModOps; +std::array OpSizeModOps; +std::array PrimaryInstGroupOps; +std::array SecondInstGroupOps; +std::array SecondModRMTableOps; +std::array X87Ops; +std::array DDDNowOps; +std::array H0F38TableOps; +std::array H0F3ATableOps; +std::array VEXTableOps; +std::array VEXTableGroupOps; +std::array XOPTableOps; +std::array XOPTableGroupOps; + +void InitializeInfoTables() { + using namespace FEXCore::X86Tables::InstFlags; + auto UnknownOp = X86InstInfo{"UND", TYPE_UNKNOWN, FLAGS_NONE, 0, nullptr}; + + for (auto &BaseOp : BaseOps) + BaseOp = UnknownOp; + for (auto &BaseOp : SecondBaseOps) + BaseOp = UnknownOp; + for (auto &BaseOp : RepModOps) + BaseOp = UnknownOp; + for (auto &BaseOp : RepNEModOps) + BaseOp = UnknownOp; + for (auto &BaseOp : OpSizeModOps) + BaseOp = UnknownOp; + for (auto &BaseOp : PrimaryInstGroupOps) + BaseOp = UnknownOp; + for (auto &BaseOp : SecondInstGroupOps) + BaseOp = UnknownOp; + for (auto &BaseOp : SecondModRMTableOps) + BaseOp = UnknownOp; + for (auto &BaseOp : X87Ops) + BaseOp = UnknownOp; + for (auto &BaseOp : DDDNowOps) + BaseOp = UnknownOp; + for (auto &BaseOp : H0F38TableOps) + BaseOp = UnknownOp; + for (auto &BaseOp : H0F3ATableOps) + BaseOp = UnknownOp; + for (auto &BaseOp : VEXTableOps) + BaseOp = UnknownOp; + for (auto &BaseOp : VEXTableGroupOps) + BaseOp = UnknownOp; + for (auto &BaseOp : XOPTableOps) + BaseOp = UnknownOp; + for (auto &BaseOp : XOPTableGroupOps) + BaseOp = UnknownOp; + + const std::vector> BaseOpTable = { + // Prefixes + // Operand size overide + {0x66, 1, X86InstInfo{"", TYPE_LEGACY_PREFIX, FLAGS_NONE, 0, nullptr}}, + // Address size override + {0x67, 1, X86InstInfo{"", TYPE_LEGACY_PREFIX, FLAGS_NONE, 0, nullptr}}, + {0x2E, 1, X86InstInfo{"CS", TYPE_LEGACY_PREFIX, FLAGS_NONE, 0, nullptr}}, + {0x3E, 1, X86InstInfo{"DS", TYPE_LEGACY_PREFIX, FLAGS_NONE, 0, nullptr}}, + {0x26, 1, X86InstInfo{"ES", TYPE_LEGACY_PREFIX, FLAGS_NONE, 0, nullptr}}, + // These are still 
invalid on 64bit + {0x64, 1, X86InstInfo{"FS", TYPE_PREFIX, FLAGS_NONE, 0, nullptr}}, + {0x65, 1, X86InstInfo{"GS", TYPE_PREFIX, FLAGS_NONE, 0, nullptr}}, + {0x36, 1, X86InstInfo{"SS", TYPE_LEGACY_PREFIX, FLAGS_NONE, 0, nullptr}}, + {0xF0, 1, X86InstInfo{"LOCK", TYPE_LEGACY_PREFIX, FLAGS_NONE, 0, nullptr}}, + {0xF2, 1, X86InstInfo{"REPNE", TYPE_LEGACY_PREFIX, FLAGS_NONE, 0, nullptr}}, + {0xF3, 1, X86InstInfo{"REP", TYPE_LEGACY_PREFIX, FLAGS_NONE, 0, nullptr}}, + + // REX + {0x40, 16, X86InstInfo{"", TYPE_REX_PREFIX, FLAGS_NONE, 0, nullptr}}, + + // Instructions + {0x00, 1, X86InstInfo{"ADD", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x01, 1, X86InstInfo{"ADD", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_DISPLACE_SIZE_DIV_2, 0, nullptr}}, + {0x02, 1, X86InstInfo{"ADD", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM, 0, nullptr}}, + {0x03, 1, X86InstInfo{"ADD", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x04, 1, X86InstInfo{"ADD", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX , 1, nullptr}}, + {0x05, 1, X86InstInfo{"ADD", TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + + {0x06, 2, X86InstInfo{"[INV]", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x08, 1, X86InstInfo{"OR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x09, 1, X86InstInfo{"OR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x0A, 1, X86InstInfo{"OR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM, 0, nullptr}}, + {0x0B, 1, X86InstInfo{"OR", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x0C, 1, X86InstInfo{"OR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX , 1, nullptr}}, + {0x0D, 1, X86InstInfo{"OR", TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + + {0x0E, 1, X86InstInfo{"[INV]", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x10, 1, X86InstInfo{"ADC", TYPE_INST, 
GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x11, 1, X86InstInfo{"ADC", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_DISPLACE_SIZE_DIV_2, 0, nullptr}}, + {0x12, 1, X86InstInfo{"ADC", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM, 0, nullptr}}, + {0x13, 1, X86InstInfo{"ADC", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x14, 1, X86InstInfo{"ADC", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX , 1, nullptr}}, + {0x15, 1, X86InstInfo{"ADC", TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + + {0x16, 2, X86InstInfo{"[INV]", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x18, 1, X86InstInfo{"SBB", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x19, 1, X86InstInfo{"SBB", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_DISPLACE_SIZE_DIV_2, 0, nullptr}}, + {0x1A, 1, X86InstInfo{"SBB", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM, 0, nullptr}}, + {0x1B, 1, X86InstInfo{"SBB", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x1C, 1, X86InstInfo{"SBB", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX , 1, nullptr}}, + {0x1D, 1, X86InstInfo{"SBB", TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + + {0x1E, 2, X86InstInfo{"[INV]", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x20, 1, X86InstInfo{"AND", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x21, 1, X86InstInfo{"AND", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x22, 1, X86InstInfo{"AND", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM, 0, nullptr}}, + {0x23, 1, X86InstInfo{"AND", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x24, 1, X86InstInfo{"AND", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX , 1, nullptr}}, + {0x25, 1, X86InstInfo{"AND", TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + + {0x27, 1, X86InstInfo{"[INV]", 
TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x28, 1, X86InstInfo{"SUB", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x29, 1, X86InstInfo{"SUB", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x2A, 1, X86InstInfo{"SUB", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM, 0, nullptr}}, + {0x2B, 1, X86InstInfo{"SUB", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x2C, 1, X86InstInfo{"SUB", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX , 1, nullptr}}, + {0x2D, 1, X86InstInfo{"SUB", TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + + {0x2F, 1, X86InstInfo{"[INV]", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x30, 1, X86InstInfo{"XOR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x31, 1, X86InstInfo{"XOR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x32, 1, X86InstInfo{"XOR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM, 0, nullptr}}, + {0x33, 1, X86InstInfo{"XOR", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x34, 1, X86InstInfo{"XOR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX , 1, nullptr}}, + {0x35, 1, X86InstInfo{"XOR", TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + + {0x37, 1, X86InstInfo{"[INV]", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x38, 1, X86InstInfo{"CMP", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x39, 1, X86InstInfo{"CMP", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x3A, 1, X86InstInfo{"CMP", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM, 0, nullptr}}, + {0x3B, 1, X86InstInfo{"CMP", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x3C, 1, X86InstInfo{"CMP", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX , 1, nullptr}}, + {0x3D, 1, X86InstInfo{"CMP", TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + + {0x3F, 1, 
X86InstInfo{"[INV]", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x50, 8, X86InstInfo{"PUSH", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SF_REX_IN_BYTE | FLAGS_DEBUG_MEM_ACCESS , 0, nullptr}}, + {0x58, 8, X86InstInfo{"POP", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SF_REX_IN_BYTE | FLAGS_DEBUG_MEM_ACCESS , 0, nullptr}}, + + {0x60, 3, X86InstInfo{"[INV]", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x63, 1, X86InstInfo{"MOVSXD", TYPE_INST, GenFlagsDstSize(SIZE_64BIT) | FLAGS_MODRM, 0, nullptr}}, + + {0x68, 1, X86InstInfo{"PUSH", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + {0x69, 1, X86InstInfo{"IMUL", TYPE_INST, FLAGS_MODRM | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + {0x6A, 1, X86InstInfo{"PUSH", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_SRC_SEXT , 1, nullptr}}, + {0x6B, 1, X86InstInfo{"IMUL", TYPE_INST, FLAGS_MODRM | FLAGS_SRC_SEXT , 1, nullptr}}, + + // This should just throw a GP + {0x6C, 1, X86InstInfo{"INSB", TYPE_INVALID, FLAGS_SUPPORTS_REP, 0, nullptr}}, + {0x6D, 1, X86InstInfo{"INSW", TYPE_INVALID, FLAGS_SUPPORTS_REP, 0, nullptr}}, + {0x6E, 1, X86InstInfo{"OUTS", TYPE_INVALID, FLAGS_SUPPORTS_REP, 0, nullptr}}, + {0x6F, 1, X86InstInfo{"OUTS", TYPE_INVALID, FLAGS_SUPPORTS_REP, 0, nullptr}}, + + {0x70, 1, X86InstInfo{"JO", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0x71, 1, X86InstInfo{"JNO", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0x72, 1, X86InstInfo{"JB", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0x73, 1, X86InstInfo{"JNB", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0x74, 1, X86InstInfo{"JZ", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0x75, 1, X86InstInfo{"JNZ", TYPE_INST, 
GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0x76, 1, X86InstInfo{"JBE", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0x77, 1, X86InstInfo{"JNBE", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0x78, 1, X86InstInfo{"JS", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0x79, 1, X86InstInfo{"JNS", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0x7A, 1, X86InstInfo{"JP", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0x7B, 1, X86InstInfo{"JNP", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0x7C, 1, X86InstInfo{"JL", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0x7D, 1, X86InstInfo{"JNL", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0x7E, 1, X86InstInfo{"JLE", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0x7F, 1, X86InstInfo{"JNLE", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + + {0x84, 1, X86InstInfo{"TEST", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x85, 1, X86InstInfo{"TEST", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x86, 1, X86InstInfo{"XCHG", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x87, 1, X86InstInfo{"XCHG", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + + {0x88, 1, X86InstInfo{"MOV", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x89, 1, X86InstInfo{"MOV", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x8A, 1, X86InstInfo{"MOV", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM, 
0, nullptr}}, + {0x8B, 1, X86InstInfo{"MOV", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x8C, 1, X86InstInfo{"MOV", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x8D, 1, X86InstInfo{"LEA", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x8E, 1, X86InstInfo{"MOV", TYPE_INVALID, FLAGS_MODRM, 0, nullptr}}, // MOV seg, modrM == invalid on x86-64 + {0x90, 8, X86InstInfo{"XCHG", TYPE_INST, FLAGS_SF_REX_IN_BYTE | FLAGS_SF_SRC_RAX, 0, nullptr}}, + {0x98, 1, X86InstInfo{"CDQE", TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SF_SRC_RAX, 0, nullptr}}, + {0x99, 1, X86InstInfo{"CQO", TYPE_INST, FLAGS_SF_DST_RDX | FLAGS_SF_SRC_RAX, 0, nullptr}}, + {0x9A, 1, X86InstInfo{"[INV]", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + // These three are all X87 instructions + {0x9B, 1, X86InstInfo{"FWAIT", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x9C, 1, X86InstInfo{"PUSHF", TYPE_INVALID, GenFlagsSameSize(SIZE_64BIT) , 0, nullptr}}, + {0x9D, 1, X86InstInfo{"POPF", TYPE_INVALID, GenFlagsSameSize(SIZE_64BIT) , 0, nullptr}}, + + {0x9E, 1, X86InstInfo{"SAHF", TYPE_INST, FLAGS_NONE, 0, nullptr}}, + {0x9F, 1, X86InstInfo{"LAHF", TYPE_INST, FLAGS_NONE, 0, nullptr}}, + + {0xA0, 1, X86InstInfo{"MOV", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX | FLAGS_MEM_OFFSET, 1, nullptr}}, + {0xA1, 1, X86InstInfo{"MOV", TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_DISPLACE_SIZE_MUL_2 | FLAGS_MEM_OFFSET, 4, nullptr}}, + {0xA2, 1, X86InstInfo{"MOV", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_SRC_RAX | FLAGS_MEM_OFFSET, 1, nullptr}}, + {0xA3, 1, X86InstInfo{"MOV", TYPE_INST, FLAGS_SF_SRC_RAX | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_DISPLACE_SIZE_MUL_2 | FLAGS_MEM_OFFSET, 4, nullptr}}, + + {0xA4, 1, X86InstInfo{"MOVSB", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MEM_OFFSET | FLAGS_SUPPORTS_REP, 0, nullptr}}, + {0xA5, 1, X86InstInfo{"MOVS", TYPE_INST, FLAGS_MEM_OFFSET | FLAGS_SUPPORTS_REP, 0, nullptr}}, + {0xA6, 1, X86InstInfo{"CMPSB", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | 
FLAGS_MEM_OFFSET | FLAGS_SUPPORTS_REP, 0, nullptr}}, + {0xA7, 1, X86InstInfo{"CMPS", TYPE_INST, FLAGS_MEM_OFFSET | FLAGS_SUPPORTS_REP, 0, nullptr}}, + + {0xA8, 1, X86InstInfo{"TEST", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX , 1, nullptr}}, + {0xA9, 1, X86InstInfo{"TEST", TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + {0xAA, 1, X86InstInfo{"STOS", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_SUPPORTS_REP | FLAGS_SF_SRC_RAX, 0, nullptr}}, + {0xAB, 1, X86InstInfo{"STOS", TYPE_INST, FLAGS_DEBUG_MEM_ACCESS | FLAGS_SUPPORTS_REP | FLAGS_SF_SRC_RAX, 0, nullptr}}, + {0xAC, 2, X86InstInfo{"LODS", TYPE_INST, FLAGS_DEBUG_MEM_ACCESS | FLAGS_SUPPORTS_REP, 0, nullptr}}, + {0xAE, 2, X86InstInfo{"SCAS", TYPE_INST, FLAGS_DEBUG_MEM_ACCESS, 0, nullptr}}, + + {0xB0, 8, X86InstInfo{"MOV", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_REX_IN_BYTE , 1, nullptr}}, + {0xB8, 8, X86InstInfo{"MOV", TYPE_INST, FLAGS_SF_REX_IN_BYTE | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_DISPLACE_SIZE_MUL_2, 4, nullptr}}, + + {0xC2, 1, X86InstInfo{"RET", TYPE_INST, FLAGS_SETS_RIP | FLAGS_BLOCK_END, 2, nullptr}}, + {0xC3, 1, X86InstInfo{"RET", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_BLOCK_END , 0, nullptr}}, + {0xC8, 1, X86InstInfo{"ENTER", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_DEBUG_MEM_ACCESS , 3, nullptr}}, + {0xC9, 1, X86InstInfo{"LEAVE", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_BLOCK_END , 0, nullptr}}, + {0xCA, 2, X86InstInfo{"RETF", TYPE_PRIV, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_BLOCK_END, 0, nullptr}}, + {0xCC, 1, X86InstInfo{"INT3", TYPE_INST, FLAGS_DEBUG, 0, nullptr}}, + {0xCD, 1, X86InstInfo{"INT", TYPE_INST, FLAGS_DEBUG , 1, nullptr}}, + {0xCE, 1, X86InstInfo{"[INV]", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xCF, 1, X86InstInfo{"IRET", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + + {0xD4, 3, X86InstInfo{"[INV]", TYPE_INVALID, FLAGS_NONE, 0, 
nullptr}}, + {0xD7, 1, X86InstInfo{"XLAT", TYPE_INST, FLAGS_DEBUG_MEM_ACCESS, 0, nullptr}}, + + {0xE0, 1, X86InstInfo{"LOOPNE", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0xE1, 1, X86InstInfo{"LOOPE", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0xE2, 1, X86InstInfo{"LOOP", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + {0xE3, 1, X86InstInfo{"JrCXZ", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1, nullptr}}, + + // Should just throw GP + {0xE4, 2, X86InstInfo{"IN", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xE6, 2, X86InstInfo{"OUT", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {0xE8, 1, X86InstInfo{"CALL", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_BLOCK_END , 4, nullptr}}, + {0xE9, 1, X86InstInfo{"JMP", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_BLOCK_END , 4, nullptr}}, + {0xEA, 1, X86InstInfo{"[INV]", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xEB, 1, X86InstInfo{"JMP", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_BLOCK_END , 1, nullptr}}, + + // Should just throw GP + {0xEC, 2, X86InstInfo{"IN", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xEE, 2, X86InstInfo{"OUT", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {0xF1, 1, X86InstInfo{"INT1", TYPE_INST, FLAGS_DEBUG, 0, nullptr}}, + {0xF4, 1, X86InstInfo{"HLT", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {0xF5, 1, X86InstInfo{"CMC", TYPE_INST, FLAGS_NONE, 0, nullptr}}, + {0xF8, 1, X86InstInfo{"CLC", TYPE_INST, FLAGS_NONE, 0, nullptr}}, + {0xF9, 1, X86InstInfo{"STC", TYPE_INST, FLAGS_NONE, 0, nullptr}}, + {0xFA, 1, X86InstInfo{"CLI", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {0xFB, 1, X86InstInfo{"STI", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {0xFC, 1, 
X86InstInfo{"CLD", TYPE_INST, FLAGS_NONE, 0, nullptr}}, + {0xFD, 1, X86InstInfo{"STD", TYPE_INST, FLAGS_NONE, 0, nullptr}}, + + // Two Byte table + {0x0F, 1, X86InstInfo{"", TYPE_SECONDARY_TABLE_PREFIX, FLAGS_NONE, 0, nullptr}}, + + // x87 table + {0xD8, 8, X86InstInfo{"", TYPE_X87_TABLE_PREFIX, FLAGS_NONE, 0, nullptr}}, + + // ModRM table + // MoreBytes field repurposed for valid bits mask + {0x80, 1, X86InstInfo{"", TYPE_GROUP_1, FLAGS_NONE, 0, nullptr}}, + {0x81, 1, X86InstInfo{"", TYPE_GROUP_1, FLAGS_NONE, 1, nullptr}}, + {0x82, 1, X86InstInfo{"", TYPE_GROUP_1, FLAGS_NONE, 2, nullptr}}, + {0x83, 1, X86InstInfo{"", TYPE_GROUP_1, FLAGS_NONE, 3, nullptr}}, + {0xC0, 1, X86InstInfo{"", TYPE_GROUP_2, FLAGS_NONE, 0, nullptr}}, + {0xC1, 1, X86InstInfo{"", TYPE_GROUP_2, FLAGS_NONE, 1, nullptr}}, + {0xD0, 1, X86InstInfo{"", TYPE_GROUP_2, FLAGS_NONE, 2, nullptr}}, + {0xD1, 1, X86InstInfo{"", TYPE_GROUP_2, FLAGS_NONE, 3, nullptr}}, + {0xD2, 1, X86InstInfo{"", TYPE_GROUP_2, FLAGS_NONE, 4, nullptr}}, + {0xD3, 1, X86InstInfo{"", TYPE_GROUP_2, FLAGS_NONE, 5, nullptr}}, + {0xF6, 1, X86InstInfo{"", TYPE_GROUP_3, FLAGS_NONE, 0, nullptr}}, + {0xF7, 1, X86InstInfo{"", TYPE_GROUP_3, FLAGS_NONE, 1, nullptr}}, + {0xFE, 1, X86InstInfo{"", TYPE_GROUP_4, FLAGS_NONE, 0, nullptr}}, + {0xFF, 1, X86InstInfo{"", TYPE_GROUP_5, FLAGS_NONE, 0, nullptr}}, + + // Group 11 + {0xC6, 1, X86InstInfo{"", TYPE_GROUP_11, FLAGS_NONE, 0, nullptr}}, + {0xC7, 1, X86InstInfo{"", TYPE_GROUP_11, FLAGS_NONE, 1, nullptr}}, + + // VEX table + {0xC4, 2, X86InstInfo{"", TYPE_VEX_TABLE_PREFIX, FLAGS_NONE, 0, nullptr}}, + + // XOP Table + {0x8F, 1, X86InstInfo{"", TYPE_XOP_TABLE_PREFIX, FLAGS_NONE, 0, nullptr}}, + }; + + const std::vector> TwoByteOpTable = { + // Instructions + {0x00, 1, X86InstInfo{"", TYPE_GROUP_6, FLAGS_NONE, 0, nullptr}}, + {0x01, 1, X86InstInfo{"", TYPE_GROUP_7, FLAGS_NONE, 0, nullptr}}, + // These two load segment register data + {0x02, 1, X86InstInfo{"LAR", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, 
+ {0x03, 1, X86InstInfo{"LSL", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {0x04, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x05, 1, X86InstInfo{"SYSCALL", TYPE_INST, FLAGS_BLOCK_END, 0, nullptr}}, + {0x06, 1, X86InstInfo{"CLTS", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {0x07, 1, X86InstInfo{"SYSRET", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {0x08, 1, X86InstInfo{"INVD", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {0x09, 1, X86InstInfo{"WBINVD", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {0x0A, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x0B, 1, X86InstInfo{"UD2", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {0x0C, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x0D, 1, X86InstInfo{"", TYPE_GROUP_P, FLAGS_NONE, 0, nullptr}}, + {0x0E, 1, X86InstInfo{"FEMMS", TYPE_3DNOW_INST, FLAGS_BLOCK_END, 0, nullptr}}, + {0x0F, 1, X86InstInfo{"", TYPE_3DNOW_TABLE, FLAGS_NONE, 0, nullptr}}, + + {0x10, 1, X86InstInfo{"MOVUPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x11, 1, X86InstInfo{"MOVUPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x12, 1, X86InstInfo{"MOVLPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x13, 1, X86InstInfo{"MOVLPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x14, 1, X86InstInfo{"UNPCKLPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x15, 1, X86InstInfo{"UNPCKHPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x16, 1, X86InstInfo{"MOVLHPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x17, 1, X86InstInfo{"MOVHPS", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SF_HIGH_XMM_REG | FLAGS_MODRM | 
FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x18, 1, X86InstInfo{"", TYPE_GROUP_16, FLAGS_NONE, 0, nullptr}}, + {0x19, 7, X86InstInfo{"NOP", TYPE_INST, FLAGS_DEBUG | FLAGS_MODRM, 0, nullptr}}, + + {0x20, 2, X86InstInfo{"MOV", TYPE_PRIV, GenFlagsSameSize(SIZE_64BIT) , 0, nullptr}}, + {0x22, 2, X86InstInfo{"MOV", TYPE_PRIV, GenFlagsSameSize(SIZE_64BIT) , 0, nullptr}}, + {0x24, 4, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x28, 1, X86InstInfo{"MOVAPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x29, 1, X86InstInfo{"MOVAPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x2A, 1, X86InstInfo{"CVTPI2PS", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x2B, 1, X86InstInfo{"MOVNTPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x2C, 1, X86InstInfo{"CVTTPS2PI", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x2D, 1, X86InstInfo{"CVTPS2PI", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x2E, 1, X86InstInfo{"UCOMISS", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x2F, 1, X86InstInfo{"COMISS", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + + {0x30, 1, X86InstInfo{"WRMSR", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {0x31, 1, X86InstInfo{"RDTSC", TYPE_INST, FLAGS_DEBUG, 0, nullptr}}, + {0x32, 1, X86InstInfo{"RDMSR", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {0x33, 1, X86InstInfo{"RDPMC", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {0x34, 1, X86InstInfo{"SYSENTER", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {0x35, 1, X86InstInfo{"SYSEXIT", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {0x36, 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x38, 1, X86InstInfo{"", TYPE_0F38_TABLE, FLAGS_NONE, 
0, nullptr}}, + {0x39, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x3A, 1, X86InstInfo{"", TYPE_0F3A_TABLE, FLAGS_NONE, 0, nullptr}}, + {0x3B, 5, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {0x40, 1, X86InstInfo{"CMOVO", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x41, 1, X86InstInfo{"CMOVNO", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x42, 1, X86InstInfo{"CMOVB", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x43, 1, X86InstInfo{"CMOVNB", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x44, 1, X86InstInfo{"CMOVZ", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x45, 1, X86InstInfo{"CMOVNZ", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x46, 1, X86InstInfo{"CMOVBE", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x47, 1, X86InstInfo{"CMOVNBE", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x48, 1, X86InstInfo{"CMOVS", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x49, 1, X86InstInfo{"CMOVNS", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x4A, 1, X86InstInfo{"CMOVP", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x4B, 1, X86InstInfo{"CMOVNP", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x4C, 1, X86InstInfo{"CMOVL", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x4D, 1, X86InstInfo{"CMOVNL", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x4E, 1, X86InstInfo{"CMOVLE", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0x4F, 1, X86InstInfo{"CMOVNLE", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + + {0x50, 1, X86InstInfo{"MOVMSKPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR, 0, nullptr}}, + {0x51, 1, X86InstInfo{"SQRTPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x52, 1, X86InstInfo{"RSQRTSS", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x53, 1, X86InstInfo{"RCPPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x54, 1, X86InstInfo{"ANDPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + 
{0x55, 1, X86InstInfo{"ANDNPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x56, 1, X86InstInfo{"ORPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x57, 1, X86InstInfo{"XORPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x58, 1, X86InstInfo{"ANDPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x59, 1, X86InstInfo{"MULPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5A, 1, X86InstInfo{"CVTPS2PD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5B, 1, X86InstInfo{"CVTDQ2PS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5C, 1, X86InstInfo{"SUBPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5D, 1, X86InstInfo{"MINPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5E, 1, X86InstInfo{"DIVPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5F, 1, X86InstInfo{"MAXPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + + {0x60, 1, X86InstInfo{"PUNPCKLBW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x61, 1, X86InstInfo{"PUNPCKLWD", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x62, 1, X86InstInfo{"PUNPCKLDQ", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x63, 1, X86InstInfo{"PACKSSWB", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x64, 1, X86InstInfo{"PCMPGTB", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x65, 1, X86InstInfo{"PCMPGTW", TYPE_MMX, 
GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x66, 1, X86InstInfo{"PCMPGTD", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x67, 1, X86InstInfo{"PACKUSWB", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x68, 1, X86InstInfo{"PUNPCKHBW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x69, 1, X86InstInfo{"PUNPCKHBD", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x6A, 1, X86InstInfo{"PUNPCKHDQ", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x6B, 1, X86InstInfo{"PACKSSDW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x6C, 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x6E, 1, X86InstInfo{"MOVD", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x6F, 1, X86InstInfo{"MOVQ", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + + {0x70, 1, X86InstInfo{"PSHUFW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}}, + {0x71, 1, X86InstInfo{"", TYPE_GROUP_12, FLAGS_NONE, 0, nullptr}}, + {0x72, 1, X86InstInfo{"", TYPE_GROUP_13, FLAGS_NONE, 0, nullptr}}, + {0x73, 1, X86InstInfo{"", TYPE_GROUP_14, FLAGS_NONE, 0, nullptr}}, + {0x74, 1, X86InstInfo{"PCMPEQB", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x75, 1, X86InstInfo{"PCMPEQW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x76, 1, X86InstInfo{"PCMPEQD", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x77, 1, X86InstInfo{"EMMS", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + {0x78, 6, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x7E, 1, X86InstInfo{"MOVD", TYPE_MMX, 
GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x7F, 1, X86InstInfo{"MOVQ", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + + {0x80, 1, X86InstInfo{"JO", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_BLOCK_END | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + {0x81, 1, X86InstInfo{"JNO", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_BLOCK_END | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + {0x82, 1, X86InstInfo{"JB", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_BLOCK_END | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + {0x83, 1, X86InstInfo{"JNB", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_BLOCK_END | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + {0x84, 1, X86InstInfo{"JZ", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_BLOCK_END | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + {0x85, 1, X86InstInfo{"JNZ", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_BLOCK_END | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + {0x86, 1, X86InstInfo{"JBE", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_BLOCK_END | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + {0x87, 1, X86InstInfo{"JNBE", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_BLOCK_END | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + {0x88, 1, X86InstInfo{"JS", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_BLOCK_END | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + {0x89, 1, X86InstInfo{"JNS", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_BLOCK_END | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + {0x8A, 1, X86InstInfo{"JP", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_BLOCK_END | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + {0x8B, 1, X86InstInfo{"JNP", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_BLOCK_END | FLAGS_SRC_SEXT | 
FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + {0x8C, 1, X86InstInfo{"JL", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_BLOCK_END | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + {0x8D, 1, X86InstInfo{"JNL", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_BLOCK_END | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + {0x8E, 1, X86InstInfo{"JLE", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_BLOCK_END | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + {0x8F, 1, X86InstInfo{"JNLE", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_BLOCK_END | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 , 4, nullptr}}, + + {0x90, 1, X86InstInfo{"SETO", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x91, 1, X86InstInfo{"SETNO", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x92, 1, X86InstInfo{"SETB", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x93, 1, X86InstInfo{"SETNB", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x94, 1, X86InstInfo{"SETZ", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x95, 1, X86InstInfo{"SETNZ", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x96, 1, X86InstInfo{"SETBE", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x97, 1, X86InstInfo{"SETNBE", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x98, 1, X86InstInfo{"SETS", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x99, 1, X86InstInfo{"SETNS", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x9A, 1, X86InstInfo{"SETP", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x9B, 1, X86InstInfo{"SETNP", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x9C, 1, X86InstInfo{"SETL", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x9D, 1, X86InstInfo{"SETNL", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x9E, 1, X86InstInfo{"SETLE", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0x9F, 
1, X86InstInfo{"SETNLE", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + + {0xA0, 1, X86InstInfo{"PUSH", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xA1, 1, X86InstInfo{"POP FS", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xA2, 1, X86InstInfo{"CPUID", TYPE_INST, FLAGS_DEBUG | FLAGS_SF_SRC_RAX, 0, nullptr}}, + {0xA3, 1, X86InstInfo{"BT", TYPE_INST, FLAGS_DEBUG_MEM_ACCESS | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0xA4, 1, X86InstInfo{"SHLD", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {0xA5, 1, X86InstInfo{"SHLD", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + {0xA6, 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xA8, 1, X86InstInfo{"PUSH", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xA9, 1, X86InstInfo{"POP GS", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xAA, 1, X86InstInfo{"RSM", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {0xAB, 1, X86InstInfo{"BTS", TYPE_INST, FLAGS_DEBUG_MEM_ACCESS | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0xAC, 1, X86InstInfo{"SHRD", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {0xAD, 1, X86InstInfo{"SHRD", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + {0xAE, 1, X86InstInfo{"", TYPE_GROUP_15, FLAGS_NONE, 0, nullptr}}, + {0xAF, 1, X86InstInfo{"IMUL", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + + {0xB0, 1, X86InstInfo{"CMPXCHG", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0xB1, 1, X86InstInfo{"CMPXCHG", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0xB2, 1, X86InstInfo{"LSS", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xB3, 1, X86InstInfo{"BTR", TYPE_INST, FLAGS_DEBUG_MEM_ACCESS | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0xB4, 1, X86InstInfo{"LFS", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xB5, 1, X86InstInfo{"LGS", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xB6, 1, X86InstInfo{"MOVZX", TYPE_INST, GenFlagsSrcSize(SIZE_8BIT) | FLAGS_MODRM, 0, 
nullptr}}, + {0xB7, 1, X86InstInfo{"MOVZX", TYPE_INST, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM, 0, nullptr}}, + {0xB8, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xB9, 1, X86InstInfo{"", TYPE_GROUP_10, FLAGS_NONE, 0, nullptr}}, + {0xBA, 1, X86InstInfo{"", TYPE_GROUP_8, FLAGS_NONE, 0, nullptr}}, + {0xBB, 1, X86InstInfo{"BTC", TYPE_INST, FLAGS_DEBUG_MEM_ACCESS | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0xBC, 1, X86InstInfo{"BSF", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0xBD, 1, X86InstInfo{"BSR", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0xBE, 1, X86InstInfo{"MOVSX", TYPE_INST, GenFlagsSrcSize(SIZE_8BIT) | FLAGS_MODRM, 0, nullptr}}, + {0xBF, 1, X86InstInfo{"MOVSX", TYPE_INST, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM, 0, nullptr}}, + + {0xC0, 1, X86InstInfo{"XADD", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0xC1, 1, X86InstInfo{"XADD", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0xC2, 1, X86InstInfo{"CMPPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM, 1, nullptr}}, + {0xC3, 1, X86InstInfo{"MOVNTI", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST, 0, nullptr}}, + {0xC4, 1, X86InstInfo{"PINSRW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_SRC_GPR, 1, nullptr}}, + {0xC5, 1, X86InstInfo{"PEXTRW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_XMM_FLAGS, 1, nullptr}}, + {0xC6, 1, X86InstInfo{"SHUFPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM, 1, nullptr}}, + {0xC7, 1, X86InstInfo{"", TYPE_GROUP_9, FLAGS_NONE, 0, nullptr}}, + {0xC8, 8, X86InstInfo{"BSWAP", TYPE_INST, FLAGS_SF_REX_IN_BYTE, 0, nullptr}}, + + {0xD0, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xD1, 1, X86InstInfo{"PSRLW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xD2, 1, X86InstInfo{"PSRLD", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xD3, 1, 
X86InstInfo{"PSRLQ", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xD4, 1, X86InstInfo{"PADDQ", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, // SSE2 extending MMX + {0xD5, 1, X86InstInfo{"PMULLW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xD6, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xD7, 1, X86InstInfo{"PMOVMSKB", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR, 0, nullptr}}, + {0xD8, 1, X86InstInfo{"PSUBUSB", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xD9, 1, X86InstInfo{"PSUBUSW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xDA, 1, X86InstInfo{"PMINUB", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xDB, 1, X86InstInfo{"PAND", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xDC, 1, X86InstInfo{"PADDUSB", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xDD, 1, X86InstInfo{"PADDUSW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xDE, 1, X86InstInfo{"PMAXUB", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xDF, 1, X86InstInfo{"PANDN", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + + {0xE0, 1, X86InstInfo{"PAVGB", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE1, 1, X86InstInfo{"PSRAW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE2, 1, X86InstInfo{"PSRAD", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE3, 1, X86InstInfo{"PAVGW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | 
FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE4, 1, X86InstInfo{"PMULHUW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE5, 1, X86InstInfo{"PMULHW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE6, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xE7, 1, X86InstInfo{"MOVNTQ", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE8, 1, X86InstInfo{"PSUBSB", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE9, 1, X86InstInfo{"PSUBSW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xEA, 1, X86InstInfo{"PMINSW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xEB, 1, X86InstInfo{"POR", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xEC, 1, X86InstInfo{"PADDSB", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xED, 1, X86InstInfo{"PADDSW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xEE, 1, X86InstInfo{"PMAXSW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xEF, 1, X86InstInfo{"PXOR", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + + {0xF0, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xF1, 1, X86InstInfo{"PSLLW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xF2, 1, X86InstInfo{"PSLLD", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xF3, 1, X86InstInfo{"PSLLQ", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xF4, 1, X86InstInfo{"PMULUDQ", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xF5, 1, 
X86InstInfo{"PMADDWD", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xF6, 1, X86InstInfo{"PSADBW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xF7, 1, X86InstInfo{"MASKMOVQ", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xF8, 1, X86InstInfo{"PSUBB", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xF9, 1, X86InstInfo{"PSUBW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xFA, 1, X86InstInfo{"PSUBD", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xFB, 1, X86InstInfo{"PSUBQ", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xFC, 1, X86InstInfo{"PADDB", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xFD, 1, X86InstInfo{"PADDW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xFE, 1, X86InstInfo{"PADDD", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xFF, 1, X86InstInfo{"UD0", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + }; + + const std::vector> PrimaryGroupOpTable = { +#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg)) + // GROUP_1 | 0x80 | reg + {OPD(TYPE_GROUP_1, OpToIndex(0x80), 0), 1, X86InstInfo{"ADD", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x80), 1), 1, X86InstInfo{"OR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x80), 2), 1, X86InstInfo{"ADC", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x80), 3), 1, X86InstInfo{"SBB", TYPE_INST, 
GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x80), 4), 1, X86InstInfo{"AND", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x80), 5), 1, X86InstInfo{"SUB", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x80), 6), 1, X86InstInfo{"XOR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x80), 7), 1, X86InstInfo{"CMP", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + + {OPD(TYPE_GROUP_1, OpToIndex(0x81), 0), 1, X86InstInfo{"ADD", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x81), 1), 1, X86InstInfo{"OR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x81), 2), 1, X86InstInfo{"ADC", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x81), 3), 1, X86InstInfo{"SBB", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x81), 4), 1, X86InstInfo{"AND", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x81), 5), 1, X86InstInfo{"SUB", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x81), 6), 1, X86InstInfo{"XOR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x81), 7), 1, X86InstInfo{"CMP", TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + + // Invalid in 64bit mode + {OPD(TYPE_GROUP_1, OpToIndex(0x82), 0), 8, X86InstInfo{"", TYPE_INVALID, 
FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_1, OpToIndex(0x83), 0), 1, X86InstInfo{"ADD", TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x83), 1), 1, X86InstInfo{"OR", TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x83), 2), 1, X86InstInfo{"ADC", TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x83), 3), 1, X86InstInfo{"SBB", TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x83), 4), 1, X86InstInfo{"AND", TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x83), 5), 1, X86InstInfo{"SUB", TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x83), 6), 1, X86InstInfo{"XOR", TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_1, OpToIndex(0x83), 7), 1, X86InstInfo{"CMP", TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + + // GROUP 2 + {OPD(TYPE_GROUP_2, OpToIndex(0xC0), 0), 1, X86InstInfo{"ROL", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xC0), 1), 1, X86InstInfo{"ROR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xC0), 2), 1, X86InstInfo{"RCL", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xC0), 3), 1, X86InstInfo{"RCR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xC0), 4), 1, X86InstInfo{"SHL", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xC0), 5), 1, X86InstInfo{"SHR", TYPE_INST, 
GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xC0), 6), 1, X86InstInfo{"SHL", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xC0), 7), 1, X86InstInfo{"SAR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + + {OPD(TYPE_GROUP_2, OpToIndex(0xC1), 0), 1, X86InstInfo{"ROL", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xC1), 1), 1, X86InstInfo{"ROR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xC1), 2), 1, X86InstInfo{"RCL", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xC1), 3), 1, X86InstInfo{"RCR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xC1), 4), 1, X86InstInfo{"SHL", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xC1), 5), 1, X86InstInfo{"SHR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xC1), 6), 1, X86InstInfo{"SHL", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xC1), 7), 1, X86InstInfo{"SAR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + + {OPD(TYPE_GROUP_2, OpToIndex(0xD0), 0), 1, X86InstInfo{"ROL", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD0), 1), 1, X86InstInfo{"ROR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD0), 2), 1, X86InstInfo{"RCL", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD0), 3), 1, X86InstInfo{"RCR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD0), 4), 1, 
X86InstInfo{"SHL", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD0), 5), 1, X86InstInfo{"SHR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD0), 6), 1, X86InstInfo{"SHL", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD0), 7), 1, X86InstInfo{"SAR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + + {OPD(TYPE_GROUP_2, OpToIndex(0xD1), 0), 1, X86InstInfo{"ROL", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD1), 1), 1, X86InstInfo{"ROR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD1), 2), 1, X86InstInfo{"RCL", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD1), 3), 1, X86InstInfo{"RCR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD1), 4), 1, X86InstInfo{"SHL", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD1), 5), 1, X86InstInfo{"SHR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD1), 6), 1, X86InstInfo{"SHL", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD1), 7), 1, X86InstInfo{"SAR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + + {OPD(TYPE_GROUP_2, OpToIndex(0xD2), 0), 1, X86InstInfo{"ROL", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD2), 1), 1, X86InstInfo{"ROR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD2), 2), 1, X86InstInfo{"RCL", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | 
FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD2), 3), 1, X86InstInfo{"RCR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD2), 4), 1, X86InstInfo{"SHL", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD2), 5), 1, X86InstInfo{"SHR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD2), 6), 1, X86InstInfo{"SHL", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD2), 7), 1, X86InstInfo{"SAR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + + {OPD(TYPE_GROUP_2, OpToIndex(0xD3), 0), 1, X86InstInfo{"ROL", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD3), 1), 1, X86InstInfo{"ROR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD3), 2), 1, X86InstInfo{"RCL", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD3), 3), 1, X86InstInfo{"RCR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD3), 4), 1, X86InstInfo{"SHL", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD3), 5), 1, X86InstInfo{"SHR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD3), 6), 1, X86InstInfo{"SHL", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX, 0, nullptr}}, + {OPD(TYPE_GROUP_2, OpToIndex(0xD3), 7), 1, X86InstInfo{"SAR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | 
FLAGS_SF_SRC_RCX, 0, nullptr}}, + + // GROUP 3 + {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 0), 1, X86InstInfo{"TEST", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 1), 1, X86InstInfo{"TEST", TYPE_UNDEC, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 2), 1, X86InstInfo{"NOT", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 3), 1, X86InstInfo{"NEG", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 4), 1, X86InstInfo{"MUL", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 5), 1, X86InstInfo{"IMUL", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 6), 1, X86InstInfo{"DIV", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 7), 1, X86InstInfo{"IDIV", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + + {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 0), 1, X86InstInfo{"TEST", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 1), 1, X86InstInfo{"TEST", TYPE_UNDEC, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_DISPLACE_SIZE_DIV_2, 4, nullptr}}, + {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 2), 1, X86InstInfo{"NOT", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 3), 1, X86InstInfo{"NEG", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 4), 1, X86InstInfo{"MUL", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 5), 1, X86InstInfo{"IMUL", 
TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 6), 1, X86InstInfo{"DIV", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 7), 1, X86InstInfo{"IDIV", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + + // GROUP 4 + {OPD(TYPE_GROUP_4, OpToIndex(0xFE), 0), 1, X86InstInfo{"INC", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_4, OpToIndex(0xFE), 1), 1, X86InstInfo{"DEC", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_4, OpToIndex(0xFE), 2), 6, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + // GROUP 5 + {OPD(TYPE_GROUP_5, OpToIndex(0xFF), 0), 1, X86InstInfo{"INC", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_5, OpToIndex(0xFF), 1), 1, X86InstInfo{"DEC", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_5, OpToIndex(0xFF), 2), 1, X86InstInfo{"CALL", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_MODRM | FLAGS_BLOCK_END , 0, nullptr}}, + {OPD(TYPE_GROUP_5, OpToIndex(0xFF), 3), 1, X86InstInfo{"CALLF", TYPE_INST, FLAGS_SETS_RIP | FLAGS_MODRM | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_5, OpToIndex(0xFF), 4), 1, X86InstInfo{"JMP", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_SETS_RIP | FLAGS_MODRM | FLAGS_BLOCK_END , 0, nullptr}}, + {OPD(TYPE_GROUP_5, OpToIndex(0xFF), 5), 1, X86InstInfo{"JMPF", TYPE_INST, FLAGS_SETS_RIP | FLAGS_MODRM | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_5, OpToIndex(0xFF), 6), 1, X86InstInfo{"PUSH", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_5, OpToIndex(0xFF), 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + // GROUP 11 + {OPD(TYPE_GROUP_11, OpToIndex(0xC6), 0), 1, X86InstInfo{"MOV", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | 
FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SRC_SEXT, 1, nullptr}}, + {OPD(TYPE_GROUP_11, OpToIndex(0xC6), 1), 6, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_11, OpToIndex(0xC7), 0), 1, X86InstInfo{"MOV", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SRC_SEXT, 4, nullptr}}, + {OPD(TYPE_GROUP_11, OpToIndex(0xC7), 1), 6, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, +#undef OPD + }; + + const std::vector> RepModOpTable = { + {0x0, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + + {0x10, 1, X86InstInfo{"MOVSS", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x11, 1, X86InstInfo{"MOVSS", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x12, 1, X86InstInfo{"MOVSLDUP", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x13, 3, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x16, 1, X86InstInfo{"MOVSHDUP", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x17, 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x19, 7, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + + {0x20, 4, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0x24, 6, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x2A, 1, X86InstInfo{"CVTSI2SS", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_SRC_GPR, 0, nullptr}}, + {0x2B, 1, X86InstInfo{"MOVNTSS", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x2C, 1, X86InstInfo{"CVTTSS2SI", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR, 0, nullptr}}, + {0x2D, 1, X86InstInfo{"CVTSS2SI", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR, 0, nullptr}}, + {0x2E, 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {0x30, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0x40, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 
0, nullptr}}, + + {0x50, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x51, 1, X86InstInfo{"SQRTSS", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x52, 1, X86InstInfo{"RSQRTSS", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x53, 1, X86InstInfo{"RCPSS", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x54, 4, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x58, 1, X86InstInfo{"ADDSS", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x59, 1, X86InstInfo{"MULSS", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5A, 1, X86InstInfo{"CVTSS2SD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5B, 1, X86InstInfo{"CVTTPS2DQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5C, 1, X86InstInfo{"SUBSS", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5D, 1, X86InstInfo{"MINSS", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5E, 1, X86InstInfo{"DIVSS", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5F, 1, X86InstInfo{"MAXSS", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + + {0x60, 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x68, 7, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x6F, 1, X86InstInfo{"MOVDQU", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + + {0x70, 1, X86InstInfo{"PSHUFHW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}}, + {0x71, 3, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0x74, 4, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x78, 6, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x7E, 1, X86InstInfo{"MOVQ", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x7F, 1, X86InstInfo{"MOVDQU", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | 
FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + + {0x80, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0x90, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0xA0, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + + {0xB0, 8, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0xB8, 1, X86InstInfo{"POPCNT", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0xB9, 1, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0xBA, 1, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0xBB, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xBC, 1, X86InstInfo{"TZCNT", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0xBD, 1, X86InstInfo{"LZCNT", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {0xBE, 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {0xC0, 2, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0xC2, 1, X86InstInfo{"CMPSS", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}}, + {0xC3, 5, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xC8, 8, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + + {0xD0, 6, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xD6, 1, X86InstInfo{"MOVQ2DQ", TYPE_MMX, GenFlagsSameSize(SIZE_128BIT) | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xD7, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xD8, 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {0xE0, 6, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xE6, 1, X86InstInfo{"CVTDQ2PD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE7, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xE8, 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {0xF0, 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xF8, 7, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xFF, 1, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, 
nullptr}}, + }; + + const std::vector> RepNEModOpTable = { + {0x0, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + + {0x10, 1, X86InstInfo{"MOVSD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x11, 1, X86InstInfo{"MOVSD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x12, 1, X86InstInfo{"MOVDDUP", TYPE_INST, GenFlagsDstSize(SIZE_128BIT) | GenFlagsSrcSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x13, 6, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x19, 7, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + + {0x20, 4, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0x24, 6, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x2A, 1, X86InstInfo{"CVTSI2SD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x2B, 1, X86InstInfo{"MOVNTSD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x2C, 1, X86InstInfo{"CVTTSD2SI", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR, 0, nullptr}}, + {0x2D, 1, X86InstInfo{"CVTSD2SI", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR, 0, nullptr}}, + {0x2E, 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {0x30, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0x40, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + + {0x50, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x51, 1, X86InstInfo{"SQRTSD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x52, 6, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x58, 1, X86InstInfo{"ADDSD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x59, 
1, X86InstInfo{"MULSD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5A, 1, X86InstInfo{"CVTSD2SS", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5B, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x5C, 1, X86InstInfo{"SUBSD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5D, 1, X86InstInfo{"MINSD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5E, 1, X86InstInfo{"DIVSD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5F, 1, X86InstInfo{"MAXSD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + + {0x60, 16, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {0x70, 1, X86InstInfo{"PSHUFLW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}}, + {0x71, 3, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0x74, 4, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x78, 1, X86InstInfo{"INSERTQ", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS,2, nullptr}}, + {0x79, 1, X86InstInfo{"INSERTQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x7A, 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x7C, 1, X86InstInfo{"HADDPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x7D, 1, X86InstInfo{"HSUBPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x7E, 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {0x80, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0x90, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0xA0, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, 
nullptr}}, + {0xB0, 8, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0xB8, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xB9, 1, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0xBA, 1, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0xBB, 5, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xC0, 2, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0xC2, 1, X86InstInfo{"CMPSD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}}, + {0xC3, 5, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xC8, 8, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + + {0xD0, 1, X86InstInfo{"ADDSUBPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xD1, 5, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xD6, 1, X86InstInfo{"MOVDQ2Q", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xD7, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xD8, 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {0xE0, 6, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xE6, 1, X86InstInfo{"CVTPD2DQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE7, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xE8, 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {0xF0, 1, X86InstInfo{"LDDQU", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS,0, nullptr}}, + {0xF1, 7, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xF8, 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + }; + + const std::vector> OpSizeModOpTable = { + {0x0, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + + {0x10, 1, X86InstInfo{"MOVUPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x11, 1, 
X86InstInfo{"MOVUPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x12, 1, X86InstInfo{"MOVLPD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x13, 1, X86InstInfo{"MOVLPD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x14, 1, X86InstInfo{"UNPCKLPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x15, 1, X86InstInfo{"UNPCKHPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x16, 1, X86InstInfo{"MOVHPD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x17, 1, X86InstInfo{"MOVHPD", TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x18, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x19, 7, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + + {0x20, 4, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0x24, 4, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {0x28, 1, X86InstInfo{"MOVAPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x29, 1, X86InstInfo{"MOVAPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x2A, 1, X86InstInfo{"CVTPI2PD", TYPE_MMX, FLAGS_NONE, 0, nullptr}}, + {0x2B, 1, X86InstInfo{"MOVNTPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x2C, 1, X86InstInfo{"CVTTPD2PI", TYPE_MMX, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x2D, 1, X86InstInfo{"CVTPD2PI", TYPE_MMX, GenFlagsSameSize(SIZE_128BIT) | 
FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x2E, 1, X86InstInfo{"UCOMISD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x2F, 1, X86InstInfo{"COMISD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + + {0x30, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0x40, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + + {0x50, 1, X86InstInfo{"MOVMSKPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR, 0, nullptr}}, + {0x51, 1, X86InstInfo{"SQRTPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x52, 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x54, 1, X86InstInfo{"ANDPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x55, 1, X86InstInfo{"ANDNPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x56, 1, X86InstInfo{"ORPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x57, 1, X86InstInfo{"XORPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x58, 1, X86InstInfo{"ADDPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x59, 1, X86InstInfo{"MULPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5A, 1, X86InstInfo{"CVTPD2PS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5B, 1, X86InstInfo{"CVTPS2DQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5C, 1, X86InstInfo{"SUBPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5D, 1, X86InstInfo{"MINPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, 
nullptr}}, + {0x5E, 1, X86InstInfo{"DIVPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x5F, 1, X86InstInfo{"MAXPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + + {0x60, 1, X86InstInfo{"PUNPCKLBW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x61, 1, X86InstInfo{"PUNPCKLWD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x62, 1, X86InstInfo{"PUNPCKLDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x63, 1, X86InstInfo{"PACKSSWB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x64, 1, X86InstInfo{"PCMPGTB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x65, 1, X86InstInfo{"PCMPGTW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x66, 1, X86InstInfo{"PCMPGTD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x67, 1, X86InstInfo{"PACKUSWB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x68, 1, X86InstInfo{"PUNPCKHBW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x69, 1, X86InstInfo{"PUNPCKHWD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x6A, 1, X86InstInfo{"PUNPCKHDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x6B, 1, X86InstInfo{"PACKSSDW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x6C, 1, X86InstInfo{"PUNPCKLQDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x6D, 1, X86InstInfo{"PUNPCKHQDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + 
{0x6E, 1, X86InstInfo{"MOVD", TYPE_INST, GenFlagsDstSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_SRC_GPR | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x6F, 1, X86InstInfo{"MOVDQA", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + + {0x70, 1, X86InstInfo{"PSHUFD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}}, + {0x71, 3, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0x74, 1, X86InstInfo{"PCMPEQB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x75, 1, X86InstInfo{"PCMPEQW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x76, 1, X86InstInfo{"PCMPEQD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x77, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x78, 1, X86InstInfo{"", TYPE_GROUP_17, FLAGS_NONE, 0, nullptr}}, + + {0x79, 1, X86InstInfo{"EXTRQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x7A, 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0x7C, 1, X86InstInfo{"HADDPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x7D, 1, X86InstInfo{"HSUBPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x7E, 1, X86InstInfo{"MOVD", TYPE_INST, GenFlagsSrcSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0x7F, 1, X86InstInfo{"MOVDQA", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + + {0x80, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0x90, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0xA0, 16, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0xB0, 8, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, 
nullptr}}, + {0xB8, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xB9, 1, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0xBA, 1, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0xBB, 5, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {0xC0, 2, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + {0xC2, 1, X86InstInfo{"CMPPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}}, + {0xC3, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xC4, 1, X86InstInfo{"PINSRW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_SRC_GPR | FLAGS_XMM_FLAGS, 1, nullptr}}, + {0xC5, 1, X86InstInfo{"PEXTRW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1, nullptr}}, + {0xC6, 1, X86InstInfo{"SHUFPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}}, + {0xC7, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xC8, 8, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + + {0xD0, 1, X86InstInfo{"ADDSUBPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xD1, 1, X86InstInfo{"PSRLW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xD2, 1, X86InstInfo{"PSRLD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xD3, 1, X86InstInfo{"PSRLQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xD4, 1, X86InstInfo{"PADDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xD5, 1, X86InstInfo{"PMULLW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xD6, 1, X86InstInfo{"MOVQ", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xD7, 1, 
X86InstInfo{"PMOVMSKB", TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR, 0, nullptr}}, + {0xD8, 1, X86InstInfo{"PSUBUSB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xD9, 1, X86InstInfo{"PSUBUSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xDA, 1, X86InstInfo{"PMINUB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xDB, 1, X86InstInfo{"PAND", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xDC, 1, X86InstInfo{"PADDUSB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xDD, 1, X86InstInfo{"PADDUSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xDE, 1, X86InstInfo{"PMAXUB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xDF, 1, X86InstInfo{"PANDN", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + + {0xE0, 1, X86InstInfo{"PAVGB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE1, 1, X86InstInfo{"PSRAW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE2, 1, X86InstInfo{"PSRAD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE3, 1, X86InstInfo{"PAVGW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE4, 1, X86InstInfo{"PMULHUW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE5, 1, X86InstInfo{"PMULHW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE6, 1, X86InstInfo{"CVTTPD2DQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + 
{0xE7, 1, X86InstInfo{"MOVNTDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE8, 1, X86InstInfo{"PSUBSB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xE9, 1, X86InstInfo{"PSUBSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xEA, 1, X86InstInfo{"PMINSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xEB, 1, X86InstInfo{"POR", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xEC, 1, X86InstInfo{"PADDSB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xED, 1, X86InstInfo{"PADDSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xEE, 1, X86InstInfo{"PMAXSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xEF, 1, X86InstInfo{"PXOR", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + + {0xF0, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {0xF1, 1, X86InstInfo{"PSLLW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xF2, 1, X86InstInfo{"PSLLD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xF3, 1, X86InstInfo{"PSLLQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xF4, 1, X86InstInfo{"PMULUDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xF5, 1, X86InstInfo{"PMADDWD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xF6, 1, X86InstInfo{"PSADBW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xF7, 1, X86InstInfo{"MASKMOVDQU", TYPE_INST, 
GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xF8, 1, X86InstInfo{"PSUBB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xF9, 1, X86InstInfo{"PSUBW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xFA, 1, X86InstInfo{"PSUBD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xFB, 1, X86InstInfo{"PSUBQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xFC, 1, X86InstInfo{"PADDB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xFD, 1, X86InstInfo{"PADDW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xFE, 1, X86InstInfo{"PADDD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, + {0xFF, 1, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}}, + }; + +#define PF_NONE 0 +#define PF_F3 1 +#define PF_66 2 +#define PF_F2 3 +#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_6) << 5) | (prefix) << 3 | (Reg)) + const std::vector> SecondaryExtensionOpTable = { + // GROUP 1 + // GROUP 2 + // GROUP 3 + // GROUP 4 + // GROUP 5 + // Pulls from other MODRM table + + // GROUP 6 + {OPD(TYPE_GROUP_6, PF_NONE, 0), 1, X86InstInfo{"SLDT", TYPE_UNDEC, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_NONE, 1), 1, X86InstInfo{"STR", TYPE_PRIV, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_NONE, 2), 1, X86InstInfo{"LLDT", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_NONE, 3), 1, X86InstInfo{"LTR", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_NONE, 4), 1, X86InstInfo{"VERR", TYPE_UNDEC, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_NONE, 5), 1, X86InstInfo{"VERW", TYPE_UNDEC, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_6, 
PF_NONE, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_NONE, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_6, PF_F3, 0), 1, X86InstInfo{"SLDT", TYPE_UNDEC, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_F3, 1), 1, X86InstInfo{"STR", TYPE_PRIV, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_F3, 2), 1, X86InstInfo{"LLDT", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_F3, 3), 1, X86InstInfo{"LTR", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_F3, 4), 1, X86InstInfo{"VERR", TYPE_UNDEC, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_F3, 5), 1, X86InstInfo{"VERW", TYPE_UNDEC, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_F3, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_F3, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_6, PF_66, 0), 1, X86InstInfo{"SLDT", TYPE_UNDEC, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_66, 1), 1, X86InstInfo{"STR", TYPE_PRIV, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_66, 2), 1, X86InstInfo{"LLDT", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_66, 3), 1, X86InstInfo{"LTR", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_66, 4), 1, X86InstInfo{"VERR", TYPE_UNDEC, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_66, 5), 1, X86InstInfo{"VERW", TYPE_UNDEC, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_66, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_66, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_6, PF_F2, 0), 1, X86InstInfo{"SLDT", TYPE_UNDEC, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_F2, 1), 1, X86InstInfo{"STR", TYPE_PRIV, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_F2, 2), 1, 
X86InstInfo{"LLDT", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_F2, 3), 1, X86InstInfo{"LTR", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_F2, 4), 1, X86InstInfo{"VERR", TYPE_UNDEC, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_F2, 5), 1, X86InstInfo{"VERW", TYPE_UNDEC, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_F2, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_6, PF_F2, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + // GROUP 7 + {OPD(TYPE_GROUP_7, PF_NONE, 0), 1, X86InstInfo{"SGDT", TYPE_UNDEC, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_NONE, 1), 1, X86InstInfo{"", TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_NONE, 2), 1, X86InstInfo{"", TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_NONE, 3), 1, X86InstInfo{"", TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_NONE, 4), 1, X86InstInfo{"SMSW", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_NONE, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_NONE, 6), 1, X86InstInfo{"LMSW", TYPE_PRIV, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_NONE, 7), 1, X86InstInfo{"", TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_7, PF_F3, 0), 1, X86InstInfo{"SGDT", TYPE_UNDEC, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_F3, 1), 1, X86InstInfo{"", TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_F3, 2), 1, X86InstInfo{"", TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_F3, 3), 1, X86InstInfo{"", TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_F3, 4), 1, X86InstInfo{"SMSW", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_F3, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + 
{OPD(TYPE_GROUP_7, PF_F3, 6), 1, X86InstInfo{"LMSW", TYPE_PRIV, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_F3, 7), 1, X86InstInfo{"", TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_7, PF_66, 0), 1, X86InstInfo{"SGDT", TYPE_UNDEC, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_66, 1), 1, X86InstInfo{"", TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_66, 2), 1, X86InstInfo{"", TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_66, 3), 1, X86InstInfo{"", TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_66, 4), 1, X86InstInfo{"SMSW", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_66, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_66, 6), 1, X86InstInfo{"LMSW", TYPE_PRIV, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_66, 7), 1, X86InstInfo{"", TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_7, PF_F2, 0), 1, X86InstInfo{"SGDT", TYPE_UNDEC, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_F2, 1), 1, X86InstInfo{"", TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_F2, 2), 1, X86InstInfo{"", TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_F2, 3), 1, X86InstInfo{"", TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_F2, 4), 1, X86InstInfo{"SMSW", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_F2, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_F2, 6), 1, X86InstInfo{"LMSW", TYPE_PRIV, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_7, PF_F2, 7), 1, X86InstInfo{"", TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0, nullptr}}, + + // GROUP 8 + {OPD(TYPE_GROUP_8, PF_NONE, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_8, PF_NONE, 1), 1, X86InstInfo{"", 
TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_8, PF_NONE, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_8, PF_NONE, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_8, PF_NONE, 4), 1, X86InstInfo{"BT", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_8, PF_NONE, 5), 1, X86InstInfo{"BTS", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_8, PF_NONE, 6), 1, X86InstInfo{"BTR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_8, PF_NONE, 7), 1, X86InstInfo{"BTC", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + + {OPD(TYPE_GROUP_8, PF_F3, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_8, PF_F3, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_8, PF_F3, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_8, PF_F3, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_8, PF_F3, 4), 1, X86InstInfo{"BT", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_8, PF_F3, 5), 1, X86InstInfo{"BTS", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_8, PF_F3, 6), 1, X86InstInfo{"BTR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_8, PF_F3, 7), 1, X86InstInfo{"BTC", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + + {OPD(TYPE_GROUP_8, PF_66, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_8, PF_66, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_8, PF_66, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_8, PF_66, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_8, PF_66, 4), 1, X86InstInfo{"BT", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_8, PF_66, 5), 1, 
X86InstInfo{"BTS", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_8, PF_66, 6), 1, X86InstInfo{"BTR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_8, PF_66, 7), 1, X86InstInfo{"BTC", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + + {OPD(TYPE_GROUP_8, PF_F2, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_8, PF_F2, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_8, PF_F2, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_8, PF_F2, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_8, PF_F2, 4), 1, X86InstInfo{"BT", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_8, PF_F2, 5), 1, X86InstInfo{"BTS", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_8, PF_F2, 6), 1, X86InstInfo{"BTR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + {OPD(TYPE_GROUP_8, PF_F2, 7), 1, X86InstInfo{"BTC", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1, nullptr}}, + + // GROUP 9 + + // AMD documentation is a bit broken for Group 9 + // Claims the entire group has n/a applied for the prefix (Implies that the prefix is ignored) + // RDRAND/RDSEED only work with no prefix + // CMPXCHG8B/16B works with all prefixes + // Tooling fails to decode CMPXCHG with prefix + {OPD(TYPE_GROUP_9, PF_NONE, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_NONE, 1), 1, X86InstInfo{"CMPXCHG16B", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_NONE, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_NONE, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_NONE, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_NONE, 5), 1, X86InstInfo{"", TYPE_INVALID, 
FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_NONE, 6), 1, X86InstInfo{"RDRAND", TYPE_UNDEC, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_NONE, 7), 1, X86InstInfo{"RDSEED", TYPE_UNDEC, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY, 0, nullptr}}, + + {OPD(TYPE_GROUP_9, PF_F3, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_F3, 1), 1, X86InstInfo{"CMPXCHG16B", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_F3, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_F3, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_F3, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_F3, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_F3, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_F3, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_9, PF_66, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_66, 1), 1, X86InstInfo{"CMPXCHG16B", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_66, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_66, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_66, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_66, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_66, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_66, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_9, PF_F2, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_F2, 
1), 1, X86InstInfo{"CMPXCHG16B", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_F2, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_F2, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_F2, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_F2, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_F2, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_9, PF_F2, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + // GROUP 10 + {OPD(TYPE_GROUP_10, PF_NONE, 0), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_NONE, 1), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_NONE, 2), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_NONE, 3), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_NONE, 4), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_NONE, 5), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_NONE, 6), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_NONE, 7), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + + {OPD(TYPE_GROUP_10, PF_F3, 0), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_F3, 1), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_F3, 2), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_F3, 3), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | 
FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_F3, 4), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_F3, 5), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_F3, 6), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_F3, 7), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + + {OPD(TYPE_GROUP_10, PF_66, 0), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_66, 1), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_66, 2), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_66, 3), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_66, 4), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_66, 5), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_66, 6), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_66, 7), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + + {OPD(TYPE_GROUP_10, PF_F2, 0), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_F2, 1), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_F2, 2), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_F2, 3), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_F2, 4), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_F2, 5), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG 
| FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_F2, 6), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + {OPD(TYPE_GROUP_10, PF_F2, 7), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_DEBUG | FLAGS_BLOCK_END, 0, nullptr}}, + + // GROUP 12 + {OPD(TYPE_GROUP_12, PF_NONE, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_NONE, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_NONE, 2), 1, X86InstInfo{"PSRLW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_12, PF_NONE, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_NONE, 4), 1, X86InstInfo{"PSRAW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_12, PF_NONE, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_NONE, 6), 1, X86InstInfo{"PSLLW", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_12, PF_NONE, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_12, PF_66, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_66, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_66, 2), 1, X86InstInfo{"PSRLW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_12, PF_66, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_66, 4), 1, X86InstInfo{"PSRAW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_12, PF_66, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + 
{OPD(TYPE_GROUP_12, PF_66, 6), 1, X86InstInfo{"PSLLW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_12, PF_66, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_12, PF_F3, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_F3, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_F3, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_F3, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_F3, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_F3, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_F3, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_F3, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_12, PF_F2, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_F2, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_F2, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_F2, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_F2, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_F2, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_F2, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_12, PF_F2, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + // GROUP 13 + {OPD(TYPE_GROUP_13, PF_NONE, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_NONE, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_NONE, 2), 1, 
X86InstInfo{"PSRLD", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_13, PF_NONE, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_NONE, 4), 1, X86InstInfo{"PSRAD", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_13, PF_NONE, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_NONE, 6), 1, X86InstInfo{"PSLLD", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_13, PF_NONE, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_13, PF_66, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_66, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_66, 2), 1, X86InstInfo{"PSRLD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_13, PF_66, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_66, 4), 1, X86InstInfo{"PSRAD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_13, PF_66, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_66, 6), 1, X86InstInfo{"PSLLD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_13, PF_66, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_13, PF_F3, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_F3, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_F3, 2), 1, X86InstInfo{"", 
TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_F3, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_F3, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_F3, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_F3, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_F3, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_13, PF_F2, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_F2, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_F2, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_F2, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_F2, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_F2, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_F2, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_13, PF_F2, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + // GROUP 14 + {OPD(TYPE_GROUP_14, PF_NONE, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_NONE, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_NONE, 2), 1, X86InstInfo{"PSRLQ", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_14, PF_NONE, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_NONE, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_NONE, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_NONE, 6), 1, X86InstInfo{"PSLLQ", TYPE_MMX, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM 
| FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_14, PF_NONE, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_14, PF_66, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_66, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_66, 2), 1, X86InstInfo{"PSRLQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_14, PF_66, 3), 1, X86InstInfo{"PSRLDQ",TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_14, PF_66, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_66, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_66, 6), 1, X86InstInfo{"PSLLQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 1, nullptr}}, + {OPD(TYPE_GROUP_14, PF_66, 7), 1, X86InstInfo{"PSLLDQ",TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 1, nullptr}}, + + {OPD(TYPE_GROUP_14, PF_F3, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_F3, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_F3, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_F3, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_F3, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_F3, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_F3, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_F3, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, 
nullptr}}, + + {OPD(TYPE_GROUP_14, PF_F2, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_F2, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_F2, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_F2, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_F2, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_F2, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_F2, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_14, PF_F2, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + // GROUP 15 + {OPD(TYPE_GROUP_15, PF_NONE, 0), 1, X86InstInfo{"FXSAVE", TYPE_MMX, FLAGS_NONE, 0, nullptr}}, // MMX/x87 + {OPD(TYPE_GROUP_15, PF_NONE, 1), 1, X86InstInfo{"FXRSTOR", TYPE_MMX, FLAGS_NONE, 0, nullptr}}, // MMX/x87 + {OPD(TYPE_GROUP_15, PF_NONE, 2), 1, X86InstInfo{"LDMXCSR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_NONE, 3), 1, X86InstInfo{"STMXCSR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_NONE, 4), 1, X86InstInfo{"XSAVE", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_NONE, 5), 1, X86InstInfo{"LFENCE/XRSTOR", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_NONE, 6), 1, X86InstInfo{"MFENCE/XSAVEOPT", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_NONE, 7), 1, X86InstInfo{"SFENCE/CLFLUSH", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_15, PF_F3, 0), 1, X86InstInfo{"RDFSBASE", TYPE_PRIV, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_F3, 1), 1, X86InstInfo{"RDGSBASE", TYPE_PRIV, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_F3, 2), 1, X86InstInfo{"WRFSBASE", TYPE_PRIV, FLAGS_MODRM, 0, 
nullptr}}, + {OPD(TYPE_GROUP_15, PF_F3, 3), 1, X86InstInfo{"WRGSBASE", TYPE_PRIV, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_F3, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_F3, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_F3, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_F3, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_15, PF_66, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_66, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_66, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_66, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_66, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_66, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_66, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_66, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_15, PF_F2, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_F2, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_F2, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_F2, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_F2, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_F2, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_F2, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_15, PF_F2, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + // GROUP 16 + // AMD documentation claims 
again that this entire group is n/a to prefix + // Tooling once again fails to disassemble oens with the prefix. Disable until proven otherwise + {OPD(TYPE_GROUP_16, PF_NONE, 0), 1, X86InstInfo{"PREFETCH NTA", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_NONE, 1), 1, X86InstInfo{"PREFETCH T0", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_NONE, 2), 1, X86InstInfo{"PREFETCH T1", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_NONE, 3), 1, X86InstInfo{"PREFETCH T2", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_NONE, 4), 1, X86InstInfo{"NOP", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_NONE, 5), 1, X86InstInfo{"NOP", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_NONE, 6), 1, X86InstInfo{"NOP", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_NONE, 7), 1, X86InstInfo{"NOP", TYPE_INST, FLAGS_MODRM, 0, nullptr}}, + + {OPD(TYPE_GROUP_16, PF_F3, 0), 1, X86InstInfo{"PREFETCH NTA", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_F3, 1), 1, X86InstInfo{"PREFETCH T0", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_F3, 2), 1, X86InstInfo{"PREFETCH T1", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_F3, 3), 1, X86InstInfo{"PREFETCH T2", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_F3, 4), 1, X86InstInfo{"NOP", TYPE_INVALID, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_F3, 5), 1, X86InstInfo{"NOP", TYPE_INVALID, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_F3, 6), 1, X86InstInfo{"NOP", TYPE_INVALID, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_F3, 7), 1, X86InstInfo{"NOP", TYPE_INVALID, FLAGS_MODRM, 0, nullptr}}, + + {OPD(TYPE_GROUP_16, PF_66, 0), 1, X86InstInfo{"PREFETCH NTA", 
TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_66, 1), 1, X86InstInfo{"PREFETCH T0", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_66, 2), 1, X86InstInfo{"PREFETCH T1", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_66, 3), 1, X86InstInfo{"PREFETCH T2", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_66, 4), 1, X86InstInfo{"NOP", TYPE_INVALID, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_66, 5), 1, X86InstInfo{"NOP", TYPE_INVALID, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_66, 6), 1, X86InstInfo{"NOP", TYPE_INVALID, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_66, 7), 1, X86InstInfo{"NOP", TYPE_INVALID, FLAGS_MODRM, 0, nullptr}}, + + {OPD(TYPE_GROUP_16, PF_F2, 0), 1, X86InstInfo{"PREFETCH NTA", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_F2, 1), 1, X86InstInfo{"PREFETCH T0", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_F2, 2), 1, X86InstInfo{"PREFETCH T1", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_F2, 3), 1, X86InstInfo{"PREFETCH T2", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_F2, 4), 1, X86InstInfo{"NOP", TYPE_INVALID, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_F2, 5), 1, X86InstInfo{"NOP", TYPE_INVALID, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_F2, 6), 1, X86InstInfo{"NOP", TYPE_INVALID, FLAGS_MODRM, 0, nullptr}}, + {OPD(TYPE_GROUP_16, PF_F2, 7), 1, X86InstInfo{"NOP", TYPE_INVALID, FLAGS_MODRM, 0, nullptr}}, + + // GROUP 17 + {OPD(TYPE_GROUP_17, PF_NONE, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_NONE, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_NONE, 2), 1, X86InstInfo{"", 
TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_NONE, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_NONE, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_NONE, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_NONE, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_NONE, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_17, PF_F3, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_F3, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_F3, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_F3, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_F3, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_F3, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_F3, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_F3, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_17, PF_66, 0), 1, X86InstInfo{"EXTRQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 2, nullptr}}, + {OPD(TYPE_GROUP_17, PF_66, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_66, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_66, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_66, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_66, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_66, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + 
{OPD(TYPE_GROUP_17, PF_66, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + {OPD(TYPE_GROUP_17, PF_F2, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_F2, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_F2, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_F2, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_F2, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_F2, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_F2, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(TYPE_GROUP_17, PF_F2, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + // GROUP P + // AMD documentation claims n/a for all instructions in Group P + // It also claims that instructions /2, /4, /5, /6, /7 all alias to /0 + // It claims that /3 is still Prefetch Mod + // Tooling fails to decode past the /2 encoding but runs fine in hardware + // Hardware also runs all the prefixes correctly + {OPD(TYPE_GROUP_P, PF_NONE, 0), 1, X86InstInfo{"PREFETCH Ex", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_NONE, 1), 1, X86InstInfo{"PREFETCH Mod", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_NONE, 2), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_NONE, 3), 1, X86InstInfo{"PREFETCH Mod", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_NONE, 4), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_NONE, 5), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_NONE, 6), 1, X86InstInfo{"PREFETCH Res", 
TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_NONE, 7), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + + {OPD(TYPE_GROUP_P, PF_F3, 0), 1, X86InstInfo{"PREFETCH Ex", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_F3, 1), 1, X86InstInfo{"PREFETCH Mod", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_F3, 2), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_F3, 3), 1, X86InstInfo{"PREFETCH Mod", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_F3, 4), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_F3, 5), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_F3, 6), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_F3, 7), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + + {OPD(TYPE_GROUP_P, PF_66, 0), 1, X86InstInfo{"PREFETCH Ex", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_66, 1), 1, X86InstInfo{"PREFETCH Mod", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_66, 2), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_66, 3), 1, X86InstInfo{"PREFETCH Mod", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_66, 4), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_66, 5), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + 
{OPD(TYPE_GROUP_P, PF_66, 6), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_66, 7), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + + {OPD(TYPE_GROUP_P, PF_F2, 0), 1, X86InstInfo{"PREFETCH Ex", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_F2, 1), 1, X86InstInfo{"PREFETCH Mod", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_F2, 2), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_F2, 3), 1, X86InstInfo{"PREFETCH Mod", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_F2, 4), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_F2, 5), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_F2, 6), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + {OPD(TYPE_GROUP_P, PF_F2, 7), 1, X86InstInfo{"PREFETCH Res", TYPE_INVALID, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}}, + }; +#undef OPD + + const std::vector> SecondaryModRMExtensionOpTable = { + // REG /1 + {((0 << 3) | 0), 1, X86InstInfo{"MONITOR", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {((0 << 3) | 1), 1, X86InstInfo{"MWAIT", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {((0 << 3) | 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {((0 << 3) | 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {((0 << 3) | 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {((0 << 3) | 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {((0 << 3) | 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {((0 << 3) | 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + 
// REG /2 + {((1 << 3) | 0), 1, X86InstInfo{"XGETBV", TYPE_INST, FLAGS_NONE, 0, nullptr}}, + {((1 << 3) | 1), 1, X86InstInfo{"XSETBV", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {((1 << 3) | 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {((1 << 3) | 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {((1 << 3) | 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {((1 << 3) | 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {((1 << 3) | 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {((1 << 3) | 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + + // REG /3 + {((2 << 3) | 0), 1, X86InstInfo{"VMRUN", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {((2 << 3) | 1), 1, X86InstInfo{"VMMCALL", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {((2 << 3) | 2), 1, X86InstInfo{"VMLOAD", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {((2 << 3) | 3), 1, X86InstInfo{"VMSAVE", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {((2 << 3) | 4), 1, X86InstInfo{"STGI", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {((2 << 3) | 5), 1, X86InstInfo{"CLGI", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {((2 << 3) | 6), 1, X86InstInfo{"SKINIT", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {((2 << 3) | 7), 1, X86InstInfo{"INVLPGA", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + + // REG /7 + {((3 << 3) | 0), 1, X86InstInfo{"SWAPGS", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {((3 << 3) | 1), 1, X86InstInfo{"RDTSCP", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {((3 << 3) | 2), 1, X86InstInfo{"MONITORX", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {((3 << 3) | 3), 1, X86InstInfo{"MWAITX", TYPE_PRIV, FLAGS_NONE, 0, nullptr}}, + {((3 << 3) | 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {((3 << 3) | 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {((3 << 3) | 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {((3 << 3) | 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + }; + +#define OPD(op, modrmop) (((op - 0xD8) 
<< 8) | modrmop) +#define OPDReg(op, reg) (((op - 0xD8) << 8) | (reg << 3)) + const std::vector> X87OpTable = { + // 0xD8 + {OPDReg(0xD8, 0), 1, X86InstInfo{"FADD", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xD8, 1), 1, X86InstInfo{"FMUL", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xD8, 2), 1, X86InstInfo{"FCOM", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xD8, 3), 1, X86InstInfo{"FCOMP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xD8, 4), 1, X86InstInfo{"FSUB", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xD8, 5), 1, X86InstInfo{"FSUBR", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xD8, 6), 1, X86InstInfo{"FDIV", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xD8, 7), 1, X86InstInfo{"FDIVR", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 0 + {OPD(0xD8, 0xC0), 8, X86InstInfo{"FADD", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 1 + {OPD(0xD8, 0xC8), 8, X86InstInfo{"FMUL", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 2 + {OPD(0xD8, 0xD0), 8, X86InstInfo{"FCOM", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 3 + {OPD(0xD8, 0xD8), 8, X86InstInfo{"FCOMP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 4 + {OPD(0xD8, 0xE0), 8, X86InstInfo{"FSUB", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 5 + {OPD(0xD8, 0xE8), 8, X86InstInfo{"FSUBR", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 6 + {OPD(0xD8, 0xF0), 8, X86InstInfo{"FDIV", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 7 + {OPD(0xD8, 0xF8), 8, X86InstInfo{"FDIVR", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // 0xD9 + {OPDReg(0xD9, 0), 1, X86InstInfo{"FLD", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPDReg(0xD9, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xD9, 2), 1, X86InstInfo{"FST", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xD9, 3), 1, X86InstInfo{"FSTP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xD9, 4), 1, X86InstInfo{"FLDENV", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xD9, 5), 1, X86InstInfo{"FLDCW", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xD9, 6), 1, 
X86InstInfo{"FNSTENV", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xD9, 7), 1, X86InstInfo{"FNSTCW", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + // / 0 + {OPD(0xD9, 0xC0), 8, X86InstInfo{"FLD", TYPE_INST, FLAGS_NONE, 0, nullptr}}, + // / 1 + {OPD(0xD9, 0xC8), 8, X86InstInfo{"FXCH", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 2 + {OPD(0xD9, 0xD0), 1, X86InstInfo{"FNOP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xD1), 7, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 3 + {OPD(0xD9, 0xD8), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 4 + {OPD(0xD9, 0xE0), 1, X86InstInfo{"FCHS", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xE1), 1, X86InstInfo{"FABS", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xE2), 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xE4), 1, X86InstInfo{"FTST", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xE5), 1, X86InstInfo{"FXAM", TYPE_INST, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xE6), 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 5 + {OPD(0xD9, 0xE8), 1, X86InstInfo{"FLD1", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xE9), 1, X86InstInfo{"FLDL2T", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xEA), 1, X86InstInfo{"FLDL2E", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xEB), 1, X86InstInfo{"FLDPI", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xEC), 1, X86InstInfo{"FLDLG2", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xED), 1, X86InstInfo{"FLDLN2", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xEE), 1, X86InstInfo{"FLDZ", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xEF), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 6 + {OPD(0xD9, 0xF0), 1, X86InstInfo{"F2XM1", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xF1), 1, X86InstInfo{"FYL2X", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xF2), 1, X86InstInfo{"FPTAN", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xF3), 1, 
X86InstInfo{"FPATAN", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xF4), 1, X86InstInfo{"FXTRACT", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xF5), 1, X86InstInfo{"FPREM1", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xF6), 1, X86InstInfo{"FDECSTP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xF7), 1, X86InstInfo{"FINCSTP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 7 + {OPD(0xD9, 0xF8), 1, X86InstInfo{"FPREM", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xF9), 1, X86InstInfo{"FYL2XP1", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xFA), 1, X86InstInfo{"FSQRT", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xFB), 1, X86InstInfo{"FSINCOS", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xFC), 1, X86InstInfo{"FRNDINT", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xFD), 1, X86InstInfo{"FSCALE", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xFE), 1, X86InstInfo{"FSIN", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xD9, 0xFF), 1, X86InstInfo{"FCOS", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // 0xDA + {OPDReg(0xDA, 0), 1, X86InstInfo{"FIADD", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDA, 1), 1, X86InstInfo{"FIMUL", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDA, 2), 1, X86InstInfo{"FICOM", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDA, 3), 1, X86InstInfo{"FICOMP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDA, 4), 1, X86InstInfo{"FISUB", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDA, 5), 1, X86InstInfo{"FISUBR", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDA, 6), 1, X86InstInfo{"FIDIV", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDA, 7), 1, X86InstInfo{"FIDIVR", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 0 + {OPD(0xDA, 0xC0), 8, X86InstInfo{"FCMOVB", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 1 + {OPD(0xDA, 0xC8), 8, X86InstInfo{"FCMOVE", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 2 + {OPD(0xDA, 0xD0), 8, X86InstInfo{"FCMOVBE", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 3 + {OPD(0xDA, 0xD8), 8, 
X86InstInfo{"FCMOVU", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 4 + {OPD(0xDA, 0xE0), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 5 + {OPD(0xDA, 0xE8), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(0xDA, 0xE9), 1, X86InstInfo{"FUCOMPP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xDA, 0xEA), 6, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 6 + {OPD(0xDA, 0xF0), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 7 + {OPD(0xDA, 0xF8), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // 0xDB + {OPDReg(0xDB, 0), 1, X86InstInfo{"FILD", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDB, 1), 1, X86InstInfo{"FISTTP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDB, 2), 1, X86InstInfo{"FIST", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDB, 3), 1, X86InstInfo{"FISTP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDB, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDB, 5), 1, X86InstInfo{"FLD", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}}, + {OPDReg(0xDB, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDB, 7), 1, X86InstInfo{"FSTP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 0 + {OPD(0xDB, 0xC0), 8, X86InstInfo{"FCMOVNB", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 1 + {OPD(0xDB, 0xC8), 8, X86InstInfo{"FCMOVNE", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 2 + {OPD(0xDB, 0xD0), 8, X86InstInfo{"FCMOVNBE", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 3 + {OPD(0xDB, 0xD8), 8, X86InstInfo{"FCMOVNU", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 4 + {OPD(0xDB, 0xE0), 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(0xDB, 0xE2), 1, X86InstInfo{"FNCLEX", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xDB, 0xE3), 1, X86InstInfo{"FNINIT", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPD(0xDB, 0xE4), 4, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 5 + {OPD(0xDB, 0xE8), 8, X86InstInfo{"FUCOMI", TYPE_INST, 
FLAGS_NONE, 0, nullptr}}, + // / 6 + {OPD(0xDB, 0xF0), 8, X86InstInfo{"FCOMI", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 7 + {OPD(0xDB, 0xF8), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // 0xDC + {OPDReg(0xDC, 0), 1, X86InstInfo{"FADD", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDC, 1), 1, X86InstInfo{"FMUL", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDC, 2), 1, X86InstInfo{"FCOM", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDC, 3), 1, X86InstInfo{"FCOMP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDC, 4), 1, X86InstInfo{"FSUB", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDC, 5), 1, X86InstInfo{"FSUBR", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDC, 6), 1, X86InstInfo{"FDIV", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDC, 7), 1, X86InstInfo{"FDIVR", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 0 + {OPD(0xDC, 0xC0), 8, X86InstInfo{"FADD", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 1 + {OPD(0xDC, 0xC8), 8, X86InstInfo{"FMUL", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 2 + {OPD(0xDC, 0xD0), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 3 + {OPD(0xDC, 0xD8), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 4 + {OPD(0xDC, 0xE0), 8, X86InstInfo{"FSUBR", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 5 + {OPD(0xDC, 0xE8), 8, X86InstInfo{"FSUB", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 6 + {OPD(0xDC, 0xF0), 8, X86InstInfo{"FDIVR", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 7 + {OPD(0xDC, 0xF8), 8, X86InstInfo{"FDIV", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // 0xDD + {OPDReg(0xDD, 0), 1, X86InstInfo{"FLD", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDD, 1), 1, X86InstInfo{"FISTTP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDD, 2), 1, X86InstInfo{"FST", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDD, 3), 1, X86InstInfo{"FSTP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDD, 4), 1, X86InstInfo{"FRSTOR", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDD, 5), 1, X86InstInfo{"", 
TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDD, 6), 1, X86InstInfo{"FNSAVE", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDD, 7), 1, X86InstInfo{"FNSTSW", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 0 + {OPD(0xDD, 0xC0), 8, X86InstInfo{"FFREE", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 1 + {OPD(0xDD, 0xC8), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 2 + {OPD(0xDD, 0xD0), 8, X86InstInfo{"FST", TYPE_INST, FLAGS_NONE, 0, nullptr}}, + // / 3 + {OPD(0xDD, 0xD8), 8, X86InstInfo{"FSTP", TYPE_INST, FLAGS_NONE, 0, nullptr}}, + // / 4 + {OPD(0xDD, 0xE0), 8, X86InstInfo{"FUCOM", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 5 + {OPD(0xDD, 0xE8), 8, X86InstInfo{"FUCOMP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 6 + {OPD(0xDD, 0xF0), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 7 + {OPD(0xDD, 0xF8), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // 0xDE + {OPDReg(0xDE, 0), 1, X86InstInfo{"FIADD", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDE, 1), 1, X86InstInfo{"FIMUL", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDE, 2), 1, X86InstInfo{"FICOM", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDE, 3), 1, X86InstInfo{"FICOMP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDE, 4), 1, X86InstInfo{"FISUB", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDE, 5), 1, X86InstInfo{"FISUBR", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDE, 6), 1, X86InstInfo{"FIDIV", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDE, 7), 1, X86InstInfo{"FIDIVR", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 0 + {OPD(0xDE, 0xC0), 8, X86InstInfo{"FADDP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 1 + {OPD(0xDE, 0xC8), 8, X86InstInfo{"FMULP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 2 + {OPD(0xDE, 0xD0), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 3 + {OPD(0xDE, 0xD8), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + {OPD(0xDE, 0xD9), 1, X86InstInfo{"FCOMPP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, 
+ {OPD(0xDE, 0xDA), 6, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 4 + {OPD(0xDE, 0xE0), 8, X86InstInfo{"FSUBRP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 5 + {OPD(0xDE, 0xE8), 8, X86InstInfo{"FSUBP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 6 + {OPD(0xDE, 0xF0), 8, X86InstInfo{"FDIVRP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 7 + {OPD(0xDE, 0xF8), 8, X86InstInfo{"FDIVP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // 0xDF + {OPDReg(0xDF, 0), 1, X86InstInfo{"FILD", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDF, 1), 1, X86InstInfo{"FISTTP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDF, 2), 1, X86InstInfo{"FIST", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDF, 3), 1, X86InstInfo{"FISTP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDF, 4), 1, X86InstInfo{"FBLD", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDF, 5), 1, X86InstInfo{"FILD", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDF, 6), 1, X86InstInfo{"FBSTP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + {OPDReg(0xDF, 7), 1, X86InstInfo{"FISTP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 0 + {OPD(0xDF, 0xC0), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 1 + {OPD(0xDF, 0xC8), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 2 + {OPD(0xDF, 0xD0), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 3 + {OPD(0xDF, 0xD8), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 4 + {OPD(0xDF, 0xE0), 1, X86InstInfo{"FNSTSW", TYPE_INST, FLAGS_NONE, 0, nullptr}}, + {OPD(0xDF, 0xE1), 7, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + // / 5 + {OPD(0xDF, 0xE8), 8, X86InstInfo{"FUCOMIP", TYPE_INST, FLAGS_NONE, 0, nullptr}}, + // / 6 + {OPD(0xDF, 0xF0), 8, X86InstInfo{"FCOMIP", TYPE_X87, FLAGS_NONE, 0, nullptr}}, + // / 7 + {OPD(0xDF, 0xF8), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}}, + }; +#undef OPD +#undef OPDReg + + const std::vector> DDDNowOpTable = { + {0x0C, 1, X86InstInfo{"PI2FW", TYPE_3DNOW_INST, 
FLAGS_NONE, 0, nullptr}}, + {0x0D, 1, X86InstInfo{"PI2FD", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + {0x1C, 1, X86InstInfo{"PF2IW", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + {0x1D, 1, X86InstInfo{"PF2ID", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + + {0x8A, 1, X86InstInfo{"PFNACC", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + {0x8E, 1, X86InstInfo{"PFPNACC", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + + {0x9A, 1, X86InstInfo{"PFSUB", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + {0x9E, 1, X86InstInfo{"PFADD", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + + {0xAA, 1, X86InstInfo{"PFSUBR", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + {0xAE, 1, X86InstInfo{"PFACC", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + + {0xBB, 1, X86InstInfo{"PSWAPD", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + {0xBF, 1, X86InstInfo{"PAVGUSB", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + + {0x90, 1, X86InstInfo{"PFCMPGE", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + {0x94, 1, X86InstInfo{"PFMIN", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + {0x96, 1, X86InstInfo{"PFRCP", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + {0x97, 1, X86InstInfo{"PFRSQRT", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + + {0xA0, 1, X86InstInfo{"PFCMPGT", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + {0xA4, 1, X86InstInfo{"PFMAX", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + {0xA6, 1, X86InstInfo{"PFRCPIT1", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + {0xA7, 1, X86InstInfo{"PFRSQIT1", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + + {0xB0, 1, X86InstInfo{"PFCMPEQ", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + {0xB4, 1, X86InstInfo{"PFMUL", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + {0xB6, 1, X86InstInfo{"PFRCPIT2", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + {0xB7, 1, X86InstInfo{"PMULHRW", TYPE_3DNOW_INST, FLAGS_NONE, 0, nullptr}}, + }; + +#define OPD(prefix, opcode) ((prefix << 8) | opcode) +#define PF_38_NONE 0 +#define PF_38_66 1 +#define PF_38_F2 2 + + const std::vector> H0F38Table = { + {OPD(PF_38_NONE, 
0x00), 1, X86InstInfo{"PSHUFB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x00), 1, X86InstInfo{"PSHUFB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_NONE, 0x01), 1, X86InstInfo{"PHADDW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x01), 1, X86InstInfo{"PHADDW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_NONE, 0x02), 1, X86InstInfo{"PHADDD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x02), 1, X86InstInfo{"PHADDD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_NONE, 0x03), 1, X86InstInfo{"PHADDSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x03), 1, X86InstInfo{"PHADDSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_NONE, 0x04), 1, X86InstInfo{"PMADDUBSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x04), 1, X86InstInfo{"PMADDUBSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_NONE, 0x05), 1, X86InstInfo{"PHSUBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x05), 1, X86InstInfo{"PHSUBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_NONE, 0x06), 1, X86InstInfo{"PHSUBD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x06), 1, X86InstInfo{"PHSUBD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_NONE, 0x07), 1, X86InstInfo{"PHSUBSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x07), 1, X86InstInfo{"PHSUBSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_NONE, 0x08), 1, X86InstInfo{"PSIGNB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x08), 1, X86InstInfo{"PSIGNB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_NONE, 0x09), 1, X86InstInfo{"PSIGNW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x09), 1, X86InstInfo{"PSIGNW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_NONE, 0x0A), 1, X86InstInfo{"PSIGND", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x0A), 1, X86InstInfo{"PSIGND", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_NONE, 0x0B), 1, X86InstInfo{"PMULHRSW", TYPE_UNDEC, 
FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x0B), 1, X86InstInfo{"PMULHRSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(PF_38_66, 0x10), 1, X86InstInfo{"PBLENDVB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x14), 1, X86InstInfo{"BLENDVPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x15), 1, X86InstInfo{"BLENDVPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x17), 1, X86InstInfo{"PTEST", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_NONE, 0x1C), 1, X86InstInfo{"PABSB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x1C), 1, X86InstInfo{"PABSB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_NONE, 0x1D), 1, X86InstInfo{"PABSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x1D), 1, X86InstInfo{"PABSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_NONE, 0x1E), 1, X86InstInfo{"PABSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x1E), 1, X86InstInfo{"PABSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(PF_38_66, 0x20), 1, X86InstInfo{"PMOVSXBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x21), 1, X86InstInfo{"PMOVSXBD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x22), 1, X86InstInfo{"PMOVSXBQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x23), 1, X86InstInfo{"PMOVSXWD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x24), 1, X86InstInfo{"PMOVSXWQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x25), 1, X86InstInfo{"PMOVSXDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x28), 1, X86InstInfo{"PMULDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x29), 1, X86InstInfo{"PCMPEQQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x2A), 1, X86InstInfo{"MOVNTDQA", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x2B), 1, X86InstInfo{"PACKUSDW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(PF_38_66, 0x30), 1, X86InstInfo{"PMOVZXBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x31), 1, 
X86InstInfo{"PMOVZXBD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x32), 1, X86InstInfo{"PMOVZXBQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x33), 1, X86InstInfo{"PMOVZXWD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x34), 1, X86InstInfo{"PMOVZXWQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x35), 1, X86InstInfo{"PMOVZXDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x38), 1, X86InstInfo{"PMINSB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x39), 1, X86InstInfo{"PMINSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x3A), 1, X86InstInfo{"PMINUW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x3B), 1, X86InstInfo{"PMINUD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x3C), 1, X86InstInfo{"PMAXSB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x3D), 1, X86InstInfo{"PMAXSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x3E), 1, X86InstInfo{"PMAXUW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x3F), 1, X86InstInfo{"PMAXUD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(PF_38_66, 0x40), 1, X86InstInfo{"PMULLD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0x41), 1, X86InstInfo{"PHMINPOSUW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(PF_38_66, 0xDB), 1, X86InstInfo{"AESIMC", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0xDC), 1, X86InstInfo{"AESENC", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0xDD), 1, X86InstInfo{"AESENCLAST", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0xDE), 1, X86InstInfo{"AESDEC", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_66, 0xDF), 1, X86InstInfo{"AESDECLAST", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(PF_38_NONE, 0xF0), 1, X86InstInfo{"MOVBE", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_NONE, 0xF1), 1, X86InstInfo{"MOVBE", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(PF_38_66, 0xF0), 1, X86InstInfo{"CRC32", TYPE_UNDEC, FLAGS_NONE, 0, 
nullptr}}, + {OPD(PF_38_66, 0xF1), 1, X86InstInfo{"CRC32", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(PF_38_F2, 0xF0), 1, X86InstInfo{"CRC32", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(PF_38_F2, 0xF1), 1, X86InstInfo{"CRC32", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + }; +#undef PF_38_NONE +#undef PF_38_66 +#undef PF_38_F2 +#undef OPD + +#define OPD(REX, prefix, opcode) ((REX << 9) | (prefix << 8) | opcode) +#define PF_3A_NONE 0 +#define PF_3A_66 1 + + const std::vector> H0F3ATable = { + {OPD(0, PF_3A_NONE, 0x0F), 1, X86InstInfo{"PALIGNR", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x08), 1, X86InstInfo{"ROUNDPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x09), 1, X86InstInfo{"ROUNDPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x0A), 1, X86InstInfo{"ROUNDSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x0B), 1, X86InstInfo{"ROUNDSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x0C), 1, X86InstInfo{"BLENDPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x0D), 1, X86InstInfo{"BLENDPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x0E), 1, X86InstInfo{"PBLENDW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x0F), 1, X86InstInfo{"PALIGNR", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(0, PF_3A_66, 0x14), 1, X86InstInfo{"PEXTRB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x15), 1, X86InstInfo{"PEXTRW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x16), 1, X86InstInfo{"PEXTRD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, PF_3A_66, 0x16), 1, X86InstInfo{"PEXTRD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x17), 1, X86InstInfo{"EXTRACTPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(0, PF_3A_66, 0x20), 1, X86InstInfo{"PINSRB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x21), 1, X86InstInfo{"INSERTPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x22), 1, 
X86InstInfo{"PINSRD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, PF_3A_66, 0x22), 1, X86InstInfo{"PINSRQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(0, PF_3A_66, 0x40), 1, X86InstInfo{"DPPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x41), 1, X86InstInfo{"DPPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x42), 1, X86InstInfo{"MPSADBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x44), 1, X86InstInfo{"PCLMULQDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(0, PF_3A_66, 0x60), 1, X86InstInfo{"PCMPESTRM", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x61), 1, X86InstInfo{"PCMPESTRI", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x62), 1, X86InstInfo{"PCMPISTRM", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(0, PF_3A_66, 0x63), 1, X86InstInfo{"PCMPISTRI", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(0, PF_3A_66, 0xDF), 1, X86InstInfo{"AESKEYGENASSIST", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + }; +#undef PF_3A_NONE +#undef PF_3A_66 + +#undef OPD + +#define OPD(map_select, pp, opcode) (((map_select - 1) << 10) | (pp << 8) | (opcode)) + const std::vector> VEXTable = { + // Map 0 (Reserved) + // VEX Map 1 + {OPD(1, 0b00, 0x10), 1, X86InstInfo{"VMOVUPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x10), 1, X86InstInfo{"VMODUPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x10), 1, X86InstInfo{"VMOVSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0x10), 1, X86InstInfo{"VMOVSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x11), 1, X86InstInfo{"VMOVUPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x11), 1, X86InstInfo{"VMODUPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x11), 1, X86InstInfo{"VMOVSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0x11), 1, X86InstInfo{"VMOVSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x12), 1, X86InstInfo{"VMOVLPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 
0b01, 0x12), 1, X86InstInfo{"VMOVLPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x12), 1, X86InstInfo{"VMOVSLDUP", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0x12), 1, X86InstInfo{"VMOVDDUP", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x13), 1, X86InstInfo{"VMOVLPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x13), 1, X86InstInfo{"VMOVLPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x14), 1, X86InstInfo{"VUNPCKLPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x14), 1, X86InstInfo{"VUNPCKLPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x15), 1, X86InstInfo{"VUNPCKHPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x15), 1, X86InstInfo{"VUNPCKHPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x16), 1, X86InstInfo{"VMOVHPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x16), 1, X86InstInfo{"VMOVHPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x16), 1, X86InstInfo{"VMOVSHDUP", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x17), 1, X86InstInfo{"VMOVHPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x17), 1, X86InstInfo{"VMOVHPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x50), 1, X86InstInfo{"VMOVMSKPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x50), 1, X86InstInfo{"VMOVMSKPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x51), 1, X86InstInfo{"VSQRTPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x51), 1, X86InstInfo{"VSQRTPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x51), 1, X86InstInfo{"VSQRTSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0x51), 1, X86InstInfo{"VSQRTSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x52), 1, X86InstInfo{"VRSQRTPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x52), 1, X86InstInfo{"VRSQRTSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x53), 1, X86InstInfo{"VRCPPS", 
TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x53), 1, X86InstInfo{"VRCPSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x54), 1, X86InstInfo{"VANDPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x54), 1, X86InstInfo{"VANDPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x55), 1, X86InstInfo{"VANDNPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x55), 1, X86InstInfo{"VANDNPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x56), 1, X86InstInfo{"VORPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x56), 1, X86InstInfo{"VORPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x57), 1, X86InstInfo{"VXORPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x57), 1, X86InstInfo{"VXORPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0x60), 1, X86InstInfo{"VPUNPCKLBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x61), 1, X86InstInfo{"VPUNPCKLWD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x62), 1, X86InstInfo{"VPUNPCKLDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x63), 1, X86InstInfo{"VPACKSSWB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x64), 1, X86InstInfo{"VPCMPGTB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x65), 1, X86InstInfo{"VPCMPGTW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x66), 1, X86InstInfo{"VPCMPGTD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x67), 1, X86InstInfo{"VPACKUSWB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0x70), 1, X86InstInfo{"VPSHUFD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x70), 1, X86InstInfo{"VPSHUFHW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0x70), 1, X86InstInfo{"VPSHUFLW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0x71), 1, X86InstInfo{"", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, // VEX Group 12 + {OPD(1, 0b01, 0x72), 1, X86InstInfo{"", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, // VEX Group 13 + 
{OPD(1, 0b01, 0x73), 1, X86InstInfo{"", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, // VEX Group 14 + + {OPD(1, 0b01, 0x74), 1, X86InstInfo{"VPCMPEQB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x75), 1, X86InstInfo{"VPCMPEQW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x76), 1, X86InstInfo{"VPCMPEQD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x77), 1, X86InstInfo{"VZERO*", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0xC2), 1, X86InstInfo{"VCMPccPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xC2), 1, X86InstInfo{"VCMPccPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0xC2), 1, X86InstInfo{"VCMPccSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0xC2), 1, X86InstInfo{"VCMPccSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0xC4), 1, X86InstInfo{"VPINSRW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xC5), 1, X86InstInfo{"VPEXTRW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0xC6), 1, X86InstInfo{"VSHUFPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xC6), 1, X86InstInfo{"VSHUFPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + // The above ops are defined from `Table A-17. 
VEX Opcode Map 1, Low Nibble = [0h:7h]` of AMD Architecture programmer's manual Volume 3 + // This table doesn't state which VEX.pp is for which instruction + // XXX: Confirm all the above encoding opcodes + + {OPD(1, 0b00, 0x28), 1, X86InstInfo{"VMOVAPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x28), 1, X86InstInfo{"VMOVAPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x29), 1, X86InstInfo{"VMOVAPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x29), 1, X86InstInfo{"VMOVAPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b10, 0x2A), 1, X86InstInfo{"VCVTSI2SS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0x2A), 1, X86InstInfo{"VCVTSI2SD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x2B), 1, X86InstInfo{"VMOVNTPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x2B), 1, X86InstInfo{"VMOVNTPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b10, 0x2C), 1, X86InstInfo{"VCVTTSS2SI", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0x2C), 1, X86InstInfo{"VCVTTSD2SI", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b10, 0x2D), 1, X86InstInfo{"VCVTSS2SI", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0x2D), 1, X86InstInfo{"VCVTSD2SI", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x2E), 1, X86InstInfo{"VUCOMISS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x2E), 1, X86InstInfo{"VUCOMISD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x2F), 1, X86InstInfo{"VUCOMISS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x2F), 1, X86InstInfo{"VUCOMISD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x58), 1, X86InstInfo{"VADDPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x58), 1, X86InstInfo{"VADDPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x58), 1, X86InstInfo{"VADDSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0x58), 1, X86InstInfo{"VADDSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 
0x59), 1, X86InstInfo{"VMULPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x59), 1, X86InstInfo{"VMULPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x59), 1, X86InstInfo{"VMULSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0x59), 1, X86InstInfo{"VMULSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x5B), 1, X86InstInfo{"VCVTDQ2PS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x5B), 1, X86InstInfo{"VCVTPS2DQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x5B), 1, X86InstInfo{"VCVTPS2DQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x5C), 1, X86InstInfo{"VSUBPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x5C), 1, X86InstInfo{"VSUBPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x5C), 1, X86InstInfo{"VSUBSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0x5C), 1, X86InstInfo{"VSUBSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x5D), 1, X86InstInfo{"VMINPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x5D), 1, X86InstInfo{"VMINPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x5D), 1, X86InstInfo{"VMINSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0x5D), 1, X86InstInfo{"VMINSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x5E), 1, X86InstInfo{"VDIVPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x5E), 1, X86InstInfo{"VDIVPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x5E), 1, X86InstInfo{"VDIVSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0x5E), 1, X86InstInfo{"VDIVSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0x5F), 1, X86InstInfo{"VMAXPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x5F), 1, X86InstInfo{"VMAXPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x5F), 1, X86InstInfo{"VMAXSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0x5F), 1, X86InstInfo{"VMAXSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + + {OPD(1, 
0b01, 0x68), 1, X86InstInfo{"VPUNPCKHBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x69), 1, X86InstInfo{"VPUNPCKHWD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x6A), 1, X86InstInfo{"VPUNPCKHDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x6B), 1, X86InstInfo{"VPACKSSDW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x6C), 1, X86InstInfo{"VPUNPCKLQDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x6D), 1, X86InstInfo{"VPUNPCKHQDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0x6E), 1, X86InstInfo{"VMOV*", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0x6F), 1, X86InstInfo{"VMOVDQA", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x6F), 1, X86InstInfo{"VMOVDQU", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0x7C), 1, X86InstInfo{"VHADDPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0x7C), 1, X86InstInfo{"VHADDPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0x7D), 1, X86InstInfo{"VHSUBPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0x7D), 1, X86InstInfo{"VHSUBPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0x7E), 1, X86InstInfo{"VMOV*", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x7E), 1, X86InstInfo{"VMOVQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0x7F), 1, X86InstInfo{"VMOVDQA", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 0x7F), 1, X86InstInfo{"VMOVDQU", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b00, 0xAE), 1, X86InstInfo{"", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, // VEX Group 15 + {OPD(1, 0b01, 0xAE), 1, X86InstInfo{"", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, // VEX Group 15 + {OPD(1, 0b10, 0xAE), 1, X86InstInfo{"", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, // VEX Group 15 + {OPD(1, 0b11, 0xAE), 1, X86InstInfo{"", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, // VEX Group 15 + + {OPD(1, 0b01, 0xD0), 1, X86InstInfo{"VADDSUBPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0xD0), 1, 
X86InstInfo{"VADDSUBPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0xD1), 1, X86InstInfo{"VPSRLW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xD2), 1, X86InstInfo{"VPSRLD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xD3), 1, X86InstInfo{"VPSRLQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xD4), 1, X86InstInfo{"VPADDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xD5), 1, X86InstInfo{"VPMULLW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xD6), 1, X86InstInfo{"VMOVQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xD7), 1, X86InstInfo{"VPMOVMSKB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0xD8), 1, X86InstInfo{"VPSUBUSB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xD9), 1, X86InstInfo{"VPSUBUSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xDA), 1, X86InstInfo{"VPMINUB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xDB), 1, X86InstInfo{"VPAND", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xDC), 1, X86InstInfo{"VPADDUSB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xDD), 1, X86InstInfo{"VPADDUSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xDE), 1, X86InstInfo{"VPMAXUB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xDF), 1, X86InstInfo{"VPANDN", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0xE0), 1, X86InstInfo{"VPAVGB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xE1), 1, X86InstInfo{"VPSRAW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xE2), 1, X86InstInfo{"VPSRAD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xE3), 1, X86InstInfo{"VPAVGW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xE4), 1, X86InstInfo{"VPMULHUW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xE5), 1, X86InstInfo{"VPMULHW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0xE6), 1, X86InstInfo{"VCVTTPD2DQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b10, 
0xE6), 1, X86InstInfo{"VCVTDQ2PD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0xE6), 1, X86InstInfo{"VCVTPD2DQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0xE7), 1, X86InstInfo{"VMOVNTDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0xE8), 1, X86InstInfo{"VPSUBSB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xE9), 1, X86InstInfo{"VPSUBSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xEA), 1, X86InstInfo{"VPMINSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xEB), 1, X86InstInfo{"VPOR", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xEC), 1, X86InstInfo{"VPADDSB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xED), 1, X86InstInfo{"VPADDSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xEE), 1, X86InstInfo{"VPMAXSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xEF), 1, X86InstInfo{"VPXOR", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b11, 0xF0), 1, X86InstInfo{"VLDDQU", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0xF1), 1, X86InstInfo{"VPSLLW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xF2), 1, X86InstInfo{"VPSLLD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xF3), 1, X86InstInfo{"VPSLLQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xF4), 1, X86InstInfo{"VPMULUDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xF5), 1, X86InstInfo{"VPMADDWD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xF6), 1, X86InstInfo{"VPSADBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xF7), 1, X86InstInfo{"VMASKMOVDQU", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(1, 0b01, 0xF8), 1, X86InstInfo{"VPSUBB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xF9), 1, X86InstInfo{"VPSUBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xFA), 1, X86InstInfo{"VPSUBD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xFB), 1, X86InstInfo{"VPSUBQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + 
{OPD(1, 0b01, 0xFC), 1, X86InstInfo{"VPADDB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xFD), 1, X86InstInfo{"VPADDW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xFE), 1, X86InstInfo{"VPADDD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + // VEX Map 2 + {OPD(2, 0b01, 0x00), 1, X86InstInfo{"VPSHUFB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x01), 1, X86InstInfo{"VPHADDW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x02), 1, X86InstInfo{"VPHADDD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x03), 1, X86InstInfo{"VPHADDSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x04), 1, X86InstInfo{"VPMADDUBSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x05), 1, X86InstInfo{"VPHSUBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x06), 1, X86InstInfo{"VPHSUBD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x07), 1, X86InstInfo{"VPHSUBSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0x08), 1, X86InstInfo{"VPSIGNB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x09), 1, X86InstInfo{"VPSIGNW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x0A), 1, X86InstInfo{"VPSIGND", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x0B), 1, X86InstInfo{"VPMULHRSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x0C), 1, X86InstInfo{"VPERMILPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x0D), 1, X86InstInfo{"VPERMILPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x0E), 1, X86InstInfo{"VTESTPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x0F), 1, X86InstInfo{"VTESTPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0x13), 1, X86InstInfo{"VCVTPH2PS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x16), 1, X86InstInfo{"VPERMPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x17), 1, X86InstInfo{"VPTEST", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0x18), 1, X86InstInfo{"VBROADCASTSS", 
TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x19), 1, X86InstInfo{"VBROADCASTSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x1A), 1, X86InstInfo{"VBROADCASTF128", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x1C), 1, X86InstInfo{"VPABSB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x1D), 1, X86InstInfo{"VPABSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x1E), 1, X86InstInfo{"VPABSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0x20), 1, X86InstInfo{"VPMOVSXBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x21), 1, X86InstInfo{"VPMOVSXBD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x22), 1, X86InstInfo{"VPMOVSXBQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x23), 1, X86InstInfo{"VPMOVSXWD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x24), 1, X86InstInfo{"VPMOVSXWQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x25), 1, X86InstInfo{"VPMOVSXDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0x28), 1, X86InstInfo{"VPMULDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x29), 1, X86InstInfo{"VPCMPEQQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x2A), 1, X86InstInfo{"VMOVNTDQA", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x2B), 1, X86InstInfo{"VPACKUSDW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x2C), 1, X86InstInfo{"VMASKMOVPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x2D), 1, X86InstInfo{"VMASKMOVPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x2E), 1, X86InstInfo{"VMASKMOVPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x2F), 1, X86InstInfo{"VMASKMOVPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0x30), 1, X86InstInfo{"VPMOVZXBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x31), 1, X86InstInfo{"VPMOVZXBD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x32), 1, X86InstInfo{"VPMOVZXBQ", TYPE_UNDEC, FLAGS_NONE, 0, 
nullptr}}, + {OPD(2, 0b01, 0x33), 1, X86InstInfo{"VPMOVZXWD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x34), 1, X86InstInfo{"VPMOVZXWQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x35), 1, X86InstInfo{"VPMOVZXDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x36), 1, X86InstInfo{"VPERMD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x37), 1, X86InstInfo{"VPCMPGTQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0x38), 1, X86InstInfo{"VPMINSB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x39), 1, X86InstInfo{"VPMINSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x3A), 1, X86InstInfo{"VPMINUW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x3B), 1, X86InstInfo{"VPMINUD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x3C), 1, X86InstInfo{"VPMAXSB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x3D), 1, X86InstInfo{"VPMAXSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x3E), 1, X86InstInfo{"VPMAXUW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x3F), 1, X86InstInfo{"VPMAXUD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0x40), 1, X86InstInfo{"VPMULLD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x41), 1, X86InstInfo{"VPHMINPOSUW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x45), 1, X86InstInfo{"VPSRLV", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x46), 1, X86InstInfo{"VPSRAVD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x47), 1, X86InstInfo{"VPSLLV", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0x58), 1, X86InstInfo{"VPBROADCASTD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x59), 1, X86InstInfo{"VPBROADCASTQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x5A), 1, X86InstInfo{"VBROADCASTI128", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0x78), 1, X86InstInfo{"VPBROADCASTB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x79), 1, 
X86InstInfo{"VPBROADCASTW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0x8C), 1, X86InstInfo{"VPMASKMOV", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x8E), 1, X86InstInfo{"VPMASKMOV", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0x90), 1, X86InstInfo{"VPGATHERD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x91), 1, X86InstInfo{"VPGATHERQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x92), 1, X86InstInfo{"VPGATHERD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x93), 1, X86InstInfo{"VPGATHERQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0x96), 1, X86InstInfo{"VFMADDSUB132", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x97), 1, X86InstInfo{"VFMSUBADD132", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0x98), 1, X86InstInfo{"VFMADD132", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x99), 1, X86InstInfo{"VFMADD132", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x9A), 1, X86InstInfo{"VFMSUB132", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x9B), 1, X86InstInfo{"VFMSUB132", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x9C), 1, X86InstInfo{"VFNMADD132", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x9D), 1, X86InstInfo{"VFNMADD132", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x9E), 1, X86InstInfo{"VFNMSUB132", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0x9F), 1, X86InstInfo{"VFNMSUB132", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0xA8), 1, X86InstInfo{"VFMADD213", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xA9), 1, X86InstInfo{"VFMADD213", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xAA), 1, X86InstInfo{"VFMSUB213", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xAB), 1, X86InstInfo{"VFMSUB213", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xAC), 1, X86InstInfo{"VFNMADD213", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xAD), 1, 
X86InstInfo{"VFNMADD213", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xAE), 1, X86InstInfo{"VFNMSUB213", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xAF), 1, X86InstInfo{"VFNMSUB213", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0xB8), 1, X86InstInfo{"VFMADD231", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xB9), 1, X86InstInfo{"VFMADD231", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xBA), 1, X86InstInfo{"VFMSUB231", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xBB), 1, X86InstInfo{"VFMSUB231", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xBC), 1, X86InstInfo{"VFNMADD231", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xBD), 1, X86InstInfo{"VFNMADD231", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xBE), 1, X86InstInfo{"VFNMSUB231", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xBF), 1, X86InstInfo{"VFNMSUB231", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0xA6), 1, X86InstInfo{"VFMADDSUB213", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xA7), 1, X86InstInfo{"VFMSUBADD213", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0xB6), 1, X86InstInfo{"VFMADDSUB231", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xB7), 1, X86InstInfo{"VFMSUBADD231", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b01, 0xDB), 1, X86InstInfo{"VAESIMC", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xDC), 1, X86InstInfo{"VAESENC", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xDD), 1, X86InstInfo{"VAESENCLAST", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xDE), 1, X86InstInfo{"VAESDEC", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xDF), 1, X86InstInfo{"VAESDECLAST", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b00, 0xF2), 1, X86InstInfo{"ANDN", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b00, 0xF3), 1, X86InstInfo{"", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, // VEX Group 17 + {OPD(2, 0b01, 0xF3), 1, 
X86InstInfo{"", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, // VEX Group 17 + {OPD(2, 0b10, 0xF3), 1, X86InstInfo{"", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, // VEX Group 17 + {OPD(2, 0b11, 0xF3), 1, X86InstInfo{"", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, // VEX Group 17 + + {OPD(2, 0b00, 0xF5), 1, X86InstInfo{"BZHI", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xF5), 1, X86InstInfo{"PEXT", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b11, 0xF5), 1, X86InstInfo{"PDEP", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b11, 0xF6), 1, X86InstInfo{"MULX", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(2, 0b00, 0xF7), 1, X86InstInfo{"BEXTR", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b01, 0xF7), 1, X86InstInfo{"SHLX", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b10, 0xF7), 1, X86InstInfo{"SARX", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 0b11, 0xF7), 1, X86InstInfo{"SHRX", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + // VEX Map 3 + {OPD(3, 0b01, 0x00), 1, X86InstInfo{"VPERMQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x01), 1, X86InstInfo{"VPERMPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x02), 1, X86InstInfo{"VPBLENDD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x04), 1, X86InstInfo{"VPERMILPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x05), 1, X86InstInfo{"VPERMILPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x06), 1, X86InstInfo{"VPERM2F128", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(3, 0b01, 0x08), 1, X86InstInfo{"VROUNDPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x09), 1, X86InstInfo{"VROUNDPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x0A), 1, X86InstInfo{"VROUNDSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x0B), 1, X86InstInfo{"VROUNDSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x0C), 1, X86InstInfo{"VBLENDPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x0D), 1, X86InstInfo{"VBLENDPD", TYPE_UNDEC, 
FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x0E), 1, X86InstInfo{"VPBLENDW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x0F), 1, X86InstInfo{"VPALIGNR", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(3, 0b01, 0x14), 1, X86InstInfo{"VPEXTRB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x15), 1, X86InstInfo{"VPEXTRW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x16), 1, X86InstInfo{"VPEXTRD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x17), 1, X86InstInfo{"VEXTRACTPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(3, 0b01, 0x18), 1, X86InstInfo{"VINSERTF128", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x19), 1, X86InstInfo{"VEXTRACTF128", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x1D), 1, X86InstInfo{"VCVTPS2PH", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(3, 0b01, 0x20), 1, X86InstInfo{"VPINSRB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x21), 1, X86InstInfo{"VINSERTPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x22), 1, X86InstInfo{"VPINSRD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(3, 0b01, 0x38), 1, X86InstInfo{"VINSERTI128", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x39), 1, X86InstInfo{"VEXTRACTI128", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(3, 0b01, 0x40), 1, X86InstInfo{"VDPPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x41), 1, X86InstInfo{"VDPPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x42), 1, X86InstInfo{"VMPSADBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x44), 1, X86InstInfo{"VPCLMULQDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x46), 1, X86InstInfo{"VPERM2I128", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(3, 0b01, 0x48), 1, X86InstInfo{"VPERMILzz2PS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x49), 1, X86InstInfo{"VPERMILzz2PD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x4A), 1, X86InstInfo{"VBLENDVPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + 
{OPD(3, 0b01, 0x4B), 1, X86InstInfo{"VBLENDVPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x4C), 1, X86InstInfo{"VPBLENDVB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(3, 0b01, 0x5C), 1, X86InstInfo{"VFMADDSUBPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x5D), 1, X86InstInfo{"VFMADDSUBPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x5E), 1, X86InstInfo{"VFMSUBADDPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x5F), 1, X86InstInfo{"VFMSUBADDPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(3, 0b01, 0x60), 1, X86InstInfo{"VPCMPESTRM", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x61), 1, X86InstInfo{"VPCMPESTRI", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x62), 1, X86InstInfo{"VPCMPISTRM", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x63), 1, X86InstInfo{"VPCMPISTRI", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(3, 0b01, 0x68), 1, X86InstInfo{"VFMADDPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x69), 1, X86InstInfo{"VFMADDPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x6A), 1, X86InstInfo{"VFMADDSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x6B), 1, X86InstInfo{"VFMADDSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x6C), 1, X86InstInfo{"VFMSUBPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x6D), 1, X86InstInfo{"VFMSUBPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x6E), 1, X86InstInfo{"VFMSUBSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x6F), 1, X86InstInfo{"VFMSUBSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(3, 0b01, 0x78), 1, X86InstInfo{"VFNMADDPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x79), 1, X86InstInfo{"VFNMADDPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x7A), 1, X86InstInfo{"VFNMADDSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x7B), 1, X86InstInfo{"VFNMADDSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x7C), 1, 
X86InstInfo{"VFNMSUBPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x7D), 1, X86InstInfo{"VFNMSUBPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x7E), 1, X86InstInfo{"VFNMSUBSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0x7F), 1, X86InstInfo{"VFNMSUBSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(3, 0b01, 0xDF), 1, X86InstInfo{"VAESKEYGENASSIST", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(3, 0b11, 0xF0), 1, X86InstInfo{"RORX", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + // VEX Map 4 - 31 (Reserved) + }; +#undef OPD +#define OPD(group, opcode) ((group << 3) | (opcode)) +#define VEX_GROUP_12 0 +#define VEX_GROUP_13 1 +#define VEX_GROUP_14 2 +#define VEX_GROUP_15 3 +#define VEX_GROUP_17 4 + + const std::vector> VEXGroupTable = { + {OPD(VEX_GROUP_12, 0b010), 1, X86InstInfo{"VPSRLW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(VEX_GROUP_12, 0b100), 1, X86InstInfo{"VPSRAW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(VEX_GROUP_12, 0b110), 1, X86InstInfo{"VPSLLW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(VEX_GROUP_13, 0b010), 1, X86InstInfo{"VPSRLD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(VEX_GROUP_13, 0b100), 1, X86InstInfo{"VPSRAD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(VEX_GROUP_13, 0b110), 1, X86InstInfo{"VPSLLD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(VEX_GROUP_14, 0b010), 1, X86InstInfo{"VPSRLQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(VEX_GROUP_14, 0b011), 1, X86InstInfo{"VPSRLDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(VEX_GROUP_14, 0b110), 1, X86InstInfo{"VPSLLQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(VEX_GROUP_14, 0b111), 1, X86InstInfo{"VPSLLDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(VEX_GROUP_15, 0b010), 1, X86InstInfo{"VLDMXCSR", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(VEX_GROUP_15, 0b011), 1, X86InstInfo{"VSTMXCSR", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(VEX_GROUP_17, 0b001), 1, X86InstInfo{"BLSR", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + 
{OPD(VEX_GROUP_17, 0b010), 1, X86InstInfo{"BLSMSK", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(VEX_GROUP_17, 0b011), 1, X86InstInfo{"BLSI", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + }; + +#undef VEX_GROUP_12 +#undef VEX_GROUP_13 +#undef VEX_GROUP_14 +#undef VEX_GROUP_15 +#undef VEX_GROUP_17 + +#undef OPD + +#define OPD(group, pp, opcode) ( (group << 10) | (pp << 8) | (opcode)) +#define XOP_GROUP_8 0 +#define XOP_GROUP_9 1 +#define XOP_GROUP_A 2 + + const std::vector> XOPTable = { + // Group 8 + {OPD(XOP_GROUP_8, 0, 0x85), 1, X86InstInfo{"VPMAXSSWW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0x86), 1, X86InstInfo{"VPMACSSWD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0x87), 1, X86InstInfo{"VPMAXSSDQL", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(XOP_GROUP_8, 0, 0x8E), 1, X86InstInfo{"VPMACSSDD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0x8F), 1, X86InstInfo{"VPMACSSDQH", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(XOP_GROUP_8, 0, 0x95), 1, X86InstInfo{"VPMAXSWW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0x96), 1, X86InstInfo{"VPMAXSWD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0x97), 1, X86InstInfo{"VPMAXSDQL", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(XOP_GROUP_8, 0, 0x9E), 1, X86InstInfo{"VPMACSDD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0x9F), 1, X86InstInfo{"VPMACSDQH", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(XOP_GROUP_8, 0, 0xA2), 1, X86InstInfo{"VPCMOV", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0xA3), 1, X86InstInfo{"VPPERM", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0xA6), 1, X86InstInfo{"VPMADCSSWD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(XOP_GROUP_8, 0, 0xB6), 1, X86InstInfo{"VPMADCSWD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(XOP_GROUP_8, 0, 0xC0), 1, X86InstInfo{"VPROTB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0xC1), 1, X86InstInfo{"VPROTW", 
TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0xC2), 1, X86InstInfo{"VPROTD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0xC3), 1, X86InstInfo{"VPROTQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(XOP_GROUP_8, 0, 0xCC), 1, X86InstInfo{"VPCOMccB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0xCD), 1, X86InstInfo{"VPCOMccW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0xCE), 1, X86InstInfo{"VPCOMccD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0xCF), 1, X86InstInfo{"VPCOMccQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(XOP_GROUP_8, 0, 0xEC), 1, X86InstInfo{"VPCOMccUB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0xED), 1, X86InstInfo{"VPCOMccUW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0xEE), 1, X86InstInfo{"VPCOMccUD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_8, 0, 0xEF), 1, X86InstInfo{"VPCOMccUQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + // Group 9 + {OPD(XOP_GROUP_9, 0, 0x01), 1, X86InstInfo{"", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, // Group 1 + {OPD(XOP_GROUP_9, 0, 0x02), 1, X86InstInfo{"", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, // Group 2 + {OPD(XOP_GROUP_9, 0, 0x12), 1, X86InstInfo{"", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, // Group 3 + + {OPD(XOP_GROUP_9, 0, 0x80), 1, X86InstInfo{"VFRZPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0x81), 1, X86InstInfo{"VFRCZPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0x82), 1, X86InstInfo{"VFRCZSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0x83), 1, X86InstInfo{"VFRCZSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(XOP_GROUP_9, 0, 0x90), 1, X86InstInfo{"VPROTB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0x91), 1, X86InstInfo{"VPROTW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0x92), 1, X86InstInfo{"VPROTD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0x93), 1, 
X86InstInfo{"VRPTOQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0x94), 1, X86InstInfo{"VPSHLB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0x95), 1, X86InstInfo{"VPSHLW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0x96), 1, X86InstInfo{"VPSHLD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0x97), 1, X86InstInfo{"VPSHLQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(XOP_GROUP_9, 0, 0x98), 1, X86InstInfo{"VPSHAB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0x99), 1, X86InstInfo{"VPSHAW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0x9A), 1, X86InstInfo{"VPSHAD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0x9B), 1, X86InstInfo{"VPSHAQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(XOP_GROUP_9, 0, 0xC1), 1, X86InstInfo{"VPHADDBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0xC2), 1, X86InstInfo{"VPHADDBD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0xC3), 1, X86InstInfo{"VPHADDBQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0xC6), 1, X86InstInfo{"VPHADDWD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0xC7), 1, X86InstInfo{"VPHADDWQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0xCB), 1, X86InstInfo{"VPHADDDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(XOP_GROUP_9, 0, 0xD1), 1, X86InstInfo{"VPHADDUBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0xD2), 1, X86InstInfo{"VPHADDUBD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0xD3), 1, X86InstInfo{"VPHADDUBQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0xD6), 1, X86InstInfo{"VPHADDUWD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0xD7), 1, X86InstInfo{"VPHADDUWQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0xDB), 1, X86InstInfo{"VPHADDUDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + {OPD(XOP_GROUP_9, 0, 0xE1), 1, 
X86InstInfo{"VPHSUBBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0xE2), 1, X86InstInfo{"VPHSUBBD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_9, 0, 0xE3), 1, X86InstInfo{"VPHSUBDQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + // Group A + {OPD(XOP_GROUP_A, 0, 0x10), 1, X86InstInfo{"BEXTR", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(XOP_GROUP_A, 0, 0x12), 1, X86InstInfo{"", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, // Group 4 + }; + +#undef XOP_GROUP_8 +#undef XOP_GROUP_9 +#undef XOP_GROUP_A + +#undef OPD +#define OPD(subgroup, opcode) (((subgroup - 1) << 3) | (opcode)) + const std::vector> XOPGroupTable = { + // Group 1 + {OPD(1, 1), 1, X86InstInfo{"BLCFILL", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 2), 1, X86InstInfo{"BLSFILL", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 3), 1, X86InstInfo{"BLCS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 4), 1, X86InstInfo{"TZMSK", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 5), 1, X86InstInfo{"BLCIC", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 6), 1, X86InstInfo{"BLSIC", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 7), 1, X86InstInfo{"T1MSKC", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + // Group 2 + {OPD(2, 1), 1, X86InstInfo{"BLCMSK", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(2, 6), 1, X86InstInfo{"BLCI", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + // Group 3 + {OPD(3, 0), 1, X86InstInfo{"LLWPCB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 1), 1, X86InstInfo{"SLWPCB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + + // Group 4 + {OPD(4, 0), 1, X86InstInfo{"LWPINS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(4, 1), 1, X86InstInfo{"LWPVAL", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + }; +#undef OPD + + uint64_t Total{}; + uint64_t NumInsts{}; + auto GenerateTable = [&Total, &NumInsts](auto& FinalTable, auto& LocalTable) { + for (auto Op : LocalTable) { + auto OpNum = std::get<0>(Op); + auto Info = std::get<2>(Op); + for (uint8_t i = 0; i < std::get<1>(Op); ++i) { + 
LogMan::Throw::A(FinalTable.at(OpNum + i).Type == TYPE_UNKNOWN, "Duplicate Entry %s->%s", FinalTable.at(OpNum + i).Name, Info.Name); + FinalTable.at(OpNum + i) = Info; + ++Total; + if (Info.Type == TYPE_INST) + NumInsts++; + } + } + }; + + auto GenerateX87Table = [&Total, &NumInsts](auto& FinalTable, auto& LocalTable) { + for (auto Op : LocalTable) { + auto OpNum = std::get<0>(Op); + auto Info = std::get<2>(Op); + for (uint8_t i = 0; i < std::get<1>(Op); ++i) { + LogMan::Throw::A(FinalTable.at(OpNum + i).Type == TYPE_UNKNOWN, "Duplicate Entry %s->%s", FinalTable.at(OpNum + i).Name, Info.Name); + if ((OpNum & 0b11'000'000) == 0b11'000'000) { + // If the mod field is 0b11 then it is a regular op + FinalTable.at(OpNum + i) = Info; + } + else { + // If the mod field is !0b11 then this instruction is duplicated through the whole mod [0b00, 0b10] range + // and the modrm.rm space because that is used part of the instruction encoding + LogMan::Throw::A((OpNum & 0b11'000'000) == 0, "Only support mod field of zero in this path"); + for (uint16_t mod = 0b00'000'000; mod < 0b11'000'000; mod += 0b01'000'000) { + for (uint16_t rm = 0b000; rm < 0b1'000; ++rm) { + FinalTable.at((OpNum | mod | rm) + i) = Info; + } + } + } + Total++; + if (Info.Type == TYPE_INST) { + NumInsts++; + } + } + } + }; + + auto GenerateTableWithCopy = [&Total, &NumInsts](auto& FinalTable, auto& LocalTable, auto& OtherLocal) { + for (auto Op : LocalTable) { + auto OpNum = std::get<0>(Op); + auto Info = std::get<2>(Op); + for (uint8_t i = 0; i < std::get<1>(Op); ++i) { + LogMan::Throw::A(FinalTable.at(OpNum + i).Type == TYPE_UNKNOWN, "Duplicate Entry %s->%s", FinalTable.at(OpNum + i).Name, Info.Name); + if (Info.Type == TYPE_COPY_OTHER) { + FinalTable.at(OpNum + i) = OtherLocal.at(OpNum + i); + } + else { + FinalTable.at(OpNum + i) = Info; + Total++; + if (Info.Type == TYPE_INST) { + NumInsts++; + } + } + } + } + }; + + auto CheckTable = [&UnknownOp](auto& FinalTable) { + for (size_t i = 0; i < 
FinalTable.size(); ++i) { + auto const &Op = FinalTable.at(i); + + if (Op == UnknownOp) { + LogMan::Msg::A("Unknown Op: 0x%lx", i); + } + } + }; + + GenerateTable(BaseOps, BaseOpTable); + GenerateTable(SecondBaseOps, TwoByteOpTable); + GenerateTable(PrimaryInstGroupOps, PrimaryGroupOpTable); + + GenerateTableWithCopy(RepModOps, RepModOpTable, SecondBaseOps); + GenerateTableWithCopy(RepNEModOps, RepNEModOpTable, SecondBaseOps); + GenerateTableWithCopy(OpSizeModOps, OpSizeModOpTable, SecondBaseOps); + GenerateTable(SecondInstGroupOps, SecondaryExtensionOpTable); + GenerateTable(SecondModRMTableOps, SecondaryModRMExtensionOpTable); + + GenerateX87Table(X87Ops, X87OpTable); + GenerateTable(DDDNowOps, DDDNowOpTable); + GenerateTable(H0F38TableOps, H0F38Table); + GenerateTable(H0F3ATableOps, H0F3ATable); + + GenerateTable(VEXTableOps, VEXTable); + GenerateTable(VEXTableGroupOps, VEXGroupTable); + + GenerateTable(XOPTableOps, XOPTable); + GenerateTable(XOPTableGroupOps, XOPGroupTable); + + CheckTable(BaseOps); + CheckTable(SecondBaseOps); + + CheckTable(RepModOps); + CheckTable(RepNEModOps); + CheckTable(OpSizeModOps); + CheckTable(X87Ops); + +#ifndef NDEBUG + X86InstDebugInfo::InstallDebugInfo(); +#endif + + printf("X86Tables had %ld total insts, and %ld labeled as understood\n", Total, NumInsts); +} + +} diff --git a/Source/Interface/HLE/FileManagement.cpp b/Source/Interface/HLE/FileManagement.cpp new file mode 100644 index 000000000..8092d7236 --- /dev/null +++ b/Source/Interface/HLE/FileManagement.cpp @@ -0,0 +1,243 @@ +#include "LogManager.h" + +#include "Interface/Context/Context.h" +#include "Interface/HLE/FileManagement.h" +#include +#include +#include + +namespace FEXCore { + +class STDFD final : public FD { +public: + STDFD(FEXCore::Context::Context *ctx, int32_t fd, const char *pathname, int32_t flags, mode_t mode) + : FD (ctx, fd, pathname, flags, mode) { + } + + ssize_t writev(int fd, void *iov, int iovcnt) override { + ssize_t FinalSize {}; + std::string 
OutputString; + + struct iovStruct { + uint64_t base; + size_t len; + }; + iovStruct *iovObject = reinterpret_cast(iov); + + for (int i = 0; i < iovcnt; ++i) { + const char *String = CTX->MemoryMapper.GetPointer(iovObject[i].base); + for (size_t j = 0; j < iovObject[i].len; ++j) { + OutputString += String[j]; + } + FinalSize += iovObject[i].len; + } + + OutputString += '\0'; + + if (FDOffset == STDOUT_FILENO) + LogMan::Msg::OUT("[%ld] %s", FinalSize, OutputString.c_str()); + else if (FDOffset == STDERR_FILENO) + LogMan::Msg::ERR("[%ld] %s", FinalSize, OutputString.c_str()); + + return FinalSize; + } + + uint64_t write(int fd, void *buf, size_t count) override { + if (FDOffset == STDOUT_FILENO) + LogMan::Msg::OUT("%s", reinterpret_cast(buf)); + else if (FDOffset == STDERR_FILENO) + LogMan::Msg::ERR("%s", reinterpret_cast(buf)); + return count; + } +}; + +uint64_t FD::read(int fd, void *buf, size_t count) { + return ::read(HostFD, buf, count); +} + +ssize_t FD::writev(int fd, void *iov, int iovcnt) { + ssize_t FinalSize {}; + LogMan::Msg::I(">>> writev: %d %p %d", fd, iov, iovcnt); + for (int i = 0; i < iovcnt; ++i) { + struct iovStruct { + uint64_t base; + size_t len; + }; + iovStruct *iovObject = reinterpret_cast(iov); + const char *String = CTX->MemoryMapper.GetPointer(iovObject->base); + LogMan::Msg::I("\t0x%lx Size: 0x%zx %p", iovObject->base, iovObject->len, String); + for (size_t j = 0; j < iovObject->len; ++j) { + LogMan::Msg::I("%c", String[j]); + } + FinalSize += iovObject->len; + } + + return FinalSize; +} + +uint64_t FD::write(int fd, void *buf, size_t count) { + return ::write(fd, buf, count); +} + +int FD::openat(int dirfd, const char *pathname, int flags, mode_t mode) { + HostFD = ::openat(dirfd, pathname, flags, mode); + return HostFD; +} + +int FD::fstat(int fd, struct stat *buf) { + return ::fstat(HostFD, buf); +} + +int FD::close(int fd) { + LogMan::Msg::D("Closing: %s", PathName.c_str()); + return ::close(HostFD); +} + 
+FileManager::FileManager(FEXCore::Context::Context *ctx) + : CTX {ctx} { + + FDMap[CurrentFDOffset++] = new STDFD{CTX, STDIN_FILENO, "stdin", 0, 0}; + FDMap[CurrentFDOffset++] = new STDFD{CTX, STDOUT_FILENO, "stdout", 0, 0}; + FDMap[CurrentFDOffset++] = new STDFD{CTX, STDERR_FILENO, "stderr", 0, 0}; +} + +FileManager::~FileManager() { + for (auto &FD : FDMap) { + delete FD.second; + } +} + +uint64_t FileManager::Read(int fd, [[maybe_unused]] void *buf, [[maybe_unused]] size_t count) { + auto FD = FDMap.find(fd); + if (FD == FDMap.end()) { + LogMan::Msg::I("XXX: Implement Read: %d", fd); + return -1; + } + + return FD->second->read(fd, buf, count); +} + +uint64_t FileManager::Write(int fd, void *buf, size_t count) { + auto FD = FDMap.find(fd); + if (FD == FDMap.end()) { + LogMan::Msg::I("XXX: Implement write: %d", fd); + return -1; + } + + return FD->second->write(fd, buf, count); +} + +uint64_t FileManager::Open(const char *pathname, [[maybe_unused]] int flags, [[maybe_unused]] uint32_t mode) { + LogMan::Msg::I("XXX: Trying to open: '%s'", pathname); + return 0; +} + +uint64_t FileManager::Close(int fd) { + auto FD = FDMap.find(fd); + if (FD == FDMap.end()) { + LogMan::Msg::I("XXX: Trying to close: '%d'", fd); + return 0; + } + + int Result = FD->second->close(fd); + delete FD->second; + FDMap.erase(FD); + return Result; +} + +uint64_t FileManager::Stat(const char *pathname, void *buf) { + return ::stat(pathname, reinterpret_cast(buf)); +} + +uint64_t FileManager::Fstat(int fd, void *buf) { + if (fd == STDOUT_FILENO || fd == STDERR_FILENO) { + struct stat TmpBuf; + int Result = fstat(fd, &TmpBuf); + + // Blow away access times + // Causes issues with lockstep runner and file acesses + memset(&TmpBuf.st_atime, 0, sizeof(time_t)); + memset(&TmpBuf.st_mtime, 0, sizeof(time_t)); + memset(&TmpBuf.st_ctime, 0, sizeof(time_t)); + TmpBuf.st_rdev = 0x8800 + fd; + + memcpy(buf, &TmpBuf, sizeof(struct stat)); + return Result; + } + else { + auto FD = FDMap.find(fd); + if (FD 
!= FDMap.end()) { + return FD->second->fstat(fd, reinterpret_cast(buf)); + } + } + + LogMan::Msg::D("Attempting to stat: %d", fd); + return -1LL; +} + +uint64_t FileManager::Lseek(int fd, uint64_t offset, int whence) { + LogMan::Msg::E("XXX: Attempting to lseek %d 0x%lx 0x%x", fd, offset, whence); + return -1LL; +} + +uint64_t FileManager::Writev(int fd, void *iov, int iovcnt) { + auto fdPtr = FDMap.find(fd); + if (fdPtr == FDMap.end()) { + LogMan::Msg::E("XXX: Trying to writev unknown fd: %d", fd); + return FDMap.find(0)->second->writev(0, iov, iovcnt); + return -1LL; + } + return fdPtr->second->writev(fd, iov, iovcnt); +} + +uint64_t FileManager::Access(const char *pathname, [[maybe_unused]] int mode) { + LogMan::Msg::D("Trying to read access of: %s", pathname); + return access(pathname, mode); +} + +uint64_t FileManager::Readlink(const char *pathname, char *buf, size_t bufsiz) { + LogMan::Msg::D("Attemptign to readlink: '%s'", pathname); + if (strcmp(pathname, "/proc/self/exe") == 0) { + strncpy(buf, Filename.c_str(), bufsiz); + return std::min(bufsiz, Filename.size()); + } + + return readlink(pathname, buf, bufsiz); +} + +uint64_t FileManager::Openat([[maybe_unused]] int dirfs, const char *pathname, int flags, uint32_t mode) { + int32_t fd = CurrentFDOffset; + LogMan::Msg::D("Attempting to open '%s'", pathname); + if (!strcmp(pathname, "/dev/tty")) { + FDMap[CurrentFDOffset++] = new STDFD{CTX, STDOUT_FILENO, "/dev/tty", 0, 0}; + return fd; + } + + if (!strcmp(pathname, "/etc/ld.so.cache")) { + return -1; + } + + auto fdPtr = new FD{CTX, fd, pathname, flags, mode}; + + auto Result = fdPtr->openat(dirfs, pathname, flags, mode); + if (Result == -1) { + delete fdPtr; + return -1; + } + + FDMap[CurrentFDOffset++] = fdPtr; + + LogMan::Msg::D("Opening: %d(%d) %s\n", fd, Result, pathname); + return fd; +} + +int32_t FileManager::FindHostFD(int fd) { + auto FD = FDMap.find(fd); + if (FD == FDMap.end()) { + return -1; + } + + return FD->second->GetHostFD(); +} + +} diff 
#pragma once
#include <cstdint>
#include <string>
#include <sys/stat.h>
#include <unordered_map>

namespace FEXCore::Context {
struct Context;
}

namespace FEXCore {

// Represents one guest file descriptor plus the host FD backing it.
// Subclasses (e.g. the stdio shim) may override the write paths.
class FD {
public:
  FD() = delete;
  FD(FD &&) = delete;

  FD(FEXCore::Context::Context *ctx, int32_t fd, const char *pathname, int32_t flags, mode_t mode)
    : CTX {ctx}
    , FDOffset {fd}
    , PathName {pathname}
    , Flags {flags}
    , Mode {mode} {
  }
  virtual ~FD() {}

  uint64_t read(int fd, void *buf, size_t count);
  virtual ssize_t writev(int fd, void *iov, int iovcnt);
  virtual uint64_t write(int fd, void *buf, size_t count);
  int openat(int dirfd, const char *pathname, int flags, mode_t mode);
  int fstat(int fd, struct stat *buf);
  int close(int fd);

  ///< Host FD backing this guest FD; -1 when no host FD has been opened.
  int GetHostFD() const { return HostFD; }

protected:
  FEXCore::Context::Context *CTX;
  [[maybe_unused]] int32_t FDOffset{};
  std::string PathName;
  [[maybe_unused]] int32_t Flags;
  [[maybe_unused]] mode_t Mode;

  // Fix: initialize to -1. Previously uninitialized, so GetHostFD() on an FD
  // that never called openat() (e.g. the STDFD stdio shim) returned garbage.
  int32_t HostFD{-1};
};

// Owns the guest FD table and translates guest file operations to the host.
class FileManager final {
public:
  FileManager() = delete;
  FileManager(FileManager &&) = delete;

  FileManager(FEXCore::Context::Context *ctx);
  ~FileManager();
  uint64_t Read(int fd, void *buf, size_t count);
  uint64_t Write(int fd, void *buf, size_t count);
  uint64_t Open(const char *pathname, int flags, uint32_t mode);
  uint64_t Close(int fd);
  uint64_t Stat(const char *pathname, void *buf);
  uint64_t Fstat(int fd, void *buf);
  uint64_t Lseek(int fd, uint64_t offset, int whence);
  uint64_t Writev(int fd, void *iov, int iovcnt);
  uint64_t Access(const char *pathname, int mode);
  uint64_t Readlink(const char *pathname, char *buf, size_t bufsiz);
  uint64_t Openat(int dirfs, const char *pathname, int flags, uint32_t mode);

  ///< Translate a guest FD to its host FD; -1 if unknown.
  int32_t FindHostFD(int fd);

  ///< Path of the guest executable; reported for /proc/self/exe.
  void SetFilename(std::string const &File) { Filename = File; }
  std::string const & GetFilename() const { return Filename; }

private:
  FEXCore::Context::Context *CTX;
  int32_t CurrentFDOffset{0};
  std::unordered_map<int32_t, FD*> FDMap;

  std::string Filename;
};
}
Filename = File; } + std::string const & GetFilename() const { return Filename; } + +private: + FEXCore::Context::Context *CTX; + int32_t CurrentFDOffset{0}; + std::unordered_map FDMap; + + std::string Filename; +}; +} diff --git a/Source/Interface/HLE/Syscalls.cpp b/Source/Interface/HLE/Syscalls.cpp new file mode 100644 index 000000000..bc899ba5e --- /dev/null +++ b/Source/Interface/HLE/Syscalls.cpp @@ -0,0 +1,464 @@ +#include "Common/MathUtils.h" + +#include "Interface/Context/Context.h" +#include "Interface/Core/InternalThreadState.h" +#include "Interface/HLE/Syscalls.h" + +#include "LogManager.h" + +#include +#include + +constexpr uint64_t PAGE_SIZE = 4096; + +namespace FEXCore { +void SyscallHandler::DefaultProgramBreak(FEXCore::Core::InternalThreadState *Thread, uint64_t Addr) { + DataSpaceSize = 0; + DataSpace = Addr; + DefaultProgramBreakAddress = Addr; + + // Just allocate 1GB of data memory past the default program break location at this point + CTX->MapRegion(Thread, Addr, 0x1000'0000); +} + +uint64_t SyscallHandler::HandleSyscall(FEXCore::Core::InternalThreadState *Thread, FEXCore::HLE::SyscallArguments *Args) { + uint64_t Result = 0; + + LogMan::Msg::D("Syscall: %d", Args->Argument[0]); + + switch (Args->Argument[0]) { + case SYSCALL_UNAME: { + struct _utsname { + char sysname[65]; + char nodename[65]; + char release[65]; + char version[65]; + char machine[65]; + }; + _utsname *Local = CTX->MemoryMapper.GetPointer<_utsname*>(Args->Argument[1]); + strcpy(Local->sysname, "Linux"); + strcpy(Local->nodename, "FEXCore"); + strcpy(Local->release, "5.0.0"); + strcpy(Local->version, "#1"); + strcpy(Local->machine, "x86_64"); + Result = 0; + break; + } + // Memory management + case SYSCALL_BRK: { + LogMan::Msg::D("\tBRK: 0x%lx - 0x%lx", Args->Argument[1], DataSpace); + if (Args->Argument[1] == 0) { // Just wants to get the location of the program break atm + Result = DataSpace + DataSpaceSize; + } + else { + // Allocating out data space + uint64_t NewEnd = 
Args->Argument[1]; + if (NewEnd < DataSpace) { + // Not allowed to move brk end below original start + // Set the size to zero + DataSpaceSize = 0; + } + else { + uint64_t NewSize = NewEnd - DataSpace; + DataSpaceSize = NewSize; + } + Result = DataSpace + DataSpaceSize; + } + break; + } + case SYSCALL_MMAP: { + LogMan::Msg::D("\tMMAP( %p, 0x%lx, %d, 0x%x, %d, 0x%lx)", + Args->Argument[1], Args->Argument[2], + Args->Argument[3], Args->Argument[4], + Args->Argument[5], Args->Argument[6]); + int Flags = Args->Argument[4]; + int GuestFD = Args->Argument[5]; + int HostFD = -1; + + if (GuestFD != -1) { + HostFD = FM.FindHostFD(GuestFD); + } + + uint64_t Base = AlignDown(LastMMAP, PAGE_SIZE); + uint64_t Size = AlignUp(Args->Argument[2], PAGE_SIZE); + uint64_t FileSizeToUse = Args->Argument[2]; + uint64_t Prot = Args->Argument[3]; + +#ifdef DEBUG_MMAP + FileSizeToUse = Size; + Prot = PROT_READ | PROT_WRITE | PROT_EXEC; +#endif + + if (Flags & MAP_FIXED) { + Base = Args->Argument[1]; + void *HostPtr = CTX->MemoryMapper.GetPointer(Base); + if (!HostPtr) { + HostPtr = CTX->MapRegion(Thread, Base, Size, true); + } + else { + LogMan::Msg::D("\tMapping Fixed pointer in already mapped space: 0x%lx -> %p", Base, HostPtr); + } + + if (HostFD != -1) { +#ifdef DEBUG_MMAP + // We are a file. 
Screw you I'm going to just memcpy you in to place + void *FileMem = mmap(nullptr, FileSizeToUse, Prot, MAP_PRIVATE, HostFD, Args->Argument[6]); + if (FileMem == MAP_FAILED) { + LogMan::Msg::A("Couldn't map file to %p\n", HostPtr); + } + + memcpy(HostPtr, FileMem, FileSizeToUse); + munmap(FileMem, Size); +#else + void *FileMem = mmap(HostPtr, FileSizeToUse, Prot, MAP_PRIVATE | MAP_FIXED, HostFD, Args->Argument[6]); + if (FileMem == MAP_FAILED) { + LogMan::Msg::A("Couldn't map file to %p\n", HostPtr); + } +#endif + } + else { + LogMan::Throw::A(Args->Argument[6] == 0, "Don't understand a fixed offset mmap"); + } + + Result = Base; + } + else { + // XXX: MMAP should map memory regions for all threads + void *HostPtr = CTX->MapRegion(Thread, Base, Size, true); + + if (HostFD != -1) { +#ifdef DEBUG_MMAP + // We are a file. Screw you I'm going to just memcpy you in to place + void *FileMem = mmap(nullptr, FileSizeToUse, Prot, MAP_PRIVATE, HostFD, Args->Argument[6]); + if (FileMem == MAP_FAILED) { + LogMan::Msg::A("Couldn't map file to %p\n", HostPtr); + } + + memcpy(HostPtr, FileMem, FileSizeToUse); + munmap(FileMem, Size); +#else + void *FileMem = mmap(HostPtr, FileSizeToUse, Prot, MAP_PRIVATE | MAP_FIXED, HostFD, Args->Argument[6]); + if (FileMem == MAP_FAILED) { + LogMan::Msg::A("Couldn't map file to %p\n", HostPtr); + } +#endif + } + + LastMMAP += Size; + Result = Base; + } + break; + } + case SYSCALL_MPROTECT: { + LogMan::Msg::D("\tMPROTECT: 0x%x, 0x%lx, 0x%lx", Args->Argument[1], Args->Argument[2], Args->Argument[3]); + void *HostPtr = CTX->MemoryMapper.GetPointer(Args->Argument[1]); + + Result = mprotect(HostPtr, Args->Argument[2], Args->Argument[3]); + break; + } + case SYSCALL_ARCH_PRCTL: { + LogMan::Msg::D("\tPRTCL: 0x%x: 0x%lx", Args->Argument[1], Args->Argument[2]); + switch (Args->Argument[1]) { + case 0x1001: // ARCH_SET_GS + Thread->State.State.gs = Args->Argument[2]; + break; + case 0x1002: // ARCH_SET_FS + Thread->State.State.fs = Args->Argument[2]; + 
break; + case 0x1003: // ARCH_GET_FS + *CTX->MemoryMapper.GetPointer(Args->Argument[2]) = Thread->State.State.fs; + break; + case 0x1004: // ARCH_GET_GS + *CTX->MemoryMapper.GetPointer(Args->Argument[2]) = Thread->State.State.gs; + break; + default: + LogMan::Msg::E("Unknown prctl: 0x%x", Args->Argument[1]); + CTX->ShouldStop = true; + break; + } + Result = 0; + break; + } + // Thread management + case SYSCALL_GETUID: + Result = Thread->State.ThreadManager.GetUID(); + break; + case SYSCALL_GETGID: + Result = Thread->State.ThreadManager.GetGID(); + break; + case SYSCALL_GETEUID: + Result = Thread->State.ThreadManager.GetEUID(); + break; + case SYSCALL_GETEGID: + Result = Thread->State.ThreadManager.GetEGID(); + break; + case SYSCALL_GETTID: + Result = Thread->State.ThreadManager.GetTID(); + break; + case SYSCALL_GETPID: + Result = Thread->State.ThreadManager.GetPID(); + break; + case SYSCALL_EXIT: + LogMan::Msg::D("Thread exit with: %zd\n", Args->Argument[1]); + Thread->State.RunningEvents.ShouldStop = true; + break; + case SYSCALL_WAIT4: + LogMan::Msg::I("wait4(%lx,\n\t%lx,\n\t%lx,\n\t%lx)", + Args->Argument[1], + Args->Argument[2], + Args->Argument[3], + Args->Argument[4]); + break; + + // Futexes + case SYSCALL_FUTEX: { + + // 0 : uaddr + // 1 : op + // 2: val + // 3: utime + // 4: uaddr2 + // 5: val3 + LogMan::Msg::I("futex(%lx,\n\t%lx,\n\t%lx,\n\t%lx,\n\t%lx,\n\t%lx)", + Args->Argument[1], + Args->Argument[2], + Args->Argument[3], + Args->Argument[4], + Args->Argument[5], + Args->Argument[6]); + + uint8_t Command = Args->Argument[2] & 0xF; + Result = 0; + switch (Command) { + case 0: { // WAIT + LogMan::Throw::A(!Args->Argument[4], "Can't handle timed futexes"); + Futex *futex = new Futex{}; // XXX: Definitely a memory leak. When should we free this? 
+ futex->Addr = CTX->MemoryMapper.GetPointer*>(Args->Argument[1]); + futex->Val = Args->Argument[3]; + EmplaceFutex(Args->Argument[1], futex); + { + std::unique_lock lk(futex->Mutex); + futex->cv.wait(lk, [futex] { return futex->Addr->load() != futex->Val; }); + } + break; + } + case 1: { // WAKE + Futex *futex = GetFutex(Args->Argument[1]); + if (!futex) { + Result = 0; + break; + } + for (uint64_t i = 0; i < Args->Argument[3]; ++i) + futex->cv.notify_one(); + break; + } + default: + LogMan::Msg::A("Unknown futex command"); + break; + } + break; + } + case SYSCALL_CLONE: { + // 0: clone_flags + // 1: New SP + // 2: parent tidptr + // 3: child tidptr + // 4: TLS + LogMan::Msg::I("clone(%lx,\n\t%lx,\n\t%lx,\n\t%lx,\n\t%lx,\n\t%lx,\n\t%lx)", + Args->Argument[0], + Args->Argument[1], + Args->Argument[2], + Args->Argument[3], + Args->Argument[4], + Args->Argument[5]); + uint32_t Flags = Args->Argument[1]; + uint64_t NewSP = Args->Argument[2]; + uint64_t ParentTID = Args->Argument[3]; + uint64_t ChildTID = Args->Argument[4]; + uint64_t NewTLS = Args->Argument[5]; +#define FLAGPRINT(x, y) if (Flags & (y)) LogMan::Msg::I("\tFlag: " #x) + FLAGPRINT(CSIGNAL, 0x000000FF); + FLAGPRINT(CLONE_VM, 0x00000100); + FLAGPRINT(CLONE_FS, 0x00000200); + FLAGPRINT(CLONE_FILES, 0x00000400); + FLAGPRINT(CLONE_SIGHAND, 0x00000800); + FLAGPRINT(CLONE_PTRACE, 0x00002000); + FLAGPRINT(CLONE_VFORK, 0x00004000); + FLAGPRINT(CLONE_PARENT, 0x00008000); + FLAGPRINT(CLONE_THREAD, 0x00010000); + FLAGPRINT(CLONE_NEWNS, 0x00020000); + FLAGPRINT(CLONE_SYSVSEM, 0x00040000); + FLAGPRINT(CLONE_SETTLS, 0x00080000); + FLAGPRINT(CLONE_PARENT_SETTID, 0x00100000); + FLAGPRINT(CLONE_CHILD_CLEARTID, 0x00200000); + FLAGPRINT(CLONE_DETACHED, 0x00400000); + FLAGPRINT(CLONE_UNTRACED, 0x00800000); + FLAGPRINT(CLONE_CHILD_SETTID, 0x01000000); + FLAGPRINT(CLONE_NEWCGROUP, 0x02000000); + FLAGPRINT(CLONE_NEWUTS, 0x04000000); + FLAGPRINT(CLONE_NEWIPC, 0x08000000); + FLAGPRINT(CLONE_NEWUSER, 0x10000000); + 
FLAGPRINT(CLONE_NEWPID, 0x20000000); + FLAGPRINT(CLONE_NEWNET, 0x40000000); + FLAGPRINT(CLONE_IO, 0x80000000); + + FEXCore::Core::CPUState NewThreadState{}; + // Clone copies the parent thread's state + memcpy(&NewThreadState, &Thread->State.State, sizeof(FEXCore::Core::CPUState)); + + NewThreadState.gregs[FEXCore::X86State::REG_RAX] = 0; + NewThreadState.gregs[FEXCore::X86State::REG_RBX] = 0; + NewThreadState.gregs[FEXCore::X86State::REG_RBP] = 0; + NewThreadState.gregs[FEXCore::X86State::REG_RSP] = NewSP; + NewThreadState.fs = NewTLS; + + // Set us to start just after the syscall instruction + NewThreadState.rip += 2; + + auto NewThread = CTX->CreateThread(&NewThreadState, ParentTID, ChildTID); + CTX->CopyMemoryMapping(Thread, NewThread); + + // Sets the child TID to pointer in ParentTID + if (Flags & CLONE_PARENT_SETTID) { + uint64_t *TIDPtr = CTX->MemoryMapper.GetPointer(ParentTID); + TIDPtr[0] = NewThread->State.ThreadManager.GetTID(); + } + + // Sets the child TID to the pointer in ChildTID + if (Flags & CLONE_CHILD_SETTID) { + uint64_t *TIDPtr = CTX->MemoryMapper.GetPointer(ChildTID); + TIDPtr[0] = NewThread->State.ThreadManager.GetTID(); + } + + // When the thread exits, clear the child thread ID at ChildTID + // Additionally wakeup a futex at that address + // Address /may/ be changed with SET_TID_ADDRESS syscall + if (Flags & CLONE_CHILD_CLEARTID) { + } + + CTX->InitializeThread(NewThread); + + // Actually start the thread + CTX->RunThread(NewThread); + + // Return the new threads TID + Result = NewThread->State.ThreadManager.GetTID(); + break; + } + // File management + case SYSCALL_READ: + Result = FM.Read(Args->Argument[1], + CTX->MemoryMapper.GetPointer(Args->Argument[2]), + Args->Argument[3]); + break; + case SYSCALL_WRITE: + Result = FM.Write(Args->Argument[1], + CTX->MemoryMapper.GetPointer(Args->Argument[2]), + Args->Argument[3]); + break; + case SYSCALL_OPEN: + Result = FM.Open(CTX->MemoryMapper.GetPointer(Args->Argument[1]), + Args->Argument[2], 
+ Args->Argument[3]); + break; + case SYSCALL_CLOSE: + Result = FM.Close(Args->Argument[1]); + break; + case SYSCALL_STAT: + Result = FM.Stat(CTX->MemoryMapper.GetPointer(Args->Argument[1]), + CTX->MemoryMapper.GetPointer(Args->Argument[2])); + break; + case SYSCALL_FSTAT: + Result = FM.Fstat(Args->Argument[1], + CTX->MemoryMapper.GetPointer(Args->Argument[2])); + break; + case SYSCALL_LSEEK: + Result = FM.Lseek(Args->Argument[1], + Args->Argument[2], + Args->Argument[3]); + break; + case SYSCALL_WRITEV: + Result = FM.Writev(Args->Argument[1], + CTX->MemoryMapper.GetPointer(Args->Argument[2]), + Args->Argument[3]); + break; + case SYSCALL_ACCESS: + Result = FM.Access( + CTX->MemoryMapper.GetPointer(Args->Argument[1]), + Args->Argument[2]); + break; + case SYSCALL_READLINK: + Result = FM.Readlink( + CTX->MemoryMapper.GetPointer(Args->Argument[1]), + CTX->MemoryMapper.GetPointer(Args->Argument[2]), + Args->Argument[3]); + break; + case SYSCALL_OPENAT: + Result = FM.Openat( + Args->Argument[1], + CTX->MemoryMapper.GetPointer(Args->Argument[2]), + Args->Argument[3], + Args->Argument[4]); + break; + + case SYSCALL_CLOCK_GETTIME: { + timespec *ClockResult = CTX->MemoryMapper.GetPointer(Args->Argument[2]); + Result = clock_gettime(Args->Argument[1], ClockResult); + // XXX: Debug + // memset(ClockResult, 0, sizeof(timespec)); + } + break; + case SYSCALL_NANOSLEEP: { + timespec const* req = CTX->MemoryMapper.GetPointer(Args->Argument[1]); + timespec *rem = CTX->MemoryMapper.GetPointer(Args->Argument[2]); + Result = nanosleep(req, rem); + break; + } + case SYSCALL_SET_TID_ADDRESS: { + Thread->State.ThreadManager.child_tid = Args->Argument[1]; + Result = Thread->State.ThreadManager.GetTID(); + break; + } + case SYSCALL_SET_ROBUST_LIST: { + Thread->State.ThreadManager.robust_list_head = Args->Argument[1]; + Result = 0; + break; + } + case SYSCALL_PRLIMIT64: { + LogMan::Throw::A(Args->Argument[3] == 0, "Guest trying to set limit for %d", Args->Argument[2]); + struct rlimit { + 
uint64_t rlim_cur; + uint64_t rlim_max; + }; + switch (Args->Argument[2]) { + case 3: { // Stack limits + rlimit *old_limit = CTX->MemoryMapper.GetPointer(Args->Argument[3]); + // Default size + old_limit->rlim_cur = 8 * 1024; + old_limit->rlim_max = ~0ULL; + break; + } + default: LogMan::Msg::A("Unknown PRLimit: %d", Args->Argument[2]); + } + Result = 0; + + break; + } + // Currently unhandled + // Return fake result + case SYSCALL_RT_SIGACTION: + case SYSCALL_RT_SIGPROCMASK: + case SYSCALL_EXIT_GROUP: + case SYSCALL_TGKILL: + case SYSCALL_MUNMAP: + Result = 0; + break; + default: + Result = -1; + LogMan::Msg::A("Unknown syscall: %d", Args->Argument[0]); + break; + } + + return Result; +} +} diff --git a/Source/Interface/HLE/Syscalls.h b/Source/Interface/HLE/Syscalls.h new file mode 100644 index 000000000..96500767e --- /dev/null +++ b/Source/Interface/HLE/Syscalls.h @@ -0,0 +1,106 @@ +#pragma once + +#include "Interface/HLE/FileManagement.h" +#include + +#include +#include +#include +#include + + +namespace FEXCore::Context { +struct Context; +} + +namespace FEXCore::Core { +struct InternalThreadState; +} + +namespace FEXCore { + +///< Enum containing all support x86-64 linux syscalls that we support +enum Syscalls { + SYSCALL_READ = 0, ///< __NR_read + SYSCALL_WRITE = 1, ///< __NR_write + SYSCALL_OPEN = 2, ///< __NR_open + SYSCALL_CLOSE = 3, ///< __NR_close + SYSCALL_STAT = 4, ///< __NR_stat + SYSCALL_FSTAT = 5, ///< __NR_fstat + SYSCALL_LSEEK = 8, ///< __NR_lseek + SYSCALL_MMAP = 9, ///< __NR_mmap + SYSCALL_MPROTECT = 10, ///< __NR_mprotect + SYSCALL_MUNMAP = 11, ///< __NR_munmap + SYSCALL_BRK = 12, ///< __NR_brk + SYSCALL_RT_SIGACTION = 13, ///< __NR_rt_sigaction + SYSCALL_RT_SIGPROCMASK = 14, ///< __NR_rt_sigprocmask + SYSCALL_WRITEV = 20, ///< __NR_writev + SYSCALL_ACCESS = 21, ///< __NR_access + SYSCALL_NANOSLEEP = 35, ///< __NR_nanosleep + SYSCALL_GETPID = 39, ///< __NR_getpid + SYSCALL_CLONE = 56, ///< __NR_clone + SYSCALL_EXIT = 60, ///< __NR_exit + 
SYSCALL_WAIT4 = 61, ///< __NR_wait4 + SYSCALL_UNAME = 63, ///< __NR_uname + SYSCALL_READLINK = 89, ///< __NR_readlink + SYSCALL_GETUID = 102, ///< __NR_getuid + SYSCALL_GETGID = 104, ///< __NR_getgid + SYSCALL_GETEUID = 107, ///< __NR_geteuid + SYSCALL_GETEGID = 108, ///< __NR_getegid + SYSCALL_ARCH_PRCTL = 158, ///< __NR_arch_prctl + SYSCALL_GETTID = 186, ///< __NR_gettid + SYSCALL_FUTEX = 202, ///< __NR_futex + SYSCALL_SET_TID_ADDRESS = 218, ///< __NR_set_tid_address + SYSCALL_CLOCK_GETTIME = 228, ///< __NR_clock_gettime + SYSCALL_EXIT_GROUP = 231, ///< __NR_exit_group + SYSCALL_TGKILL = 234, ///< __NR_tgkill + SYSCALL_OPENAT = 257, ///< __NR_openat + SYSCALL_SET_ROBUST_LIST = 273, ///< __NR_set_robust_list + SYSCALL_PRLIMIT64 = 302, ///< __NR_prlimit64 +}; + +struct Futex { + std::mutex Mutex; + std::condition_variable cv; + std::atomic *Addr; + uint32_t Val; +}; + +class SyscallHandler final { +public: + SyscallHandler(FEXCore::Context::Context *ctx) : CTX {ctx}, FM {ctx} {} + uint64_t HandleSyscall(FEXCore::Core::InternalThreadState *Thread, FEXCore::HLE::SyscallArguments *Args); + + // XXX: This leaks memory. 
+ // Need to know when to delete futexes + void EmplaceFutex(uint64_t Addr, Futex *futex) { + std::scoped_lock lk(FutexMutex); + Futexes[Addr] = futex; + } + + Futex *GetFutex(uint64_t Addr) { + std::scoped_lock lk (FutexMutex); + return Futexes[Addr]; + } + + void DefaultProgramBreak(FEXCore::Core::InternalThreadState *Thread, uint64_t Addr); + + void SetFilename(std::string const &File) { FM.SetFilename(File); } + std::string const & GetFilename() const { return FM.GetFilename(); } + +private: + FEXCore::Context::Context *CTX; + FileManager FM; + + // Futex management + std::unordered_map Futexes; + std::mutex FutexMutex; + // BRK management + uint64_t DataSpace {}; + uint64_t DataSpaceSize {}; + uint64_t DefaultProgramBreakAddress {}; + + // MMap management + uint64_t LastMMAP = 0xd000'0000; +}; +} diff --git a/Source/Interface/IR/IR.cpp b/Source/Interface/IR/IR.cpp new file mode 100644 index 000000000..a300535c5 --- /dev/null +++ b/Source/Interface/IR/IR.cpp @@ -0,0 +1,66 @@ +#include +#include + +namespace FEXCore::IR { +#define IROP_GETNAME_IMPL +#include "IRDefines.inc" + +static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView const* IR, uint64_t Arg) { + *out << "0x" << std::hex << Arg; +} + +static void PrintArg(std::stringstream *out, IRListView const* IR, NodeWrapper Arg) { + uintptr_t Data = IR->GetData(); + uintptr_t ListBegin = IR->GetListData(); + + OrderedNode *RealNode = reinterpret_cast(Arg.GetPtr(ListBegin)); + auto IROp = RealNode->Op(Data); + + *out << "%ssa" << std::to_string(Arg.NodeOffset / sizeof(OrderedNode)) << " i" << std::dec << (IROp->Size * 8); + if (IROp->Elements > 1) { + *out << "v" << std::dec << IROp->Elements; + } +} + +void Dump(std::stringstream *out, IRListView const* IR) { + uintptr_t Data = IR->GetData(); + uintptr_t ListBegin = IR->GetListData(); + + auto Begin = IR->begin(); + auto End = IR->end(); + while (Begin != End) { + auto Op = Begin(); + OrderedNode *RealNode = 
reinterpret_cast(Op->GetPtr(ListBegin)); + auto IROp = RealNode->Op(Data); + + auto Name = FEXCore::IR::GetName(IROp->Op); + + if (IROp->HasDest) { + *out << "%ssa" << std::to_string(Op->NodeOffset / sizeof(OrderedNode)) << " i" << std::dec << (IROp->Size * 8); + if (IROp->Elements > 1) { + *out << "v" << std::dec << IROp->Elements; + } + *out << " = "; + } + + *out << Name; + switch (IROp->Op) { + case IR::OP_BEGINBLOCK: + *out << " %ssa" << std::to_string(Op->ID()); + break; + default: break; + } + +#define IROP_ARGPRINTER_HELPER +#include "IRDefines.inc" + default: *out << ""; break; + } + + *out << "\n"; + + ++Begin; + } + +} + +} diff --git a/Source/Interface/IR/IR.json b/Source/Interface/IR/IR.json new file mode 100644 index 000000000..3e17763ab --- /dev/null +++ b/Source/Interface/IR/IR.json @@ -0,0 +1,526 @@ +{ + "Defines": [ + "constexpr static uint8_t COND_EQ = 0", + "constexpr static uint8_t COND_NEQ = 1", + "constexpr static uint8_t COND_CS = 2", + "constexpr static uint8_t COND_CC = 3", + "constexpr static uint8_t COND_MI = 4", + "constexpr static uint8_t COND_PL = 5", + "constexpr static uint8_t COND_VS = 6", + "constexpr static uint8_t COND_VC = 7", + "constexpr static uint8_t COND_HI = 8", + "constexpr static uint8_t COND_LS = 9", + "constexpr static uint8_t COND_GE = 10", + "constexpr static uint8_t COND_LT = 11", + "constexpr static uint8_t COND_GT = 12", + "constexpr static uint8_t COND_LE = 13" + ], + + "Ops": { + "Dummy": { + }, + "Constant": { + "HasDest": true, + "FixedDestSize": "8", + "Args": [ + "uint64_t", "Constant" + ] + }, + "BeginBlock": {}, + "EndBlock": { + "Args": [ + "uint64_t", "RIPIncrement" + ] + }, + + "Break": { + "Args": [ + "uint8_t", "Reason", + "uint8_t", "Literal" + ] + }, + + "EndFunction": {}, + "ExitFunction": {}, + + "Jump": { + "DispatcherUnary": true, + "SSAArgs": "1" + }, + + "CondJump": { + "SSAArgs": "2" + }, + + "Mov": { + "HasDest": true, + "DestSize": "GetOpSize(ssa0)", + "SSAArgs": "1" + }, + + 
"CycleCounter": { + "HasDest": true, + "FixedDestSize": "8" + }, + + "LoadContext": { + "HasDest": true, + "DestSize": "Size", + "Args": [ + "uint8_t", "Size", + "uint32_t", "Offset" + ] + }, + + "StoreContext": { + "SSAArgs": "1", + "Args": [ + "uint8_t", "Size", + "uint32_t", "Offset" + ] + }, + + "LoadFlag": { + "HasDest": true, + "DestSize": "1", + "Args": [ + "uint32_t", "Flag" + ] + }, + + "StoreFlag": { + "SSAArgs": "1", + "Args": [ + "uint32_t", "Flag" + ] + }, + + "Syscall": { + "HasDest": true, + "FixedDestSize": "8", + "SSAArgs": "7" + }, + + "LoadMem": { + "HasDest": true, + "DestSize": "Size", + "SSAArgs": "1", + "Args": [ + "uint8_t", "Size" + ] + }, + + "StoreMem": { + "SSAArgs": "2", + "Args": [ + "uint8_t", "Size" + ] + }, + + "Add": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "Sub": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "Mul": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "UMul": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "Div": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "UDiv": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "Rem": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "URem": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "MulH": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "UMulH": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "Or": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "And": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "Xor": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "Lshl": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "Lshr": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "Ashr": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "Rol": { + "HasDest": 
true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "Ror": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "LDiv": { + "HasDest": true, + "SSAArgs": "3" + }, + + "LUDiv": { + "HasDest": true, + "SSAArgs": "3" + }, + + "LRem": { + "HasDest": true, + "SSAArgs": "3" + }, + + "LURem": { + "HasDest": true, + "SSAArgs": "3" + }, + + "Zext": { + "HasDest": true, + "DestSize": "SrcSize / 4", + "SSAArgs": "1", + "Args": [ + "uint8_t", "SrcSize" + ] + }, + + "Sext": { + "HasDest": true, + "DestSize": "SrcSize / 4", + "SSAArgs": "1", + "Args": [ + "uint8_t", "SrcSize" + ] + }, + + "Neg": { + "HasDest": true, + "DispatcherUnary": true, + "SSAArgs": "1" + }, + + "Popcount": { + "HasDest": true, + "DispatcherUnary": true, + "SSAArgs": "1" + }, + + "FindLSB": { + "HasDest": true, + "DispatcherUnary": true, + "SSAArgs": "1" + }, + + "FindMSB": { + "HasDest": true, + "DispatcherUnary": true, + "SSAArgs": "1" + }, + + "Rev": { + "HasDest": true, + "DispatcherUnary": true, + "SSAArgs": "1" + }, + + "CPUID": { + "HasDest": true, + "FixedDestSize": "4", + "NumElements": "4", + "SSAArgs": "1" + }, + + "Bfi": { + "HasDest": true, + "DestSize": "GetOpSize(ssa0)", + "SSAArgs": "2", + "Args": [ + "uint8_t", "Width", + "uint8_t", "lsb" + ] + }, + + "Bfe": { + "HasDest": true, + "DestSize": "GetOpSize(ssa0)", + "SSAArgs": "1", + "Args": [ + "uint8_t", "Width", + "uint8_t", "lsb" + ] + }, + + "Sbfe": { + "HasDest": true, + "SSAArgs": "1", + "Args": [ + "uint8_t", "Width", + "uint8_t", "lsb" + ] + }, + + "Select": { + "HasDest": true, + "SSAArgs": "4", + "Args": [ + "uint8_t", "Cond" + ] + }, + + "CAS": { + "HasDest": true, + "DestSize": "GetOpSize(ssa0)", + "SSAArgs": "3" + }, + + "CreateVector2": { + "HasDest": true, + "DestSize": "GetOpSize(ssa0) * 2", + "SSAArgs": "2" + }, + + "CreateVector3": { + "HasDest": true, + "DestSize": "GetOpSize(ssa0) * 3", + "SSAArgs": "3" + }, + + "CreateVector4": { + "HasDest": true, + "DestSize": "GetOpSize(ssa0) * 4", + "SSAArgs": "4" + 
}, + + "SplatVector2": { + "HasDest": true, + "NumElements": "2", + "DestSize": "GetOpSize(ssa0) * 2", + "SSAArgs": "1" + }, + + "SplatVector3": { + "HasDest": true, + "NumElements": "3", + "DestSize": "GetOpSize(ssa0) * 3", + "SSAArgs": "1" + }, + + "SplatVector4": { + "HasDest": true, + "NumElements": "4", + "DestSize": "GetOpSize(ssa0) * 4", + "SSAArgs": "1" + }, + + "ExtractElement": { + "HasDest": true, + "DestSize": "GetOpSize(ssa0)", + "SSAArgs": "1", + "Args": [ + "uint8_t", "Idx" + ] + }, + + "VOr": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "VXor": { + "HasDest": true, + "Dispatcher": true, + "SSAArgs": "2" + }, + + "VAdd": { + "HasDest": true, + "SSAArgs": "2", + "Args": [ + "uint8_t", "RegisterSize", + "uint8_t", "ElementSize" + ] + }, + + "VSub": { + "HasDest": true, + "SSAArgs": "2", + "Args": [ + "uint8_t", "RegisterSize", + "uint8_t", "ElementSize" + ] + }, + + "VUMin": { + "HasDest": true, + "SSAArgs": "2", + "Args": [ + "uint8_t", "RegisterSize", + "uint8_t", "ElementSize" + ] + }, + + "VSMin": { + "HasDest": true, + "SSAArgs": "2", + "Args": [ + "uint8_t", "RegisterSize", + "uint8_t", "ElementSize" + ] + }, + + "VZip": { + "HasDest": true, + "SSAArgs": "2", + "Args": [ + "uint8_t", "RegisterSize", + "uint8_t", "ElementSize" + ] + }, + + "VZip2": { + "HasDest": true, + "SSAArgs": "2", + "Args": [ + "uint8_t", "RegisterSize", + "uint8_t", "ElementSize" + ] + }, + + "VCMPEQ": { + "HasDest": true, + "SSAArgs": "2", + "Args": [ + "uint8_t", "RegisterSize", + "uint8_t", "ElementSize" + ] + }, + + "VCMPGT": { + "HasDest": true, + "SSAArgs": "2", + "Args": [ + "uint8_t", "RegisterSize", + "uint8_t", "ElementSize" + ] + }, + + "VUShl": { + "HasDest": true, + "SSAArgs": "2", + "Args": [ + "uint8_t", "RegisterSize", + "uint8_t", "ElementSize" + ] + }, + + "VUShlS": { + "HasDest": true, + "SSAArgs": "2", + "Args": [ + "uint8_t", "RegisterSize", + "uint8_t", "ElementSize" + ] + }, + + "VUShr": { + "HasDest": true, + "SSAArgs": "2", + 
"Args": [ + "uint8_t", "RegisterSize", + "uint8_t", "ElementSize" + ] + }, + + "VInsElement": { + "HasDest": true, + "SSAArgs": "2", + "Args": [ + "uint8_t", "RegisterSize", + "uint8_t", "ElementSize", + "uint8_t", "DestIdx", + "uint8_t", "SrcIdx" + ] + }, + + "Print": { + "DispatcherUnary": true, + "SSAArgs": "1" + }, + + "Last": { + "Last": true, + "Args": [] + } + } +} diff --git a/Source/Interface/IR/PassManager.cpp b/Source/Interface/IR/PassManager.cpp new file mode 100644 index 000000000..6931a164f --- /dev/null +++ b/Source/Interface/IR/PassManager.cpp @@ -0,0 +1,27 @@ +#include "Interface/IR/Passes.h" +#include "Interface/IR/PassManager.h" + +namespace FEXCore::IR { +void PassManager::AddDefaultPasses() { +// Passes.emplace_back(std::unique_ptr(CreateConstProp())); +// Passes.emplace_back(std::unique_ptr(CreateRedundantContextLoadElimination())); +// Passes.emplace_back(std::unique_ptr(CreateRedundantFlagCalculationEliminination())); +// Passes.emplace_back(std::unique_ptr(CreateSyscallOptimization())); +// Passes.emplace_back(std::unique_ptr(CreatePassDeadContextStoreElimination())); +// +// Passes.emplace_back(std::unique_ptr(CreateIRCompaction())); +} + +void PassManager::AddDefaultValidationPasses() { + Passes.emplace_back(std::unique_ptr(Validation::CreateIRValidation())); +} + +bool PassManager::Run(OpDispatchBuilder *Disp) { + bool Changed = false; + for (auto const &Pass : Passes) { + Changed |= Pass->Run(Disp); + } + return Changed; +} + +} diff --git a/Source/Interface/IR/PassManager.h b/Source/Interface/IR/PassManager.h new file mode 100644 index 000000000..379e5cbb8 --- /dev/null +++ b/Source/Interface/IR/PassManager.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +#include +#include + +namespace FEXCore::IR { +class OpDispatchBuilder; + +class Pass { +public: + virtual ~Pass() = default; + virtual bool Run(OpDispatchBuilder *Disp) = 0; +}; + +class PassManager final { +public: + void AddDefaultPasses(); + void AddDefaultValidationPasses(); + 
bool Run(OpDispatchBuilder *Disp); + +private: + std::vector> Passes; +}; +} + diff --git a/Source/Interface/IR/Passes.h b/Source/Interface/IR/Passes.h new file mode 100644 index 000000000..01c96aff0 --- /dev/null +++ b/Source/Interface/IR/Passes.h @@ -0,0 +1,17 @@ +#pragma once + +namespace FEXCore::IR { +class Pass; + +FEXCore::IR::Pass* CreateConstProp(); +FEXCore::IR::Pass* CreateRedundantContextLoadElimination(); +FEXCore::IR::Pass* CreatePassDeadContextStoreElimination(); +FEXCore::IR::Pass* CreateSyscallOptimization(); +FEXCore::IR::Pass* CreateRedundantFlagCalculationEliminination(); +FEXCore::IR::Pass* CreateIRCompaction(); + +namespace Validation { +FEXCore::IR::Pass* CreateIRValidation(); +} +} + diff --git a/Source/Interface/IR/Passes/ConstProp.cpp b/Source/Interface/IR/Passes/ConstProp.cpp new file mode 100644 index 000000000..7793f48b7 --- /dev/null +++ b/Source/Interface/IR/Passes/ConstProp.cpp @@ -0,0 +1,51 @@ +#include "Interface/IR/PassManager.h" +#include "Interface/Core/OpcodeDispatcher.h" + +namespace FEXCore::IR { + +class ConstProp final : public FEXCore::IR::Pass { +public: + bool Run(OpDispatchBuilder *Disp) override; +}; + +bool ConstProp::Run(OpDispatchBuilder *Disp) { + bool Changed = false; + auto CurrentIR = Disp->ViewIR(); + uintptr_t ListBegin = CurrentIR.GetListData(); + uintptr_t DataBegin = CurrentIR.GetData(); + + IR::NodeWrapperIterator Begin = CurrentIR.begin(); + IR::NodeWrapperIterator End = CurrentIR.end(); + + while (Begin != End) { + NodeWrapper *WrapperOp = Begin(); + OrderedNode *RealNode = reinterpret_cast(WrapperOp->GetPtr(ListBegin)); + FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin); + + switch (IROp->Op) { + case OP_ZEXT: { + auto Op = IROp->C(); + uint64_t Constant; + if (Disp->IsValueConstant(Op->Header.Args[0], &Constant)) { + uint64_t NewConstant = Constant & ((1ULL << Op->SrcSize) - 1); + auto ConstantVal = Disp->_Constant(NewConstant); + Disp->ReplaceAllUsesWith(RealNode, ConstantVal); + Changed = 
true; + } + + break; + } + default: break; + } + + ++Begin; + } + + return Changed; +} + +FEXCore::IR::Pass* CreateConstProp() { + return new ConstProp{}; +} + +} diff --git a/Source/Interface/IR/Passes/DeadContextStoreElimination.cpp b/Source/Interface/IR/Passes/DeadContextStoreElimination.cpp new file mode 100644 index 000000000..f373a13d3 --- /dev/null +++ b/Source/Interface/IR/Passes/DeadContextStoreElimination.cpp @@ -0,0 +1,116 @@ +#include "Interface/IR/PassManager.h" +#include "Interface/Core/OpcodeDispatcher.h" +#include + +namespace FEXCore::IR { +class DCSE final : public FEXCore::IR::Pass { +public: + bool Run(OpDispatchBuilder *Disp) override; +}; + +bool DCSE::Run(OpDispatchBuilder *Disp) { + //printf("Doing DCSE run\n"); + return false; +} + +FEXCore::IR::Pass* CreatePassDeadContextStoreElimination() { + return new DCSE{}; +} + +class RCLE final : public FEXCore::IR::Pass { +public: + bool Run(OpDispatchBuilder *Disp) override; +}; + +static bool IsAlignedGPR(uint8_t Size, uint32_t Offset, uint8_t *greg) { + if (Size != 8) return false; + if (Offset & 0b111) return false; + if (Offset < offsetof(FEXCore::Core::CPUState, gregs[0]) || Offset > offsetof(FEXCore::Core::CPUState, gregs[15])) return false; + + *greg = (Offset - offsetof(FEXCore::Core::CPUState, gregs[0])) / 8; + return true; +} + +static bool IsGPR(uint32_t Offset, uint8_t *greg) { + if (Offset < offsetof(FEXCore::Core::CPUState, gregs[0]) || Offset > offsetof(FEXCore::Core::CPUState, gregs[15])) return false; + + *greg = (Offset - offsetof(FEXCore::Core::CPUState, gregs[0])) / 8; + return true; +} + + +bool RCLE::Run(OpDispatchBuilder *Disp) { + bool Changed = false; + auto CurrentIR = Disp->ViewIR(); + uintptr_t ListBegin = CurrentIR.GetListData(); + uintptr_t DataBegin = CurrentIR.GetData(); + + IR::NodeWrapperIterator Begin = CurrentIR.begin(); + IR::NodeWrapperIterator End = CurrentIR.end(); + + std::array LastValidGPRStores{}; + + while (Begin != End) { + NodeWrapper *WrapperOp = 
Begin(); + OrderedNode *RealNode = reinterpret_cast(WrapperOp->GetPtr(ListBegin)); + FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin); + + if (IROp->Op == OP_BEGINBLOCK || + IROp->Op == OP_ENDBLOCK || + IROp->Op == OP_JUMP || + IROp->Op == OP_CONDJUMP || + IROp->Op == OP_EXITFUNCTION) { + // We don't track across block boundaries + LastValidGPRStores.fill(nullptr); + } + + if (IROp->Op == OP_STORECONTEXT) { + auto Op = IROp->CW(); + // Make sure we are within GREG state + uint8_t greg = ~0; + if (IsAlignedGPR(Op->Size, Op->Offset, &greg)) { + FEXCore::IR::IROp_Header *ArgOp = reinterpret_cast(Op->Header.Args[0].GetPtr(ListBegin))->Op(DataBegin); + // Ensure we aren't doing a mismatched store + // XXX: We should really catch this in IR validation + if (ArgOp->Size == 8) { + LastValidGPRStores[greg] = &Op->Header.Args[0]; + } + else { + LastValidGPRStores[greg] = nullptr; + } + } else if (IsGPR(Op->Offset, &greg)) { + // If we aren't overwriting the whole state then we don't want to track this value + LastValidGPRStores[greg] = nullptr; + } + } + + if (IROp->Op == OP_LOADCONTEXT) { + auto Op = IROp->C(); + + // Make sure we are within GREG state + uint8_t greg = ~0; + if (IsAlignedGPR(Op->Size, Op->Offset, &greg)) { + if (LastValidGPRStores[greg] != nullptr) { + // If the last store matches this load value then we can replace the loaded value with the previous valid one + auto MovVal = Disp->_Mov(reinterpret_cast(LastValidGPRStores[greg]->GetPtr(ListBegin))); + Disp->ReplaceAllUsesWith(RealNode, MovVal); + Changed = true; + } + } else if (IsGPR(Op->Offset, &greg)) { + // If we aren't overwriting the whole state then we don't want to track this value + LastValidGPRStores[greg] = nullptr; // 0 is invalid + } + } + ++Begin; + } + + return Changed; +} + +FEXCore::IR::Pass* CreateRedundantContextLoadElimination() { + return new RCLE{}; +} + +} + + diff --git a/Source/Interface/IR/Passes/IRCompaction.cpp b/Source/Interface/IR/Passes/IRCompaction.cpp new file mode 
100644 index 000000000..418baff1f --- /dev/null +++ b/Source/Interface/IR/Passes/IRCompaction.cpp @@ -0,0 +1,121 @@ +#include "Interface/IR/PassManager.h" +#include "Interface/Core/OpcodeDispatcher.h" + +#include + +namespace FEXCore::IR { + +class IRCompaction final : public FEXCore::IR::Pass { +public: + IRCompaction(); + bool Run(OpDispatchBuilder *Disp) override; + +private: + OpDispatchBuilder LocalBuilder; + std::vector NodeLocationRemapper; +}; + +IRCompaction::IRCompaction() { + NodeLocationRemapper.resize(9000); +} + +bool IRCompaction::Run(OpDispatchBuilder *Disp) { + auto CurrentIR = Disp->ViewIR(); + auto LocalIR = LocalBuilder.ViewIR(); + uint32_t NodeCount = LocalIR.GetListSize() / sizeof(OrderedNode); + + // Reset our local working list + LocalBuilder.ResetWorkingList(); + if (NodeLocationRemapper.size() < NodeCount) { + NodeLocationRemapper.resize(NodeCount); + } + memset(&NodeLocationRemapper.at(0), 0xFF, NodeCount * sizeof(IR::NodeWrapper::NodeOffsetType)); + + uintptr_t LocalListBegin = LocalIR.GetListData(); + uintptr_t LocalDataBegin = LocalIR.GetData(); + + uintptr_t ListBegin = CurrentIR.GetListData(); + uintptr_t DataBegin = CurrentIR.GetData(); + + IR::NodeWrapperIterator Begin = CurrentIR.begin(); + IR::NodeWrapperIterator End = CurrentIR.end(); + + // This compaction pass is something that we need to ensure correct ordering and distances between IROps\ + // Later on we assume that an IROp's SSA value live range is its Node locations + // + // RA distance calculation is calculated purely on the Node locations + // So we just need to reorder those + // + // Additionally there may be some dead ops hanging out in the IR list that are orphaned. 
+ // These can also be dropped during this pass + + while (Begin != End) { + NodeWrapper *WrapperOp = Begin(); + OrderedNode *RealNode = reinterpret_cast(WrapperOp->GetPtr(ListBegin)); + FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin); + size_t OpSize = FEXCore::IR::GetSize(IROp->Op); + + // Allocate the ops locally for our local dispatch + auto LocalPair = LocalBuilder.AllocateRawOp(OpSize); + IR::NodeWrapper LocalNodeWrapper = LocalPair.Node->Wrapped(LocalListBegin); + + // Copy over the op + memcpy(LocalPair.first, IROp, OpSize); + + // Set our map remapper to map the new location + // Even nodes that don't have a destination need to be in this map + // Need to be able to remap branch targets any other bits + NodeLocationRemapper[WrapperOp->ID()] = LocalNodeWrapper.ID(); + ++Begin; + } + + Begin = CurrentIR.begin(); + while (Begin != End) { + NodeWrapper *WrapperOp = Begin(); + OrderedNode *RealNode = reinterpret_cast(WrapperOp->GetPtr(ListBegin)); + FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin); + + NodeWrapper LocalNodeWrapper = NodeWrapper::WrapOffset(NodeLocationRemapper[WrapperOp->ID()] * sizeof(OrderedNode)); + OrderedNode *LocalNode = reinterpret_cast(LocalNodeWrapper.GetPtr(LocalListBegin)); + FEXCore::IR::IROp_Header *LocalIROp = LocalNode->Op(LocalDataBegin); + + // Now that we have the op copied over, we need to modify SSA values to point to the new correct locations + for (uint8_t i = 0; i < IROp->NumArgs; ++i) { + NodeWrapper OldArg = IROp->Args[i]; + LogMan::Throw::A(NodeLocationRemapper[OldArg.ID()] != ~0U, "Tried remapping unfound node"); + LocalIROp->Args[i].NodeOffset = NodeLocationRemapper[OldArg.ID()] * sizeof(OrderedNode); + } + ++Begin; + } + +// uintptr_t OldListSize = CurrentIR.GetListSize(); +// uintptr_t OldDataSize = CurrentIR.GetDataSize(); +// +// uintptr_t NewListSize = LocalIR.GetListSize(); +// uintptr_t NewDataSize = LocalIR.GetDataSize(); +// +// if (NewListSize < OldListSize || +// NewDataSize < OldDataSize) 
{ +// if (NewListSize < OldListSize) { +// LogMan::Msg::D("Shaved %ld bytes off the list size", OldListSize - NewListSize); +// } +// if (NewDataSize < OldDataSize) { +// LogMan::Msg::D("Shaved %ld bytes off the data size", OldDataSize - NewDataSize); +// } +// } + +// if (NewListSize > OldListSize || +// NewDataSize > OldDataSize) { +// LogMan::Msg::A("Whoa. Compaction made the IR a different size when it shouldn't have. 0x%lx > 0x%lx or 0x%lx > 0x%lx",NewListSize, OldListSize, NewDataSize, OldDataSize); +// } + + Disp->CopyData(LocalBuilder); + + return true; +} + +FEXCore::IR::Pass* CreateIRCompaction() { + return new IRCompaction{}; +} + +} diff --git a/Source/Interface/IR/Passes/IRValidation.cpp b/Source/Interface/IR/Passes/IRValidation.cpp new file mode 100644 index 000000000..1e9f66a70 --- /dev/null +++ b/Source/Interface/IR/Passes/IRValidation.cpp @@ -0,0 +1,153 @@ +#include "Interface/IR/PassManager.h" +#include "Interface/Core/OpcodeDispatcher.h" + +#include + +namespace FEXCore::IR::Validation { + +struct BlockInfo { + IR::NodeWrapper *Begin; + IR::NodeWrapper *End; + + bool HasExit; + + std::vector Predecessors; + std::vector Successors; +}; + +class IRValidation final : public FEXCore::IR::Pass { +public: + bool Run(OpDispatchBuilder *Disp) override; + +private: + std::unordered_map OffsetToBlockMap; +}; + +bool IRValidation::Run(OpDispatchBuilder *Disp) { + bool HadError = false; + auto CurrentIR = Disp->ViewIR(); + uintptr_t ListBegin = CurrentIR.GetListData(); + uintptr_t DataBegin = CurrentIR.GetData(); + + IR::NodeWrapperIterator Begin = CurrentIR.begin(); + IR::NodeWrapperIterator End = CurrentIR.end(); + + bool InBlock = false; + BlockInfo *CurrentBlock {}; + std::ostringstream Errors; + + while (Begin != End) { + NodeWrapper *WrapperOp = Begin(); + OrderedNode *RealNode = reinterpret_cast(WrapperOp->GetPtr(ListBegin)); + FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin); + + uint8_t OpSize = IROp->Size; + + if (IROp->HasDest) { + 
HadError |= OpSize == 0; + if (OpSize == 0) { + Errors << "%ssa" << WrapperOp->NodeOffset << ": Had destination but with no size" << std::endl; + } + } + + switch (IROp->Op) { + case OP_BEGINBLOCK: { + HadError |= InBlock; + if (InBlock) { + Errors << "BasicBlock " << WrapperOp->NodeOffset << ": Begin in middle of block" << std::endl; + } + + auto Block = OffsetToBlockMap.try_emplace(WrapperOp->NodeOffset, BlockInfo{}).first; + CurrentBlock = &Block->second; + CurrentBlock->Begin = WrapperOp; + InBlock = true; + break; + } + + case OP_ENDBLOCK: { + HadError |= !InBlock; + if (!InBlock) { + Errors << "BasicBlock " << WrapperOp->NodeOffset << ": End loose without a begin" << std::endl; + } + + if (CurrentBlock) { + // XXX: Enable once fallthrough is handled + // HadError |= !CurrentBlock->HasExit && CurrentBlock->Successors.size() == 0; + // if (!CurrentBlock->HasExit && CurrentBlock->Successors.size() == 0) { + // Errors << "BasicBlock " << WrapperOp->NodeOffset << ": Didn't have an exit and didn't have any successors. 
(Fallthrough?)" << std::endl; + // } + CurrentBlock->End = WrapperOp; + CurrentBlock = nullptr; + } + InBlock = false; + break; + } + case IR::OP_EXITFUNCTION: + case IR::OP_ENDFUNCTION: { + if (CurrentBlock) { + CurrentBlock->HasExit = true; + } + break; + } + case IR::OP_CONDJUMP: { + auto Op = IROp->C(); + auto IterLocation = NodeWrapperIterator(ListBegin, Op->Header.Args[1]); + if (CurrentBlock) { + CurrentBlock->Successors.emplace_back(IterLocation()); + } + + OrderedNode *TargetNode = reinterpret_cast(IterLocation()->GetPtr(ListBegin)); + FEXCore::IR::IROp_Header *TargetOp = TargetNode->Op(DataBegin); + HadError |= TargetOp->Op != OP_BEGINBLOCK; + if (TargetOp->Op != OP_BEGINBLOCK) { + Errors << "CondJump " << WrapperOp->NodeOffset << ": CondJump to Op that isn't the begining of a block" << std::endl; + } + else { + auto Block = OffsetToBlockMap.try_emplace(IterLocation()->NodeOffset, BlockInfo{}).first; + Block->second.Predecessors.emplace_back(CurrentBlock->Begin); + } + + break; + } + + case IR::OP_JUMP: { + auto Op = IROp->C(); + auto IterLocation = NodeWrapperIterator(ListBegin, Op->Header.Args[0]); + if (CurrentBlock) { + CurrentBlock->Successors.emplace_back(IterLocation()); + } + + OrderedNode *TargetNode = reinterpret_cast(IterLocation()->GetPtr(ListBegin)); + FEXCore::IR::IROp_Header *TargetOp = TargetNode->Op(DataBegin); + HadError |= TargetOp->Op != OP_BEGINBLOCK; + if (TargetOp->Op != OP_BEGINBLOCK) { + Errors << "Jump " << WrapperOp->NodeOffset << ": Jump to Op that isn't the begining of a block" << std::endl; + } + else { + auto Block = OffsetToBlockMap.try_emplace(IterLocation()->NodeOffset, BlockInfo{}).first; + Block->second.Predecessors.emplace_back(CurrentBlock->Begin); + } + break; + } + + default: + //LogMan::Msg::A("Unknown IR Op: %d(%s)", IROp->Op, FEXCore::IR::GetName(IROp->Op).data()); + break; + } + + ++Begin; + } + + if (HadError) { + std::stringstream Out; + FEXCore::IR::Dump(&Out, &CurrentIR); + + std::cerr << Errors.str() << 
std::endl << Out.str() << std::endl; + } + return false; +} + +FEXCore::IR::Pass* CreateIRValidation() { + return new IRValidation{}; +} +} diff --git a/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp b/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp new file mode 100644 index 000000000..51df22214 --- /dev/null +++ b/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp @@ -0,0 +1,66 @@ +#include "Interface/IR/PassManager.h" +#include "Interface/Core/OpcodeDispatcher.h" + +namespace FEXCore::IR { + +class RedundantFlagCalculationEliminination final : public FEXCore::IR::Pass { +public: + bool Run(OpDispatchBuilder *Disp) override; +}; + +bool RedundantFlagCalculationEliminination::Run(OpDispatchBuilder *Disp) { + bool Changed = false; + auto CurrentIR = Disp->ViewIR(); + uintptr_t ListBegin = CurrentIR.GetListData(); + uintptr_t DataBegin = CurrentIR.GetData(); + + IR::NodeWrapperIterator Begin = CurrentIR.begin(); + IR::NodeWrapperIterator End = CurrentIR.end(); + + std::array LastValidFlagStores{}; + + while (Begin != End) { + NodeWrapper *WrapperOp = Begin(); + OrderedNode *RealNode = reinterpret_cast(WrapperOp->GetPtr(ListBegin)); + FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin); + + if (IROp->Op == OP_BEGINBLOCK || + IROp->Op == OP_ENDBLOCK || + IROp->Op == OP_JUMP || + IROp->Op == OP_CONDJUMP || + IROp->Op == OP_EXITFUNCTION) { + // We don't track across block boundaries + LastValidFlagStores.fill(nullptr); + } + + if (IROp->Op == OP_STOREFLAG) { + auto Op = IROp->CW(); + + // If we have had a valid flag store previously and it hasn't been touched until this new store + // Then just delete the old one and let DCE to take care of the rest + if (LastValidFlagStores[Op->Flag] != nullptr) { + Disp->Unlink(LastValidFlagStores[Op->Flag]); + Changed = true; + } + + // Set this node as the last one valid for this flag + LastValidFlagStores[Op->Flag] = RealNode; + } + else if (IROp->Op == OP_LOADFLAG) { + 
auto Op = IROp->CW(); + + // If we loaded a flag then we can't track past this + LastValidFlagStores[Op->Flag] = nullptr; + } + + ++Begin; + } + + return Changed; +} + +FEXCore::IR::Pass* CreateRedundantFlagCalculationEliminination() { + return new RedundantFlagCalculationEliminination{}; +} + +} diff --git a/Source/Interface/IR/Passes/SyscallOptimization.cpp b/Source/Interface/IR/Passes/SyscallOptimization.cpp new file mode 100644 index 000000000..98b79d74d --- /dev/null +++ b/Source/Interface/IR/Passes/SyscallOptimization.cpp @@ -0,0 +1,46 @@ +#include "Interface/IR/PassManager.h" +#include "Interface/Core/OpcodeDispatcher.h" + +#include "LogManager.h" + +namespace FEXCore::IR { + +class SyscallOptimization final : public FEXCore::IR::Pass { +public: + bool Run(OpDispatchBuilder *Disp) override; +}; + +bool SyscallOptimization::Run(OpDispatchBuilder *Disp) { + bool Changed = false; + auto CurrentIR = Disp->ViewIR(); + uintptr_t ListBegin = CurrentIR.GetListData(); + uintptr_t DataBegin = CurrentIR.GetData(); + + IR::NodeWrapperIterator Begin = CurrentIR.begin(); + IR::NodeWrapperIterator End = CurrentIR.end(); + + while (Begin != End) { + NodeWrapper *WrapperOp = Begin(); + OrderedNode *RealNode = reinterpret_cast(WrapperOp->GetPtr(ListBegin)); + FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin); + + if (IROp->Op == FEXCore::IR::OP_SYSCALL) { + // Is the first argument a constant? + uint64_t Constant; + if (Disp->IsValueConstant(IROp->Args[0], &Constant)) { + // LogMan::Msg::A("Whoa. 
Syscall argument is constant: %ld", Constant); + Changed = true; + } + + } + ++Begin; + } + + return Changed; +} + +FEXCore::IR::Pass* CreateSyscallOptimization() { + return new SyscallOptimization{}; +} + +} diff --git a/Source/Interface/Memory/MemMapper.cpp b/Source/Interface/Memory/MemMapper.cpp new file mode 100644 index 000000000..31e126064 --- /dev/null +++ b/Source/Interface/Memory/MemMapper.cpp @@ -0,0 +1,61 @@ +#include "LogManager.h" +#include "Interface/Memory/MemMapper.h" +#include +#include +#include + +namespace FEXCore::Memory { + + void *MemMapper::MapRegion(uint64_t Offset, size_t Size, bool Fixed) { + return MapRegion(Offset, Size, PROT_READ | PROT_WRITE, Fixed); + } + + void *MemMapper::ChangeMappedRegion(uint64_t Offset, size_t Size, uint32_t Flags, bool Fixed) { + uintptr_t PtrOffset = reinterpret_cast(SHM->Object.Ptr) + Offset; + + void *Ptr = mmap(reinterpret_cast(PtrOffset), Size, Flags, + MAP_POPULATE | MAP_SHARED | (Fixed ? MAP_FIXED : 0), SHM->SHMFD, Offset); + + if (Ptr == MAP_FAILED) { + LogMan::Msg::A("Failed to map memory region [0x%lx, 0x%lx)", Offset, Offset + Size); + return nullptr; + } + + return Ptr; + } + + void *MemMapper::MapRegion(uint64_t Offset, size_t Size, uint32_t Flags, bool Fixed) { + uintptr_t PtrOffset = reinterpret_cast(SHM->Object.Ptr) + Offset; + + void *Ptr = mmap(reinterpret_cast(PtrOffset), Size, Flags, + MAP_SHARED | (Fixed ? 
MAP_FIXED : 0), SHM->SHMFD, Offset); + + if (Ptr == MAP_FAILED) { + LogMan::Msg::A("Failed to map memory region [0x%lx, 0x%lx)", Offset, Offset + Size); + return nullptr; + } + + MappedRegions.emplace_back(MemRegion{Ptr, Offset, Size}); + + return Ptr; + } + + void MemMapper::UnmapRegion(void *Ptr, size_t Size) { + auto it = std::find(MappedRegions.begin(), MappedRegions.end(), Ptr); + if (it != MappedRegions.end()) { + munmap(Ptr, Size); + MappedRegions.erase(it); + } + } + + void *MemMapper::GetPointer(uint64_t Offset) { + for (auto const &Region : MappedRegions) { + if (Region.contains(Offset)) { + return reinterpret_cast(reinterpret_cast(Region.Ptr) + (Offset - Region.Offset)); + } + } + + return nullptr; + } +} + diff --git a/Source/Interface/Memory/MemMapper.h b/Source/Interface/Memory/MemMapper.h new file mode 100644 index 000000000..ec4e63e3a --- /dev/null +++ b/Source/Interface/Memory/MemMapper.h @@ -0,0 +1,44 @@ +#pragma once +#include "Interface/Memory/SharedMem.h" +#include +#include +#include + +namespace FEXCore::Context { +struct Context; +} + +namespace FEXCore::Memory { + + class MemMapper final { + friend struct FEXCore::Context::Context; + public: + void SetBaseRegion(FEXCore::SHM::SHMObject *NewSHM) { + SHM = reinterpret_cast(NewSHM); + } + + void *MapRegion(uint64_t Offset, size_t Size, bool Fixed = true); + void *MapRegion(uint64_t Offset, size_t Size, uint32_t Flags, bool Fixed = true); + void *ChangeMappedRegion(uint64_t Offset, size_t Size, uint32_t Flags, bool Fixed = true); + + void UnmapRegion(void *Ptr, size_t Size); + + void *GetMemoryBase() { return SHM->Object.Ptr; } + + void *GetPointer(uint64_t Offset); + template + T GetPointer(uint64_t Offset) { + return reinterpret_cast(GetPointer(Offset)); + } + + template + T GetBaseOffset(uint64_t Offset) { + return reinterpret_cast((reinterpret_cast(GetMemoryBase()) + Offset)); + } + + private: + FEXCore::SHM::InternalSHMObject *SHM; + std::vector MappedRegions{}; + }; +} + diff --git 
a/Source/Interface/Memory/SharedMem.cpp b/Source/Interface/Memory/SharedMem.cpp new file mode 100644 index 000000000..7e42a7bcb --- /dev/null +++ b/Source/Interface/Memory/SharedMem.cpp @@ -0,0 +1,61 @@ +#include "LogManager.h" +#include "Interface/Memory/SharedMem.h" +#include +#include +#include +#include +#include +#include +#include + +namespace FEXCore::SHM { + void *MapRegionFlags(InternalSHMObject *SHM, size_t Offset, size_t Size, uint32_t flags, bool Fixed) { + uintptr_t PtrOffset = reinterpret_cast(SHM->Object.Ptr) + Offset; + + void *Ptr = mmap(reinterpret_cast(PtrOffset), Size, flags, + MAP_PRIVATE | (Fixed ? MAP_FIXED : 0), SHM->SHMFD, Offset); + if (Ptr == MAP_FAILED) { + LogMan::Msg::A("Failed to map memory region [0x%lx, 0x%lx)", Offset, Offset + Size); + return nullptr; + } + + return Ptr; + } + + SHMObject *AllocateSHMRegion(size_t Size) { + InternalSHMObject *SHM = new InternalSHMObject{}; + const std::string SHMName = "FEXCore" + std::to_string(getpid()); + + SHM->SHMFD = shm_open(SHMName.c_str(), O_RDWR | O_CREAT | O_EXCL, 0600); + if (SHM->SHMFD == -1) { + LogMan::Msg::E("Couldn't open SHM"); + goto err; + } + + // Unlink the SHM file immediately so it doesn't get left around + shm_unlink(SHMName.c_str()); + + // Extend the SHM to the size we requested + if (ftruncate(SHM->SHMFD, Size) != 0) { + LogMan::Msg::E("Couldn't set SHM size"); + goto err; + } + + SHM->Object.Ptr = MapRegionFlags(SHM, 0, Size, PROT_READ | PROT_WRITE, false); + if (SHM->Object.Ptr == nullptr) { + goto err; + } + + return &SHM->Object; +err: + delete SHM; + return nullptr; + } + + void DestroyRegion(SHMObject *SHM) { + InternalSHMObject *Obj = reinterpret_cast(SHM); + close(Obj->SHMFD); + delete Obj; + } + +} diff --git a/Source/Interface/Memory/SharedMem.h b/Source/Interface/Memory/SharedMem.h new file mode 100644 index 000000000..75f4ec4c1 --- /dev/null +++ b/Source/Interface/Memory/SharedMem.h @@ -0,0 +1,12 @@ +#pragma once + +#include +#include +#include + +namespace 
FEXCore::SHM { + struct InternalSHMObject { + SHMObject Object; + int SHMFD; + }; +} diff --git a/Source/Test/CMakeLists.txt b/Source/Test/CMakeLists.txt new file mode 100644 index 000000000..84ccb0f17 --- /dev/null +++ b/Source/Test/CMakeLists.txt @@ -0,0 +1,16 @@ +set (NAME IRTest) +set (SRCS IRTest.cpp) + +add_executable(${NAME} ${SRCS}) +add_dependencies(${NAME} IR_INC) + +target_link_libraries(${NAME} ${PROJECT_NAME} SonicUtils) + +set (NAME LLVMIRTest) +set (SRCS LLVMIRTest.cpp) + +add_executable(${NAME} ${SRCS}) +add_dependencies(${NAME} IR_INC) + +target_link_libraries(${NAME} ${PROJECT_NAME} SonicUtils) + diff --git a/Source/Test/IRTest.cpp b/Source/Test/IRTest.cpp new file mode 100644 index 000000000..7b163df05 --- /dev/null +++ b/Source/Test/IRTest.cpp @@ -0,0 +1,11 @@ +#include + +#include "LogManager.h" +#include +#include +#include +#include + +int main(int argc, char **argv) { + printf("IR Test\n"); +} diff --git a/Source/Test/LLVMIRTest.cpp b/Source/Test/LLVMIRTest.cpp new file mode 100644 index 000000000..c168a5084 --- /dev/null +++ b/Source/Test/LLVMIRTest.cpp @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +int main(int argc, char **argv) { + printf("LLVM Test\n"); + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + + auto ContextRef = LLVMContextCreate(); + auto Con = *llvm::unwrap(&ContextRef); + auto MainModule = new llvm::Module("Main Module", *Con); + auto IRBuilder = new llvm::IRBuilder<>(*Con); + + using namespace llvm; + + Type *i64 = Type::getInt64Ty(*Con); + auto FunctionType = FunctionType::get(Type::getVoidTy(*Con), + { + i64, + }, false); + + + legacy::PassManager PM; + PassManagerBuilder PMBuilder; + PMBuilder.OptLevel = 3; + PMBuilder.populateModulePassManager(PM); + + std::string Empty; + auto start = std::chrono::high_resolution_clock::now(); + + for (int i = 0; i < 10000; ++i) + { 
+ + auto Func = Function::Create(FunctionType, + Function::ExternalLinkage, + Empty, + MainModule); + Func->setCallingConv(CallingConv::C); + + { + auto Entry = BasicBlock::Create(*Con, Empty, Func); + IRBuilder->SetInsertPoint(Entry); + + auto ExitBlock = BasicBlock::Create(*Con, Empty, Func); + + IRBuilder->SetInsertPoint(ExitBlock); + IRBuilder->CreateRetVoid(); + + IRBuilder->SetInsertPoint(Entry); + IRBuilder->CreateBr(ExitBlock); + } + + //printf("i: %d\n", i); + PM.run(*MainModule); + + auto FunctionList = &MainModule->getFunctionList(); + FunctionList->clear(); + } + auto end = std::chrono::high_resolution_clock::now(); + auto diff = end - start; + + printf("Took %ld(%ldms) nanoseconds\n", diff.count(), std::chrono::duration_cast(diff).count()); + return 0; +} diff --git a/docs/CPUBackends.md b/docs/CPUBackends.md new file mode 100644 index 000000000..dcf3812f9 --- /dev/null +++ b/docs/CPUBackends.md @@ -0,0 +1,29 @@ +# FEXCore CPU Backends +--- +FEXCore supports multiple CPU emulation backends. All of which ingest the IR that we have been generating. + +## IR Interpreter +The first one is the easiest. This just walks the IR list and interprets the IR as it goes through it. It isn't meant to be fast and is for debugging purposes. +This is used to easily inspect what is going on with the code generation and making sure logic is sound. Will most likely last in to perpetuity since it isn't exactly difficult to maintain and it is useful to have around + +## IR JIT +**Not yet implemented** +This is meant to be our first JIT of call and will serve multiple purposes. It'll be the JIT that is used for our runtime compilation of code. +This means it needs to be fast during compilation and have decent runtime performance. +Good chance that we will need to implement multiple of these depending on host architecture with some code reuse between them. 
+This JIT will also be what we use for gathering sampling data for passing off to our LLVM JIT for tiered recompilation and offline compilation later. +Should use xbyak for our x86-64 host and Vixl for our AArch64 host. For other targets in the future we will see what is available + +## LLVM JIT +This is the last JIT that should theoretically generate the most optimal code for us. +This *should* be used for a tiered recompiler system using sampling data from the IR JIT. +Currently it just supports being a regular JIT core. There are still some hard problems that need to be solved with this JIT since LLVM isn't quite ideal for generating code for a JIT. + +# Future ideas +--- +* Support a custom ABI on the LLVM JIT to generate more optimal code that is shared between the IR JIT and LLVM JIT + * This can let us do fun things like reserve host registers for guest register state. Trivial in the IR JIT, not so much for LLVM. + * Needs a local build of LLVM that we statically link in. +* Create an inline ASM or JIT'd dispatcher loop. Will allow our JITs to be more optimal by reserving more registers for guest state. +* WebAssmembly or other browser language? + * Might allow decent runtime performance of things emulated in a browser. Could be interesting. diff --git a/docs/CustomCPUBackend.md b/docs/CustomCPUBackend.md new file mode 100644 index 000000000..6e554b5f7 --- /dev/null +++ b/docs/CustomCPUBackend.md @@ -0,0 +1,17 @@ +# FEXCore custom CPU backends +--- +Custom CPU backends can be useful for testing purposes or wanting to support situations that FEXCore doesn't currently understand. +The FEXCore::Context namespace provides a `SetCustomCPUBackendFactory` function for providing a factory function pointer to the core. This function will be used if the `DEFAULTCORE` configuration option is set to `CUSTOM`. +If the guest code creates more threads then the CPU factory function will be invoked for creating a CPUBackend per thread. 
If you don't want a unique CPUBackend object per thread then that needs to be handled by the user. + +It's recommended to store the pointers provided to the factory function for later use. +`FEXCore::Context::Context*` - Is a pointer to previously generated context object +`FEXCore::Core::ThreadState*` - Is a pointer to a thread's state. Lives for as long as the guest thread is alive. +To use this factory, one must override the provided `FEXCore::CPU::CPUBackend` class with a custom one. This factory function then should return a newly allocated class. + +`FEXCore::CPU::CPUBackend::GetName` - Returns an `std::string` for the name of this core +`FEXCore::CPU::CPUBackend::CompileCode` - Provides the CPUBackend with potentially an IR and DebugData for compiling code. Returns a pointer that needs to be long lasting to a piece of code that will be executed for the particular RIP. +Both IR and DebugData can be null if `NeedsOpDispatch` returns false +`FEXCore::CPU::CPUBackend::MapRegion` - This function needs to be implemented if the CPUBackend needs to map host facing memory in to the backend. Allows setting up virtual memory mapping if required +`FEXCore::CPU::CPUBackend::Initialize` - Called after the guest memory is initialized and all state is ready for the code to start initializing. Gets called just before the CPUBackend starts executing code for the first time. +`FEXCore::CPU::CPUBackend::NeedsOpDispatch` - Tells FEXCore if the backend needs the FEXCore IR and DebugData provided to it. This can be useful if FEXCore hits something it doesn't understand but it doesn't matter since the CPUBackend can still understand it from raw x86-64 (ex VM based CPU backend). diff --git a/docs/Frontend.md b/docs/Frontend.md new file mode 100644 index 000000000..a0733acd8 --- /dev/null +++ b/docs/Frontend.md @@ -0,0 +1,43 @@ +# FEXCore Frontend +--- +The FEXCore frontend's job is to translate an incoming x86-64 instruction stream in to a more easily digested version of x86. 
+This effectively expands x86-64 instruction encodings to be more easily ingested later on in the process. +This ends up being essential to allowing our IR translation step to be less strenious. It can decode a "common" expanded instruction format rather than various things that x86-supports. +For a simple example, x86-64's primary op table has ALU ops that duplicate themselves at least six times with minor differences between each. The frontend is able to decode a large amount of these ops to the "same" op that the IR translation understands more readily. +This works for most instructions that follow a common decoding scheme, although there are instructions that don't follow the rules and must be handled explicitly elsewhere. + +An example of decoded instructions: +``` +00 C0: add al,al +04 01: add al, 0x1 +``` +These two instructions have a different encoding scheme but they are just an add. +They end up decoding to a generic format with the same destination operand but different sources. +May look subtle but there end up being far more complex cases and we don't want to handle hundreds of instructions differently. +After the frontend is done decoding the instruction stream, it passes the output over to the OpDispatcher for translating to our IR. + +## Multiblock +--- +The Frontend has an additional duty. Since it is the main piece of code that understands the guest x86-64 code; It is also what does analysis of control flow to determine if we can end up compiling multiple blocks of guest code. +The Frontend already has to determine if it has hit a block ending instruction. This is anything that changes control flow. This feeds in to the analysis system to look at conditional branches to see if we can keep compiling code at the target location in the same functional unit. 
+ +Short example: +``` +test eax, eax +jne .Continue +ret <--- We can continue past this instruction, which is an unconditional block ender +.Continue: +``` + +These sorts of patterns crop up extensively in compiled code. A large amount of traditional JITs will end up ending the block at any sort of conditional branch instruction. +If the analysis can determine the target conditional branch location, we can then know that the code can keep compiling past an unconditional block ender instruction. +This works for both backwards branches and forward branches. + +### Additional reading +--- +There are other emulators out there that implement multiblock JIT compilation with some success. +The best example of this that I know of is the [Dolphin GameCube and Wii Emulator](https://github.com/dolphin-emu/dolphin) Where I implemented the initial multiblock implementation. +One of the major limitations with a console emulator is that you can run in to infinite loops on backedges when using multiblock compilation. This is due to console emulation being able to run an infinite loop and let Interrupts or some other state cause it to break out. +Luckily since we are a userspace emulator we don't have to deal with this problem. If an application has written an infinite loop, then without another thread running, it'll be a true infinite loop. +Additionally luckily is that we are going to emulate the strong memory model of x86-64 and also support true threads, this will mean that we don't need to do any manual thread scheduling in our emulator and switch between virtual threads. + diff --git a/docs/IR.md b/docs/IR.md new file mode 100644 index 000000000..16c128d61 --- /dev/null +++ b/docs/IR.md @@ -0,0 +1,32 @@ +# FEXCore IR +--- +The IR for the FEXCore is an SSA based IR that is generated from the incoming x86-64 assembly. 
+SSA is quite nice to work with when translating the x86-64 code to the IR, when optimizing that code with custom optimization passes, and also passing that IR to our CPU backends. + +## Emulation IR considerations +* We have explicitly sized IR variables + * Supports traditional element sizes of 1,2,4,8 bytes and some 16byte ops + * Supports arbitrary number of vector elements + * The op determines if something is float or integer based. +* Clear separation of scalar IR ops and vector IR ops + * ex, MUL versus VMUL +* We have explicit Load/Store context IR ops + * This allows us to have a clear separation between guest memory and tracked x86-64 state +* We have an explicit CPUID IR op + * This allows us to return fairly complex data (4 registers of data) and also having an easier optimization for constant CPUID functions + * So if we const-prop the CPUID function then it'll just const-prop further along +* We have an explicit syscall op + * The syscall op is fairly complex as well, same with CPUID that if the syscall function is const-prop then we can directly call the syscall handler + * Can save overhead by removing call overheads +* The IR supports branching from one block to another + * Has a conditional branch instruction that either branches to the target branch or falls through to the next block + * Has an unconditional branch to explicitly jump to a block instead of falling through + * **There is a desire to follow LLVM semantics around block limitations but it isn't currently strictly enforced** +* Supports a debug ```Print``` Op for printing out values for debug viewing +* Supports explicit Load/Store memory IR ops + * This is for accessing guest memory and will do the memory offset translation in to the VM's memory space + * This is done by just adding the VM memory base to the 64bit address passed in + * This is done in a manner that the application **can** escape from the VM and isn't meant to be safe + * There is an option for JITs to validate the 
memory region prior to accessing for ensuring correctness +* IR is generated from a JSON file, fairly straightforward to extend. + * Read the python generation file to determine the extent of what it can do diff --git a/docs/OpDispatcher.md b/docs/OpDispatcher.md new file mode 100644 index 000000000..ac5db3272 --- /dev/null +++ b/docs/OpDispatcher.md @@ -0,0 +1,47 @@ +# FEXCore OpDispatcher +--- +The OpDispatcher is the step of the recompiler that takes the output from the Frontend and translates it to our IR. +Since the x86-64 instruction set is so large (>1000 instructions in the current FEXCore tables) we need to reduce this down to something more manageable. +We will ingest our decoded x86-64 instructions and translate them down to more basic IR operations. The number of IR ops are currently in the dozens which is a lot easier to handle. +Once we have translated to the IR then we need to pass the IR over to optimization passes or our JIT cores. + +Ex: +``` + mov rax,0x1 + mov rdi,0x1 + mov rsi,0x20 + mov rdx,0x1 + syscall + hlt + ``` + Translates to the IR of: + ``` +BeginBlock + %ssa8 i32 = Constant 0x1 + StoreContext 0x8, 0x8, %ssa8 + %ssa64 i32 = Constant 0x1 + StoreContext 0x8, 0x30, %ssa64 + %ssa120 i32 = Constant 0x1f + StoreContext 0x8, 0x28, %ssa120 + %ssa176 i32 = Constant 0x1 + StoreContext 0x8, 0x20, %ssa176 + %ssa232 i64 = LoadContext 0x8, 0x8 + %ssa264 i64 = LoadContext 0x8, 0x30 + %ssa296 i64 = LoadContext 0x8, 0x28 + %ssa328 i64 = LoadContext 0x8, 0x20 + %ssa360 i64 = LoadContext 0x8, 0x58 + %ssa392 i64 = LoadContext 0x8, 0x48 + %ssa424 i64 = LoadContext 0x8, 0x50 + %ssa456 i64 = Syscall%ssa232, %ssa264, %ssa296, %ssa328, %ssa360, %ssa392, %ssa424 + StoreContext 0x8, 0x8, %ssa456 + BeginBlock + EndBlock 0x1e + ExitFunction +``` +### Multiblock +--- +An additional duty of the OpDispatcher is to handle the metadata that the Frontend provides for supporting multiblock. 
+The IR provides most of the functionality required for supporting robust branching and function creation required for generating large blocks of code translated from x86-64 emulation. +This is required since in the ideal situation we will be doing function level translation of x86-64 guest code to our IR. +The IR is currently lacking any idea of flags or PHI nodes, which can be problematic when optimizing branch heavy code. The good thing is that the LLVM JIT can use a mem to reg pass to minimize a large number of this code. +It **will** be required to improve the IR further once the runtime JIT becomes a higher priority diff --git a/docs/OptimizationPasses.md b/docs/OptimizationPasses.md new file mode 100644 index 000000000..9fe86358d --- /dev/null +++ b/docs/OptimizationPasses.md @@ -0,0 +1,36 @@ +# FEXCore IR Optimization passes +--- +**This is very much a WIP since these optimization passes aren't in code yet** +## Pass Managers +* Need Function level optimization pass manager +* Need block level optimization pass manager +### Dead Store Elimination +We need to do dead store elimination because LLVM can't always handle elimination of our loadstores +This is very apparent when we are doing flag calculations and LLVM isn't able to remove them +This is mainly just an issue around the context loadstores. +We will want this more when the IRJIT comes online. +### Dead flag elimination +X86-64 is a fairly disguting ISA in that it calculates a bunch of flags on almost all instructions. +We need eliminate redundant flag calculations that end up being being overwritten without being used. +This happens *constantly* and in most cases the flag calculation takes significantly more work than the basic op by itself +Good chance that breaking out the flags to independent memory locations will make this easier. Or just adding ops for flag handling. +### Dead Code Elimination +There are a lot of cases that code will be generated that is immediately dead afterwards. 
+Flag calculation elimination will produce a lot of dead code that needs to get removed. +Additionally there are a decent amount of x86-64 instructions that store their results in to multiple registers and then the next instruction overwrites one of those instructions. +Multiply and Divide being a big one, since x86 calculates these at higher precision. +These can rely significantly tracking liveness between LoadContext and StoreContext ops +### ABI register elimination pass +This one is very fun and will reduce a decent amount of work that the JIT needs to do. +When we are targeting a specific x86-64 ABI and we know that we have translated a block of code that is the entire function. +We can eliminate stores to the context that by ABI standards is a temporary register. +We will be able to know exactly that these are dead and just remove the store (and run all the passes that optimize the rest away afterwards). +### Loadstore coalescing pass +Large amount of x86-64 instructions load or store registers in order from the context. +We can merge these in to loadstore pair ops to improve perf +### Function level heuristic pass +Once we know that a function is a true full recompile we can do some additional optimizations. +Remove any final flag stores. We know that a compiler won't pass flags past a function call boundry(It doesn't exist in the ABI) +Remove any loadstores to the context mid function, only do a final store at the end of the function and do loads at the start. Which means ops just map registers directly throughout the entire function. +### SIMD coalescing pass? 
When operating on older MMX ops (64bit SIMD) they may end up generating some independent ops that can be coalesced into a 128bit op
+ * + * @param IR - IR that maps to the IR for this RIP + * @param DebugData - Debug data that is available for this IR indirectly + * + * @return An executable function pointer that is theoretically compiled from this point. + * Is actually a function pointer of type `void (FEXCore::Core::ThreadState *Thread) + */ + virtual void *CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData) = 0; + + /** + * @brief Function for mapping memory in to the CPUBackend's visible space. Allows setting up virtual mappings if required + * + * @return Currently unused + */ + virtual void *MapRegion(void *HostPtr, uint64_t GuestPtr, uint64_t Size) = 0; + + /** + * @brief This is post-setup initialization that is called just before code executino + * + * Guest memory is available at this point and ThreadState is valid + */ + virtual void Initialize() {} + + /** + * @brief Lets FEXCore know if this CPUBackend needs IR and DebugData for CompileCode + * + * This is useful if the FEXCore Frontend hits an x86-64 instruction that isn't understood but can continue regardless + * + * This is useful for example, a VM based CPUbackend + * + * @return true if it needs the IR + */ + virtual bool NeedsOpDispatch() = 0; + }; + +} +} diff --git a/include/FEXCore/Core/CodeLoader.h b/include/FEXCore/Core/CodeLoader.h new file mode 100644 index 000000000..c0d750035 --- /dev/null +++ b/include/FEXCore/Core/CodeLoader.h @@ -0,0 +1,78 @@ +#pragma once +#include +#include + +namespace FEXCore { +/** + * @brief Code loader class so the CPU backend can load code in a generic fashion + * + * This class is expected to have multiple different style of code loaders +*/ +class CodeLoader { +public: + + /** + * @brief CPU Core uses this to choose what the stack size should be for this code + */ + virtual uint64_t StackSize() const = 0; + /** + * @brief Allows the code loader to set up the stack the way it wants + * + * @param HostPtr The host facing pointer to the base of the stack. 
+ * Size of memory will be at least the size that StackSize() returns + * + * @param GuestPtr The guest facing memory location where the base of the stack lives + * + * @return The location that the guest stack pointer register should be set to + * + * Probably will be GuestPtr + StackSize() - + */ + virtual uint64_t SetupStack(void *HostPtr, uint64_t GuestPtr) const = 0; + + /** + * @brief Function to return the guest RIP that the code should start out at + */ + virtual uint64_t DefaultRIP() const = 0; + + using MemoryLayout = std::tuple; + /** + * @brief Gets the default memory layout of the memory object being loaded + * + * This will be mapped in to the guest memory space automatically + * + * @return A MemoryLayout object describing the layout of the region + */ + virtual MemoryLayout GetLayout() const = 0; + + /** + * @brief Allows the loader to map memory regions that it needs + * + * Code loader is expected to call the Mapper function with a memory offset and size for mapping + * + * @param Mapper Returns the host facing pointer for memory setup if the codfe loader needs to do things to it + */ + virtual void MapMemoryRegion(std::function Mapper) {} + + /** + * @brief Memory writer function for loading code in to guest memory + * + * First argument = Data to write + * Second argument = Guest memory data location + * Third argument = Guest memory size + */ + using MemoryWriter = std::function; + virtual void LoadMemory(MemoryWriter Writer) = 0; + + /** + * @brief Get the final RIP we are supposed to end up on in a debugger + * + * @return When the debugger reaches this RIP then we know that we have completed + */ + virtual uint64_t GetFinalRIP() { return ~0ULL; } + + virtual char const *FindSymbolNameInRange(uint64_t Address) { return nullptr; } + +}; + + +} diff --git a/include/FEXCore/Core/Context.h b/include/FEXCore/Core/Context.h new file mode 100644 index 000000000..47f0c8c48 --- /dev/null +++ b/include/FEXCore/Core/Context.h @@ -0,0 +1,193 @@ +#pragma 
once +#include +#include + +namespace FEXCore { + class CodeLoader; +} + +namespace FEXCore::Core { + struct CPUState; + struct ThreadState; +} + +namespace FEXCore::CPU { + class CPUBackend; +} + +namespace FEXCore::HLE { + struct SyscallArguments; + class SyscallVisitor; +} +namespace FEXCore::SHM { + struct SHMObject; +} + +namespace FEXCore::Context { + struct Context; + enum ExitReason { + EXIT_NONE, + EXIT_WAITING, + EXIT_ASYNC_RUN, + EXIT_SHUTDOWN, + EXIT_DEBUG, + EXIT_UNKNOWNERROR, + }; + using CustomCPUFactoryType = std::function; + + /** + * @brief This initializes internal FEXCore state that is shared between contexts and requires overhead to setup + */ + void InitializeStaticTables(); + + /** + * @brief [[threadsafe]] Create a new FEXCore context object + * + * This is necessary to do when running threaded contexts + * + * @return a new context object + */ + FEXCore::Context::Context *CreateNewContext(); + + /** + * @brief Post creation context initialization + * Once configurations have been set, do the post-creation initialization with that configuration + * + * @param CTX The context that we created + * + * @return true if we managed to initialize correctly + */ + bool InitializeContext(FEXCore::Context::Context *CTX); + + /** + * @brief Destroy the context object + * + * @param CTX + */ + void DestroyContext(FEXCore::Context::Context *CTX); + + /** + * @brief Adds a base pointer that the VM can use for "physical" memory backing + * + * Will be the guests physical memory location of zero + * + * @return true on added. 
false when we had already added a guest memory region + */ + bool AddGuestMemoryRegion(FEXCore::Context::Context *CTX, FEXCore::SHM::SHMObject *SHM); + + /** + * @brief Allows setting up in memory code and other things prior to launchign code execution + * + * @param CTX The context that we created + * @param Loader The loader that will be doing all the code loading + * + * @return true if we loaded code + */ + bool InitCore(FEXCore::Context::Context *CTX, FEXCore::CodeLoader *Loader); + + void SetApplicationFile(FEXCore::Context::Context *CTX, std::string const &File); + + /** + * @brief Starts running the CPU core + * + * If WaitForIdle is enabled then this call will block until the thread exits or if single stepping is enabled, after the core steps one instruction + * + * @param CTX The context that we created + * @param WaitForIdle Should we wait for the core to be idle or not + * + * @return The ExitReason for the parentthread. ASYNC_RUN if WaitForIdle was false + */ + ExitReason RunLoop(FEXCore::Context::Context *CTX, bool WaitForIdle); + + /** + * @brief [[threadsafe]] Returns the ExitReason of the parent thread. 
Typically used for async result status + * + * @param CTX The context that we created + * + * @return The ExitReason for the parentthread + */ + ExitReason GetExitReason(FEXCore::Context::Context *CTX); + + /** + * @brief [[theadsafe]] Checks if the Context is either done working or paused(in the case of single stepping) + * + * Use this when the context is async running to determine if it is done + * + * @param CTX the context that we created + * + * @return true if the core is done or paused + */ + bool IsDone(FEXCore::Context::Context *CTX); + + /** + * @brief Gets a copy the CPUState of the parent thread + * + * @param CTX The context that we created + * @param State The state object to populate + */ + void GetCPUState(FEXCore::Context::Context *CTX, FEXCore::Core::CPUState *State); + + /** + * @brief Copies the CPUState provided to the parent thread + * + * @param CTX The context that we created + * @param State The satate object to copy from + */ + void SetCPUState(FEXCore::Context::Context *CTX, FEXCore::Core::CPUState *State); + + void Pause(FEXCore::Context::Context *CTX); + + /** + * @brief Allows the frontend to pass in a custom CPUBackend creation factory + * + * This allows the frontend to have its own frontend. 
Typically for debugging + * + * @param CTX The context that we created + * @param Factory The factory that the context will call if the DefaultCore config ise set to CUSTOM + */ + void SetCustomCPUBackendFactory(FEXCore::Context::Context *CTX, CustomCPUFactoryType Factory); + + /** + * @brief Allows a custom CPUBackend creation factory for fallback routines when the main CPUBackend core can't handle an instruction + * + * This is only useful for debugging new instruction decodings that FEXCore doesn't understand + * The CPUBackend that is created from this factory must have its NeedsOpDispatch function to return false + * + * @param CTX The context that we created + * @param Factory The factory that the context will call on core creation + */ + void SetFallbackCPUBackendFactory(FEXCore::Context::Context *CTX, CustomCPUFactoryType Factory); + + /** + * @brief This allows a frontend core to call Syscall routines directly. Useful for debugging + * + * @param CTX The context that we created + * @param Thread The thread to run the syscall on + * @param Args The arguments to the syscall + * + * @return The value that a syscall returns + */ + uint64_t HandleSyscall(FEXCore::Context::Context *CTX, FEXCore::Core::ThreadState *Thread, FEXCore::HLE::SyscallArguments *Args); + + /** + * @brief Sets up memory regions on the guest for mirroring within the guest's VM space + * + * @param VirtualAddress The address we want to set to mirror a physical memory region + * @param PhysicalAddress The physical memory region we are mapping + * @param Size Size of the region to mirror + * + * @return true when successfully mapped. false if there was an error adding + */ + bool AddVirtualMemoryMapping(FEXCore::Context::Context *CTX, uint64_t VirtualAddress, uint64_t PhysicalAddress, uint64_t Size); + + /** + * @brief Allows the frontend to set a custom syscall handler + * + * Useful for debugging purposes. 
May not work if the syscall ID exceeds the maximum number of syscalls in the lookup table + * + * @param Syscall Which syscall ID to install a visitor to + * @param Visitor The Visitor to install + */ + void RegisterExternalSyscallVisitor(FEXCore::Context::Context *CTX, uint64_t Syscall, FEXCore::HLE::SyscallVisitor *Visitor); + +} diff --git a/include/FEXCore/Core/CoreState.h b/include/FEXCore/Core/CoreState.h new file mode 100644 index 000000000..9bc9bbf29 --- /dev/null +++ b/include/FEXCore/Core/CoreState.h @@ -0,0 +1,39 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace FEXCore::Core { + struct __attribute__((packed)) CPUState { + uint64_t rip; ///< Current core's RIP. May not be entirely accurate while JIT is active + uint64_t gregs[16]; + uint64_t : 64; + uint64_t xmm[16][2]; + uint64_t gs; + uint64_t fs; + uint8_t flags[32]; + }; + static_assert(offsetof(CPUState, xmm) % 16 == 0, "xmm needs to be 128bit aligned!"); + + struct __attribute__((packed)) ThreadState { + CPUState State{}; + + struct { + std::atomic_bool Running {false}; + std::atomic_bool ShouldStop {false}; + std::atomic_bool ShouldPause {false}; + std::atomic_bool WaitingToStart {false}; + } RunningEvents; + + FEXCore::HLE::ThreadManagement ThreadManager; + + uint8_t InternalState[0]; + }; + static_assert(offsetof(ThreadState, State) == 0, "CPUState must be first member in threadstate"); + + constexpr uint64_t PAGE_SIZE = 4096; + + std::string_view const& GetFlagName(unsigned Flag); +} diff --git a/include/FEXCore/Core/X86Enums.h b/include/FEXCore/Core/X86Enums.h new file mode 100644 index 000000000..bacdc7b25 --- /dev/null +++ b/include/FEXCore/Core/X86Enums.h @@ -0,0 +1,63 @@ +#pragma once + +namespace FEXCore::X86State { +/** + * @name The ordered of the GPRs from name to index + * @{ */ +constexpr unsigned REG_RAX = 0; +constexpr unsigned REG_RBX = 1; +constexpr unsigned REG_RCX = 2; +constexpr unsigned REG_RDX = 3; +constexpr unsigned REG_RSI = 4; +constexpr 
unsigned REG_RDI = 5; +constexpr unsigned REG_RBP = 6; +constexpr unsigned REG_RSP = 7; +constexpr unsigned REG_R8 = 8; +constexpr unsigned REG_R9 = 9; +constexpr unsigned REG_R10 = 10; +constexpr unsigned REG_R11 = 11; +constexpr unsigned REG_R12 = 12; +constexpr unsigned REG_R13 = 13; +constexpr unsigned REG_R14 = 14; +constexpr unsigned REG_R15 = 15; +constexpr unsigned REG_XMM_0 = 16; +constexpr unsigned REG_XMM_1 = 17; +constexpr unsigned REG_XMM_2 = 18; +constexpr unsigned REG_XMM_3 = 19; +constexpr unsigned REG_XMM_4 = 20; +constexpr unsigned REG_XMM_5 = 21; +constexpr unsigned REG_XMM_6 = 22; +constexpr unsigned REG_XMM_7 = 23; +constexpr unsigned REG_XMM_8 = 24; +constexpr unsigned REG_XMM_9 = 25; +constexpr unsigned REG_XMM_10 = 26; +constexpr unsigned REG_XMM_11 = 27; +constexpr unsigned REG_XMM_12 = 28; +constexpr unsigned REG_XMM_13 = 29; +constexpr unsigned REG_XMM_14 = 30; +constexpr unsigned REG_XMM_15 = 31; +constexpr unsigned REG_INVALID = 255; +/** @} */ + +/** + * @name RFLAG register bit locations + * @{ */ +constexpr unsigned RFLAG_CF_LOC = 0; +constexpr unsigned RFLAG_PF_LOC = 2; +constexpr unsigned RFLAG_AF_LOC = 4; +constexpr unsigned RFLAG_ZF_LOC = 6; +constexpr unsigned RFLAG_SF_LOC = 7; +constexpr unsigned RFLAG_TF_LOC = 8; +constexpr unsigned RFLAG_IF_LOC = 9; +constexpr unsigned RFLAG_DF_LOC = 10; +constexpr unsigned RFLAG_OF_LOC = 11; +constexpr unsigned RFLAG_IOPL_LOC = 12; +constexpr unsigned RFLAG_NT_LOC = 14; +constexpr unsigned RFLAG_RF_LOC = 16; +constexpr unsigned RFLAG_VM_LOC = 17; +constexpr unsigned RFLAG_AC_LOC = 18; +constexpr unsigned RFLAG_VIF_LOC = 19; +constexpr unsigned RFLAG_VIP_LOC = 20; +constexpr unsigned RFLAG_ID_LOC = 21; + +} diff --git a/include/FEXCore/Debug/ContextDebug.h b/include/FEXCore/Debug/ContextDebug.h new file mode 100644 index 000000000..59643b5c6 --- /dev/null +++ b/include/FEXCore/Debug/ContextDebug.h @@ -0,0 +1,35 @@ +#pragma once +#include +#include +#include + +#include +#include + +namespace 
FEXCore::Core { + struct RuntimeStats; +} + +namespace FEXCore::Context { + struct Context; + +namespace Debug { + + void CompileRIP(FEXCore::Context::Context *CTX, uint64_t RIP); + + uint64_t GetThreadCount(FEXCore::Context::Context *CTX); + FEXCore::Core::RuntimeStats *GetRuntimeStatsForThread(FEXCore::Context::Context *CTX, uint64_t Thread); + FEXCore::Core::CPUState GetCPUState(FEXCore::Context::Context *CTX); + + void GetMemoryRegions(FEXCore::Context::Context *CTX, std::vector *Regions); + + bool GetDebugDataForRIP(FEXCore::Context::Context *CTX, uint64_t RIP, FEXCore::Core::DebugData *Data); + bool FindHostCodeForRIP(FEXCore::Context::Context *CTX, uint64_t RIP, uint8_t **Code); + // XXX: + // bool FindIRForRIP(FEXCore::Context::Context *CTX, uint64_t RIP, FEXCore::IR::IntrusiveIRList **ir); + // void SetIRForRIP(FEXCore::Context::Context *CTX, uint64_t RIP, FEXCore::IR::IntrusiveIRList *const ir); + FEXCore::Core::ThreadState *GetThreadState(FEXCore::Context::Context *CTX); +} +} + + diff --git a/include/FEXCore/Debug/InternalThreadState.h b/include/FEXCore/Debug/InternalThreadState.h new file mode 100644 index 000000000..66992461d --- /dev/null +++ b/include/FEXCore/Debug/InternalThreadState.h @@ -0,0 +1,65 @@ +#pragma once +#include "Event.h" +#include +#include +#include +#include +#include +#include + +namespace FEXCore { + class BlockCache; +} + +namespace FEXCore::Context { + struct Context; +} +namespace FEXCore::IR{ + class OpDispatchBuilder; +} + +namespace FEXCore::Core { + + struct RuntimeStats { + std::atomic_uint64_t InstructionsExecuted; + std::atomic_uint64_t BlocksCompiled; + }; + + /** + * @brief Contains debug data for a block of code for later debugger analysis + * + * Needs to remain around for as long as the code could be executed at least + */ + struct DebugData { + uint64_t HostCodeSize; ///< The size of the code generated in the host JIT + uint64_t GuestCodeSize; ///< The size of the guest side code + uint64_t GuestInstructionCount; 
///< Number of guest instructions + uint64_t TimeSpentInCode; ///< How long this code has spent time running + uint64_t RunCount; ///< Number of times this block of code has been run + }; + + struct __attribute__((packed)) InternalThreadState { + FEXCore::Core::ThreadState State; + + FEXCore::Context::Context *CTX; + + std::thread ExecutionThread; + Event StartRunning; + Event ThreadWaiting; + + std::unique_ptr OpDispatcher; + + std::unique_ptr CPUBackend; + std::unique_ptr FallbackBackend; + + std::unique_ptr BlockCache; + + std::map>> IRLists; + std::map DebugData; + RuntimeStats Stats{}; + + FEXCore::Context::ExitReason ExitReason {FEXCore::Context::ExitReason::EXIT_WAITING}; + }; +} + + diff --git a/include/FEXCore/Debug/X86Tables.h b/include/FEXCore/Debug/X86Tables.h new file mode 100644 index 000000000..51b589994 --- /dev/null +++ b/include/FEXCore/Debug/X86Tables.h @@ -0,0 +1,399 @@ +#pragma once + +#include +#include +#include + +namespace FEXCore::IR { +///< Forward declaration of OpDispatchBuilder +class OpDispatchBuilder; +} + +namespace FEXCore::X86Tables { + +///< Forward declaration of X86InstInfo +struct X86InstInfo; + +namespace DecodeFlags { +constexpr uint32_t FLAG_OPERAND_SIZE = (1 << 0); +constexpr uint32_t FLAG_ADDRESS_SIZE = (1 << 1); +constexpr uint32_t FLAG_LOCK = (1 << 2); +constexpr uint32_t FLAG_LEGACY_PREFIX = (1 << 3); +constexpr uint32_t FLAG_REX_PREFIX = (1 << 4); +constexpr uint32_t FLAG_MODRM_PRESENT = (1 << 5); +constexpr uint32_t FLAG_SIB_PRESENT = (1 << 6); +constexpr uint32_t FLAG_REX_WIDENING = (1 << 7); +constexpr uint32_t FLAG_REX_XGPR_B = (1 << 8); +constexpr uint32_t FLAG_REX_XGPR_X = (1 << 9); +constexpr uint32_t FLAG_REX_XGPR_R = (1 << 10); +constexpr uint32_t FLAG_FS_PREFIX = (1 << 11); +constexpr uint32_t FLAG_GS_PREFIX = (1 << 12); +constexpr uint32_t FLAG_REP_PREFIX = (1 << 13); +constexpr uint32_t FLAG_REPNE_PREFIX = (1 << 14); +// Size flags +constexpr uint32_t FLAG_SIZE_DST_OFF = 15; +constexpr uint32_t 
FLAG_SIZE_SRC_OFF = FLAG_SIZE_DST_OFF + 3; +constexpr uint32_t SIZE_MASK = 0b111; +constexpr uint32_t SIZE_DEF = 0b000; // This should be invalid past decoding +constexpr uint32_t SIZE_8BIT = 0b001; +constexpr uint32_t SIZE_16BIT = 0b010; +constexpr uint32_t SIZE_32BIT = 0b011; +constexpr uint32_t SIZE_64BIT = 0b100; +constexpr uint32_t SIZE_128BIT = 0b101; +constexpr uint32_t SIZE_256BIT = 0b110; + +inline uint32_t GetSizeDstFlags(uint32_t Flags) { return (Flags >> FLAG_SIZE_DST_OFF) & SIZE_MASK; } +inline uint32_t GetSizeSrcFlags(uint32_t Flags) { return (Flags >> FLAG_SIZE_SRC_OFF) & SIZE_MASK; } + +inline uint32_t GenSizeDstSize(uint32_t Size) { return Size << FLAG_SIZE_DST_OFF; } +inline uint32_t GenSizeSrcSize(uint32_t Size) { return Size << FLAG_SIZE_SRC_OFF; } +} + +union DecodedOperand { + enum { + TYPE_NONE, + TYPE_GPR, + TYPE_GPR_DIRECT, + TYPE_GPR_INDIRECT, + TYPE_RIP_RELATIVE, + TYPE_LITERAL, + TYPE_SIB, + }; + + struct { + uint8_t Type; + } TypeNone; + + struct { + uint8_t Type; + bool HighBits; + uint8_t GPR; + } TypeGPR; + + struct { + uint8_t Type; + uint8_t GPR; + int32_t Displacement; + } TypeGPRIndirect; + + struct { + uint8_t Type; + int32_t Literal; + } TypeRIPLiteral; + + struct { + uint8_t Type; + uint8_t Size; + uint64_t Literal; + } TypeLiteral; + + struct { + uint8_t Type; + uint8_t Index; // ~0 invalid + uint8_t Base; // ~0 invalid + uint32_t Scale : 8; + int32_t Offset; + } TypeSIB; +}; + +struct DecodedInst { + uint64_t PC; + + uint16_t OP; + uint32_t Flags; + + uint8_t ModRM; + uint8_t SIB; + uint8_t InstSize; + uint8_t LastEscapePrefix; + bool DecodedModRM; + bool DecodedSIB; + + DecodedOperand Dest; + DecodedOperand Src1; + DecodedOperand Src2; + + // Constains the dispatcher handler pointer + X86InstInfo const* TableInfo; +}; + +union ModRMDecoded { + uint8_t Hex{}; + struct { + uint8_t rm : 3; + uint8_t reg : 3; + uint8_t mod : 2; + }; +}; + +union SIBDecoded { + uint8_t Hex{}; + struct { + uint8_t base : 3; + uint8_t index : 3; + 
uint8_t scale : 2; + }; +}; + +enum InstType { + TYPE_UNKNOWN, + TYPE_LEGACY_PREFIX, + TYPE_PREFIX, + TYPE_REX_PREFIX, + TYPE_SECONDARY_TABLE_PREFIX, + TYPE_X87_TABLE_PREFIX, + TYPE_MODRM_TABLE_PREFIX, + TYPE_VEX_TABLE_PREFIX, + TYPE_XOP_TABLE_PREFIX, + TYPE_INST, + TYPE_INVALID, + TYPE_COPY_OTHER, + + // Must be in order + // Groups 1, 1a, 2, 3, 4, 5, 11 are for the primary op table + // Groups 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, p are for the secondary op table + TYPE_GROUP_1, + TYPE_GROUP_1A, + TYPE_GROUP_2, + TYPE_GROUP_3, + TYPE_GROUP_4, + TYPE_GROUP_5, + TYPE_GROUP_11, + + // Must be in order + // Groups 6-p Are for the secondary op table + TYPE_GROUP_6, + TYPE_GROUP_7, + TYPE_GROUP_8, + TYPE_GROUP_9, + TYPE_GROUP_10, + TYPE_GROUP_12, + TYPE_GROUP_13, + TYPE_GROUP_14, + TYPE_GROUP_15, + TYPE_GROUP_16, + TYPE_GROUP_17, + TYPE_GROUP_P, + + // The secondary op extension table allows further extensions + // Group 7 allows additional extensions to this table + TYPE_SECOND_GROUP_MODRM, + + // Just to make grepping easier + TYPE_3DNOW_TABLE = TYPE_INVALID, + TYPE_3DNOW_INST = TYPE_INVALID, + + // Exists in the table but isn't decoded correctly + TYPE_UNDEC = TYPE_INVALID, + TYPE_MMX = TYPE_INVALID, + TYPE_X87 = TYPE_INVALID, + TYPE_PRIV = TYPE_INVALID, + TYPE_0F38_TABLE = TYPE_INVALID, + TYPE_0F3A_TABLE = TYPE_INVALID, +}; + +namespace InstFlags { +constexpr uint32_t FLAGS_NONE = 0; +constexpr uint32_t FLAGS_DEBUG = (1 << 1); +constexpr uint32_t FLAGS_DEBUG_MEM_ACCESS = (1 << 2); +constexpr uint32_t FLAGS_SUPPORTS_REP = (1 << 3); +constexpr uint32_t FLAGS_BLOCK_END = (1 << 4); +constexpr uint32_t FLAGS_SETS_RIP = (1 << 5); + +constexpr uint32_t FLAGS_DISPLACE_SIZE_MUL_2 = (1 << 6); +constexpr uint32_t FLAGS_DISPLACE_SIZE_DIV_2 = (1 << 7); +constexpr uint32_t FLAGS_SRC_SEXT = (1 << 8); +constexpr uint32_t FLAGS_MEM_OFFSET = (1 << 9); + +// Enables XMM based subflags +// Current reserved range for this SF is [10, 15] +constexpr uint32_t FLAGS_XMM_FLAGS = (1 << 
10); + + // Non-XMM subflags + constexpr uint32_t FLAGS_SF_DST_RAX = (1 << 11); + constexpr uint32_t FLAGS_SF_DST_RDX = (1 << 12); + constexpr uint32_t FLAGS_SF_SRC_RAX = (1 << 13); + constexpr uint32_t FLAGS_SF_SRC_RCX = (1 << 14); + constexpr uint32_t FLAGS_SF_REX_IN_BYTE = (1 << 15); + + // XMM subflags + constexpr uint32_t FLAGS_SF_HIGH_XMM_REG = (1 << 11); + constexpr uint32_t FLAGS_SF_DST_GPR = (1 << 12); + constexpr uint32_t FLAGS_SF_SRC_GPR = (1 << 13); + +// Enables MODRM specific subflags +// Current reserved range for this SF is [14, 17] +constexpr uint32_t FLAGS_MODRM = (1 << 16); + + // With ModRM SF flag enabled + // Direction of ModRM. Dst ^ Src + // Set means destination is rm bits + // Unset means src is rm bits + constexpr uint32_t FLAGS_SF_MOD_DST = (1 << 17); + + // If the instruction is restricted to mem or reg only + // 0b00 = Regular ModRM support + // 0b01 = Memory accesses only + // 0b10 = Register accesses only + // 0b11 = + constexpr uint32_t FLAGS_SF_MOD_MEM_ONLY = (1 << 18); + constexpr uint32_t FLAGS_SF_MOD_REG_ONLY = (1 << 19); + +constexpr uint32_t FLAGS_SIZE_DST_OFF = 20; +constexpr uint32_t FLAGS_SIZE_SRC_OFF = FLAGS_SIZE_DST_OFF + 3; + +constexpr uint32_t SIZE_MASK = 0b111; +constexpr uint32_t SIZE_DEF = 0b000; +constexpr uint32_t SIZE_8BIT = 0b001; +constexpr uint32_t SIZE_16BIT = 0b010; +constexpr uint32_t SIZE_32BIT = 0b011; +constexpr uint32_t SIZE_64BIT = 0b100; +constexpr uint32_t SIZE_128BIT = 0b101; +constexpr uint32_t SIZE_256BIT = 0b110; + +inline uint32_t GetSizeDstFlags(uint32_t Flags) { return (Flags >> FLAGS_SIZE_DST_OFF) & SIZE_MASK; } +inline uint32_t GetSizeSrcFlags(uint32_t Flags) { return (Flags >> FLAGS_SIZE_SRC_OFF) & SIZE_MASK; } + +inline uint32_t GenFlagsDstSize(uint32_t Size) { return Size << FLAGS_SIZE_DST_OFF; } +inline uint32_t GenFlagsSrcSize(uint32_t Size) { return Size << FLAGS_SIZE_SRC_OFF; } +inline uint32_t GenFlagsSameSize(uint32_t Size) {return (Size << FLAGS_SIZE_DST_OFF) | (Size << 
FLAGS_SIZE_SRC_OFF); } +inline uint32_t GenFlagsSizes(uint32_t Dest, uint32_t Src) {return (Dest << FLAGS_SIZE_DST_OFF) | (Src << FLAGS_SIZE_SRC_OFF); } + + +// If it has an xmm subflag +#define HAS_XMM_SUBFLAG(x, flag) (((x) & (FEXCore::X86Tables::InstFlags::FLAGS_XMM_FLAGS | (flag))) == (FEXCore::X86Tables::InstFlags::FLAGS_XMM_FLAGS | (flag))) + +// If it has non-xmm subflag +#define HAS_NON_XMM_SUBFLAG(x, flag) (((x) & (FEXCore::X86Tables::InstFlags::FLAGS_XMM_FLAGS | (flag))) == (flag)) +} + +auto OpToIndex = [](uint8_t Op) constexpr -> uint8_t { + switch (Op) { + // Group 1 + case 0x80: return 0; + case 0x81: return 1; + case 0x82: return 2; + case 0x83: return 3; + // Group 2 + case 0xC0: return 0; + case 0xC1: return 1; + case 0xD0: return 2; + case 0xD1: return 3; + case 0xD2: return 4; + case 0xD3: return 5; + // Group 3 + case 0xF6: return 0; + case 0xF7: return 1; + // Group 4 + case 0xFE: return 0; + // Group 5 + case 0xFF: return 0; + // Group 11 + case 0xC6: return 0; + case 0xC7: return 1; + } + return 0; +}; + +using DecodedOp = DecodedInst const*; +using OpDispatchPtr = void (IR::OpDispatchBuilder::*)(DecodedOp); + +#ifndef NDEBUG +namespace X86InstDebugInfo { +constexpr uint64_t FLAGS_MEM_ALIGN_4 = (1 << 0); +constexpr uint64_t FLAGS_MEM_ALIGN_8 = (1 << 1); +constexpr uint64_t FLAGS_MEM_ALIGN_16 = (1 << 2); +constexpr uint64_t FLAGS_MEM_ALIGN_SIZE = (1 << 3); // If instruction size changes depending on prefixes +constexpr uint64_t FLAGS_MEM_ACCESS = (1 << 4); +constexpr uint64_t FLAGS_DEBUG = (1 << 5); +constexpr uint64_t FLAGS_DIVIDE = (1 << 6); + + +struct Flags { + uint64_t DebugFlags; +}; +void InstallDebugInfo(); +} + +#endif + +struct X86InstInfo { + char const *Name; + InstType Type; + uint32_t Flags; ///< Must be larger than InstFlags enum + uint8_t MoreBytes; + OpDispatchPtr OpcodeDispatcher; +#ifndef NDEBUG + X86InstDebugInfo::Flags DebugInfo; + uint32_t NumUnitTestsGenerated; +#endif + + bool operator==(const X86InstInfo &b) const { + 
if (strcmp(Name, b.Name) != 0 || + Type != b.Type || + Flags != b.Flags || + MoreBytes != b.MoreBytes) + return false; + + // We don't care if the opcode dispatcher differs + return true; + } +}; + +static_assert(std::is_pod::value, "Pod?"); + +constexpr size_t MAX_PRIMARY_TABLE_SIZE = 256; +constexpr size_t MAX_SECOND_TABLE_SIZE = 256; +constexpr size_t MAX_REP_MOD_TABLE_SIZE = 256; +constexpr size_t MAX_REPNE_MOD_TABLE_SIZE = 256; +constexpr size_t MAX_OPSIZE_MOD_TABLE_SIZE = 256; +// 6 (groups) | 6 (max indexes) | 8 ops = 0b111'111'111 = 9 bits +constexpr size_t MAX_INST_GROUP_TABLE_SIZE = 512; +// 12 (groups) | 3(max indexes) | 8 ops = 0b1111'11'111 = 9 bits +constexpr size_t MAX_INST_SECOND_GROUP_TABLE_SIZE = 512; +constexpr size_t MAX_X87_TABLE_SIZE = 1 << 11; +constexpr size_t MAX_SECOND_MODRM_TABLE_SIZE = 32; +// 3 prefixes | 8 bit opcode +constexpr size_t MAX_0F_38_TABLE_SIZE = (1 << 11); +// 1 REX | 1 prefixes | 8 bit opcode +constexpr size_t MAX_0F_3A_TABLE_SIZE = (1 << 11); +constexpr size_t MAX_3DNOW_TABLE_SIZE = 256; +// VEX +// map_select(2 bits for now) | vex.pp (2 bits) | opcode (8bit) +constexpr size_t MAX_VEX_TABLE_SIZE = (1 << 13); +// VEX group ops +// group select (3 bits for now) | ModRM opcode (3 bits) +constexpr size_t MAX_VEX_GROUP_TABLE_SIZE = (1 << 7); + +// XOP +// group (2 bits for now) | vex.pp (2 bits) | opcode (8bit) +constexpr size_t MAX_XOP_TABLE_SIZE = (1 << 13); + +// XOP group ops +// group select (2 bits for now) | modrm opcode (3 bits) +constexpr size_t MAX_XOP_GROUP_TABLE_SIZE = (1 << 6); + +extern std::array BaseOps; +extern std::array SecondBaseOps; +extern std::array RepModOps; +extern std::array RepNEModOps; +extern std::array OpSizeModOps; +extern std::array PrimaryInstGroupOps; +extern std::array SecondInstGroupOps; +extern std::array SecondModRMTableOps; +extern std::array X87Ops; +extern std::array DDDNowOps; + +extern std::array H0F38TableOps; +extern std::array H0F3ATableOps; + +// VEX +extern std::array 
VEXTableOps; +extern std::array VEXTableGroupOps; + +// XOP +extern std::array XOPTableOps; +extern std::array XOPTableGroupOps; + +void InitializeInfoTables(); +} diff --git a/include/FEXCore/HLE/Linux/ThreadManagement.h b/include/FEXCore/HLE/Linux/ThreadManagement.h new file mode 100644 index 000000000..ec8464c9a --- /dev/null +++ b/include/FEXCore/HLE/Linux/ThreadManagement.h @@ -0,0 +1,26 @@ +#pragma once +#include + +namespace FEXCore::HLE { +// XXX: This should map multiple IDs correctly +// Tracking relationships between thread IDs and such +class ThreadManagement { +public: + uint64_t GetUID() { return UID; } + uint64_t GetGID() { return GID; } + uint64_t GetEUID() { return EUID; } + uint64_t GetEGID() { return EGID; } + uint64_t GetTID() { return TID; } + uint64_t GetPID() { return PID; } + + uint64_t UID{1000}; + uint64_t GID{1000}; + uint64_t EUID{1000}; + uint64_t EGID{1000}; + uint64_t TID{1}; + uint64_t PID{1}; + uint64_t child_tid{0}; + uint64_t parent_tid{0}; + uint64_t robust_list_head{0}; +}; +} diff --git a/include/FEXCore/HLE/SyscallHandler.h b/include/FEXCore/HLE/SyscallHandler.h new file mode 100644 index 000000000..9f2772ae1 --- /dev/null +++ b/include/FEXCore/HLE/SyscallHandler.h @@ -0,0 +1,9 @@ +#pragma once +#include + +namespace FEXCore::HLE { + struct SyscallArguments { + static constexpr std::size_t MAX_ARGS = 7; + uint64_t Argument[MAX_ARGS]; + }; +} diff --git a/include/FEXCore/HLE/SyscallVisitor.h b/include/FEXCore/HLE/SyscallVisitor.h new file mode 100644 index 000000000..c01106f2e --- /dev/null +++ b/include/FEXCore/HLE/SyscallVisitor.h @@ -0,0 +1,49 @@ +#pragma once +#include "LogManager.h" +#include + +namespace FEXCore::HLE { +#define INVALID_OP { LogMan::Msg::A("Tried to syscall with unknown number of registers"); return 0; } + class SyscallVisitor { + public: + SyscallVisitor(uint32_t Mask) : SyscallVisitor(Mask, false) {} + SyscallVisitor(uint32_t Mask, bool Constant) : ArgsMask { Mask }, ConstantVal { Constant } {} + + /** + 
* @brief If this syscall returns a constant value regardless of state then we can just read the value at compile time + * Won't happen often + * + * @return true if it is constant value + */ + bool IsConstant() { return ConstantVal; } + + virtual uint64_t VisitSyscall0() INVALID_OP + virtual uint64_t VisitSyscall1(uint64_t RDI) INVALID_OP + virtual uint64_t VisitSyscall2(uint64_t RDI, + uint64_t RSI) INVALID_OP + virtual uint64_t VisitSyscall3(uint64_t RDI, + uint64_t RSI, + uint64_t RDX) INVALID_OP + virtual uint64_t VisitSyscall4(uint64_t RDI, + uint64_t RSI, + uint64_t RDX, + uint64_t R10) INVALID_OP + virtual uint64_t VisitSyscall5(uint64_t RDI, + uint64_t RSI, + uint64_t RDX, + uint64_t R10, + uint64_t R8) INVALID_OP + // This one MUST be valid + // Hard fallback if we couldn't look it up + virtual uint64_t VisitSyscall6(uint64_t RDI, + uint64_t RSI, + uint64_t RDX, + uint64_t R10, + uint64_t R8, + uint64_t R9) = 0; + private: + uint32_t ArgsMask{}; + bool ConstantVal{}; + }; +#undef INVALID_OP +} diff --git a/include/FEXCore/IR/IR.h b/include/FEXCore/IR/IR.h new file mode 100644 index 000000000..a699ae636 --- /dev/null +++ b/include/FEXCore/IR/IR.h @@ -0,0 +1,329 @@ +#pragma once +#include +#include +#include +#include + +namespace FEXCore::IR { + +/** + * @brief This is a very simple wrapper for our node pointers + * + * This is necessary to allow two things + * - Reduce memory usage by having the pointer be an 32bit offset rather than the whole 64bit pointer + * - Actually use an offset from a base so we aren't storing pointers for everything + * - Makes IR list copying be as cheap as a memcpy + * Downsides + * - The IR nodes have to be allocated out of a linear array of memory + * - We currently only allow a 32bit offset, so *only* 4 million nodes per list + * - We have to have the base offset live somewhere else + * - Has to be POD and trivially copyable + * - Makes every real node access turn in to a [Base + Offset] access + */ +struct NodeWrapper final 
{ + // On x86-64 using a uint64_t type is more efficient since RIP addressing gives you [ + + ] + // On AArch64 using uint32_t is just more memory efficient. 32bit or 64bit offset doesn't matter + // We use uint32_t to be more memory efficient (Cuts our node list size in half) + using NodeOffsetType = uint32_t; + NodeOffsetType NodeOffset; + + static NodeWrapper WrapOffset(NodeOffsetType Offset) { + NodeWrapper Wrapped; + Wrapped.NodeOffset = Offset; + return Wrapped; + } + + static NodeWrapper WrapPtr(uintptr_t Base, uintptr_t Value) { + NodeWrapper Wrapped; + Wrapped.SetOffset(Base, Value); + return Wrapped; + } + + static void *UnwrapNode(uintptr_t Base, NodeWrapper Node) { + return Node.GetPtr(Base); + } + + uint32_t ID() const; + + explicit NodeWrapper() = default; + void *GetPtr(uintptr_t Base) { return reinterpret_cast(Base + NodeOffset); } + void const *GetPtr(uintptr_t Base) const { return reinterpret_cast(Base + NodeOffset); } + void SetOffset(uintptr_t Base, uintptr_t Value) { NodeOffset = Value - Base; } + bool operator==(NodeWrapper const &rhs) { return NodeOffset == rhs.NodeOffset; } +}; + +static_assert(std::is_pod::value); +static_assert(sizeof(NodeWrapper) == sizeof(uint32_t)); + +struct OrderedNodeHeader { + NodeWrapper Value; + NodeWrapper Next; + NodeWrapper Previous; +}; + +static_assert(sizeof(OrderedNodeHeader) == sizeof(uint32_t) * 3); + +/** +* @brief This is our NodeWrapperIterator +* This stores both the memory base and the provided NodeWrapper to be able to walk the list of nodes directly +* Only the increment and decrement implementations of this class require understanding the implementation details of OrderedNode +*/ +class NodeWrapperIterator final { +public: + using value_type = NodeWrapper; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = value_type*; + using const_pointer = const value_type*; + using iterator = 
NodeWrapperIterator; + using const_iterator = const NodeWrapperIterator; + using reverse_iterator = iterator; + using const_reverse_iterator = const_iterator; + using iterator_category = std::bidirectional_iterator_tag; + + using NodeType = NodeWrapper; + using NodePtr = NodeWrapper*; + using NodeRef = NodeWrapper&; + + NodeWrapperIterator(uintptr_t Base) : BaseList {Base} {} + explicit NodeWrapperIterator(uintptr_t Base, NodeType Ptr) : BaseList {Base}, Node {Ptr} {} + + bool operator==(const NodeWrapperIterator &rhs) const { + return Node.NodeOffset == rhs.Node.NodeOffset; + } + + bool operator!=(const NodeWrapperIterator &rhs) const { + return !operator==(rhs); + } + + NodeWrapperIterator operator++() { + OrderedNodeHeader *RealNode = reinterpret_cast(Node.GetPtr(BaseList)); + Node = RealNode->Next; + return *this; + } + + NodeWrapperIterator operator--() { + OrderedNodeHeader *RealNode = reinterpret_cast(Node.GetPtr(BaseList)); + Node = RealNode->Previous; + return *this; + } + + NodeRef operator*() { + return Node; + } + + NodePtr operator()() { + return &Node; + } + +private: + uintptr_t BaseList{}; + NodeType Node{}; +}; + +/** + * @brief The IROp_Header is an dynamically sized array + * At the end it contains a uint8_t for the number of arguments that Op has + * Then there is an unsized array of NodeWrapper arguments for the number of arguments this op has + * The op structures that are including the header must ensure that they pad themselves correctly to the number of arguments used + */ +struct IROp_Header; + +/** + * @brief This is a node in our IR representation + * Is a doubly linked list node that lives in a representation of a linearly allocated node list + * The links in the nodes can live in a list independent of the data IR data + * + * ex. + * Region1 : ... <-> <-> <-> ... + * | * | + * v v + * Region2 : ...... 
+ * + * In this example the OrderedNodes are allocated in one linear memory region (Not necessarily contiguous with one another linking) + * The second region is contiguous but they don't have any relationship with one another directly + */ +class OrderedNode final { + friend class NodeWrapperIterator; + friend class OrderedList; + public: + // These three values are laid out very specifically to make it fast to access the NodeWrappers specifically + OrderedNodeHeader Header; + uint32_t NumUses; + + OrderedNode() = default; + + /** + * @brief Appends a node to this current node + * + * Before. <-> <-> + * After. <-> <-> <-> Next + * + * @return Pointer to the node being added + */ + NodeWrapper append(uintptr_t Base, NodeWrapper Node) { + // Set Next Node's Previous to incoming node + SetPrevious(Base, Header.Next, Node); + + // Set Incoming node's links to this node's links + SetPrevious(Base, Node, Wrapped(Base)); + SetNext(Base, Node, Header.Next); + + // Set this node's next to the incoming node + SetNext(Base, Wrapped(Base), Node); + + // Return the node we are appending + return Node; + } + + OrderedNode *append(uintptr_t Base, OrderedNode *Node) { + NodeWrapper WNode = Node->Wrapped(Base); + // Set Next Node's Previous to incoming node + SetPrevious(Base, Header.Next, WNode); + + // Set Incoming node's links to this node's links + SetPrevious(Base, WNode, Wrapped(Base)); + SetNext(Base, WNode, Header.Next); + + // Set this node's next to the incoming node + SetNext(Base, Wrapped(Base), WNode); + + // Return the node we are appending + return Node; + } + + /** + * @brief Prepends a node to the current node + * Before. <-> <-> + * After. 
<-> <-> <-> Next + * + * @return Pointer to the node being added + */ + NodeWrapper prepend(uintptr_t Base, NodeWrapper Node) { + // Set the previous node's next to the incoming node + SetNext(Base, Header.Previous, Node); + + // Set the incoming node's links + SetPrevious(Base, Node, Header.Previous); + SetNext(Base, Node, Wrapped(Base)); + + // Set the current node's link + SetPrevious(Base, Wrapped(Base), Node); + + // Return the node we are prepending + return Node; + } + + OrderedNode *prepend(uintptr_t Base, OrderedNode *Node) { + NodeWrapper WNode = Node->Wrapped(Base); + // Set the previous node's next to the incoming node + SetNext(Base, Header.Previous, WNode); + + // Set the incoming node's links + SetPrevious(Base, WNode, Header.Previous); + SetNext(Base, WNode, Wrapped(Base)); + + // Set the current node's link + SetPrevious(Base, Wrapped(Base), WNode); + + // Return the node we are prepending + return Node; + } + + /** + * @brief Gets the remaining size of the blocks from this point onward + * + * Doesn't find the head of the list + * + */ + size_t size(uintptr_t Base) const { + size_t Size = 1; + // Walk the list forward until we hit a sentinal + NodeWrapper Current = Header.Next; + while (Current.NodeOffset != 0) { + ++Size; + OrderedNode *RealNode = reinterpret_cast(Current.GetPtr(Base)); + Current = RealNode->Header.Next; + } + return Size; + } + + void Unlink(uintptr_t Base) { + // This removes the node from the list. 
Orphaning it + // Before: <-> <-> + // After: + SetNext(Base, Header.Previous, Header.Next); + SetPrevious(Base, Header.Next, Header.Previous); + } + + IROp_Header const* Op(uintptr_t Base) const { return reinterpret_cast(Header.Value.GetPtr(Base)); } + IROp_Header *Op(uintptr_t Base) { return reinterpret_cast(Header.Value.GetPtr(Base)); } + + uint32_t GetUses() const { return NumUses; } + + void AddUse() { ++NumUses; } + void RemoveUse() { --NumUses; } + + using iterator = NodeWrapperIterator; + + iterator begin(uint64_t Base) noexcept { return iterator(Base, Wrapped(Base)); } + iterator end(uint64_t Base, uint64_t End) noexcept { return iterator(Base, WrappedOffset(End)); } + + NodeWrapper Wrapped(uintptr_t Base) { + NodeWrapper Tmp; + Tmp.SetOffset(Base, reinterpret_cast(this)); + return Tmp; + } + + private: + NodeWrapper WrappedOffset(uint32_t Offset) { + NodeWrapper Tmp; + Tmp.NodeOffset = Offset; + return Tmp; + } + + static void SetPrevious(uintptr_t Base, NodeWrapper Node, NodeWrapper New) { + if (Node.NodeOffset == 0) return; + OrderedNode *RealNode = reinterpret_cast(Node.GetPtr(Base)); + RealNode->Header.Previous = New; + } + + static void SetNext(uintptr_t Base, NodeWrapper Node, NodeWrapper New) { + if (Node.NodeOffset == 0) return; + OrderedNode *RealNode = reinterpret_cast(Node.GetPtr(Base)); + RealNode->Header.Next = New; + } + + void SetUses(uint32_t Uses) { NumUses = Uses; } +}; + +static_assert(std::is_pod::value); +static_assert(std::is_trivially_copyable::value); +static_assert(offsetof(OrderedNode, Header) == 0); +static_assert(sizeof(OrderedNode) == (sizeof(OrderedNodeHeader) + sizeof(uint32_t))); + +#define IROP_ENUM +#define IROP_STRUCTS +#define IROP_SIZES +#include "IRDefines.inc" + +template +struct Wrapper final { + T *first; + OrderedNode *Node; ///< Actual offset of this IR in ths list + + operator Wrapper() const { return Wrapper {reinterpret_cast(first), Node}; } + operator OrderedNode *() { return Node; } + operator NodeWrapper () 
{ return Node->Header.Value; } +}; + +template +class IRListView; + +void Dump(std::stringstream *out, IRListView const* IR); + +inline uint32_t NodeWrapper::ID() const { return NodeOffset / sizeof(IR::OrderedNode); } + +}; diff --git a/include/FEXCore/IR/IntrusiveIRList.h b/include/FEXCore/IR/IntrusiveIRList.h new file mode 100644 index 000000000..f3fcd78e9 --- /dev/null +++ b/include/FEXCore/IR/IntrusiveIRList.h @@ -0,0 +1,127 @@ +#pragma once + +#include "FEXCore/IR/IR.h" +#include "LogManager.h" + +#include +#include +#include +#include + +namespace FEXCore::IR { +/** + * @brief This is purely an intrusive allocator + * This doesn't support any form of ordering at all + * Just provides a chunk of memory for allocating IR nodes from + * + * Can potentially support reallocation if we are smart and make sure to invalidate anything holding a true pointer + */ +class IntrusiveAllocator final { + public: + IntrusiveAllocator() = delete; + IntrusiveAllocator(IntrusiveAllocator &&) = delete; + IntrusiveAllocator(size_t Size) + : MemorySize {Size} { + Data = reinterpret_cast(calloc(Size, 1)); + } + + ~IntrusiveAllocator() { + free(reinterpret_cast(Data)); + } + + bool CheckSize(size_t Size) { + size_t NewOffset = CurrentOffset + Size; + return NewOffset <= MemorySize; + } + + void *Allocate(size_t Size) { + assert(CheckSize(Size) && "Failure"); + size_t NewOffset = CurrentOffset + Size; + uintptr_t NewPointer = Data + CurrentOffset; + CurrentOffset = NewOffset; + return reinterpret_cast(NewPointer); + } + + size_t Size() const { return CurrentOffset; } + size_t BackingSize() const { return MemorySize; } + + uintptr_t const Begin() const { return Data; } + + void Reset() { CurrentOffset = 0; } + + void CopyData(IntrusiveAllocator const &rhs) { + CurrentOffset = rhs.CurrentOffset; + memcpy(reinterpret_cast(Data), reinterpret_cast(rhs.Data), CurrentOffset); + } + + private: + size_t CurrentOffset {0}; + size_t MemorySize; + uintptr_t Data; +}; + +template +class IRListView 
final { +public: + IRListView() = delete; + IRListView(IRListView &&) = delete; + + IRListView(IntrusiveAllocator *Data, IntrusiveAllocator *List) { + DataSize = Data->Size(); + ListSize = List->Size(); + + if (Copy) { + IRData = malloc(DataSize + ListSize); + ListData = reinterpret_cast(reinterpret_cast(IRData) + DataSize); + memcpy(IRData, reinterpret_cast(Data->Begin()), DataSize); + memcpy(ListData, reinterpret_cast(List->Begin()), ListSize); + } + else { + // We are just pointing to the data + IRData = reinterpret_cast(Data->Begin()); + ListData = reinterpret_cast(List->Begin()); + } + } + + ~IRListView() { + if (Copy) { + free (IRData); + // ListData is just offset from IRData + } + } + + uintptr_t const GetData() const { return reinterpret_cast(IRData); } + uintptr_t const GetListData() const { return reinterpret_cast(ListData); } + + size_t GetDataSize() const { return DataSize; } + size_t GetListSize() const { return ListSize; } + + using iterator = NodeWrapperIterator; + + iterator begin() const noexcept + { + NodeWrapper Wrapped; + Wrapped.NodeOffset = sizeof(OrderedNode); + return iterator(reinterpret_cast(ListData), Wrapped); + } + + /** + * @brief This is not an iterator that you can reverse iterator through! 
+ * + * @return Our iterator sentinal to ensure ending correctly + */ + iterator end() const noexcept + { + NodeWrapper Wrapped; + Wrapped.NodeOffset = 0; + return iterator(reinterpret_cast(ListData), Wrapped); + } + +private: + void *IRData; + void *ListData; + size_t DataSize; + size_t ListSize; +}; +} + diff --git a/include/FEXCore/Memory/MemMapper.h b/include/FEXCore/Memory/MemMapper.h new file mode 100644 index 000000000..bc4558508 --- /dev/null +++ b/include/FEXCore/Memory/MemMapper.h @@ -0,0 +1,15 @@ +#pragma once +#include +#include + +namespace FEXCore::Memory { + struct MemRegion { + void *Ptr; + size_t Offset; + size_t Size; + + bool operator==(void *rhs) const { return Ptr == rhs; } + bool contains(uint64_t Addr) const { return Addr >= Offset && Addr < (Offset + Size); } + }; +} + diff --git a/include/FEXCore/Memory/SharedMem.h b/include/FEXCore/Memory/SharedMem.h new file mode 100644 index 000000000..8961f32d2 --- /dev/null +++ b/include/FEXCore/Memory/SharedMem.h @@ -0,0 +1,27 @@ +#pragma once +#include +#include + +namespace FEXCore::SHM { + + struct SHMObject { + void *Ptr; + uint8_t InternalState[0]; + }; + + /** + * @brief Allocate a shared memory region that will be the base of our VM's memory + * + * @param Size The size of the SHM region + * + * @return An object representing our internal SHM state + */ + SHMObject *AllocateSHMRegion(size_t Size); + + /** + * @brief Destroy the SHM region + * + * @param SHM The region previously created with AllocateSHMRegion + */ + void DestroyRegion(SHMObject *SHM); +}