diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 9b54df8d7..8ae032d3f 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -129,8 +129,6 @@ add_library(core
 set(RECOMPILER_SRCS
   cpu_recompiler.cpp
   cpu_recompiler.h
-  cpu_recompiler_thunks.h
-  cpu_recompiler_types.h
 )
 
 target_precompile_headers(core PRIVATE "pch.h")
diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj
index 7c4a168b9..1de21f683 100644
--- a/src/core/core.vcxproj
+++ b/src/core/core.vcxproj
@@ -106,8 +106,6 @@
       true
-    <ClInclude Include="cpu_recompiler_thunks.h" />
-    <ClInclude Include="cpu_recompiler_types.h" />
diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters
index 0e497ecc8..e1dc75c5c 100644
--- a/src/core/core.vcxproj.filters
+++ b/src/core/core.vcxproj.filters
@@ -90,9 +90,7 @@
-    <ClInclude Include="cpu_recompiler_thunks.h" />
-    <ClInclude Include="cpu_recompiler_types.h" />
diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp
index 5dec2de6e..fe968203b 100644
--- a/src/core/cpu_code_cache.cpp
+++ b/src/core/cpu_code_cache.cpp
@@ -6,7 +6,6 @@
 #include "cpu_core.h"
 #include "cpu_core_private.h"
 #include "cpu_disasm.h"
-#include "cpu_recompiler_types.h"
 #include "host.h"
 #include "settings.h"
 #include "system.h"
@@ -1564,7 +1563,7 @@ bool CPU::CodeCache::CompileBlock(Block* block)
 
 #ifdef ENABLE_RECOMPILER
   if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler)
-    host_code = Recompiler::g_compiler->CompileBlock(block, &host_code_size, &host_far_code_size);
+    host_code = g_compiler->CompileBlock(block, &host_code_size, &host_far_code_size);
 #endif
 
   block->host_code = host_code;
diff --git a/src/core/cpu_core.cpp b/src/core/cpu_core.cpp
index a01b50566..a6b3cb375 100644
--- a/src/core/cpu_core.cpp
+++ b/src/core/cpu_core.cpp
@@ -7,7 +7,6 @@
 #include "cpu_core_private.h"
 #include "cpu_disasm.h"
 #include "cpu_pgxp.h"
-#include "cpu_recompiler_thunks.h"
 #include "gte.h"
 #include "host.h"
 #include "pcdrv.h"
@@ -2628,13 +2627,13 @@ template void CPU::CodeCache::InterpretUncachedBlock<PGXPMode::Disabled>();
 template void CPU::CodeCache::InterpretUncachedBlock<PGXPMode::Memory>();
 template void CPU::CodeCache::InterpretUncachedBlock<PGXPMode::CPU>();
 
-bool CPU::Recompiler::Thunks::InterpretInstruction()
+bool CPU::RecompilerThunks::InterpretInstruction()
 {
   ExecuteInstruction<PGXPMode::Disabled, false>();
   return g_state.exception_raised;
 }
 
-bool CPU::Recompiler::Thunks::InterpretInstructionPGXP()
+bool CPU::RecompilerThunks::InterpretInstructionPGXP()
 {
   ExecuteInstruction<PGXPMode::Memory, false>();
   return g_state.exception_raised;
 }
@@ -3477,7 +3476,7 @@ bool CPU::WriteMemoryWord(VirtualMemoryAddress addr, u32 value)
   return true;
 }
 
-u64 CPU::Recompiler::Thunks::ReadMemoryByte(u32 address)
+u64 CPU::RecompilerThunks::ReadMemoryByte(u32 address)
 {
   const u32 value = GetMemoryReadHandler(address, MemoryAccessSize::Byte)(address);
   if (g_state.bus_error) [[unlikely]]
@@ -3490,7 +3489,7 @@ u64 CPU::Recompiler::Thunks::ReadMemoryByte(u32 address)
   return ZeroExtend64(value);
 }
 
-u64 CPU::Recompiler::Thunks::ReadMemoryHalfWord(u32 address)
+u64 CPU::RecompilerThunks::ReadMemoryHalfWord(u32 address)
 {
   if (!Common::IsAlignedPow2(address, 2)) [[unlikely]]
   {
@@ -3509,7 +3508,7 @@ u64 CPU::Recompiler::Thunks::ReadMemoryHalfWord(u32 address)
   return ZeroExtend64(value);
 }
 
-u64 CPU::Recompiler::Thunks::ReadMemoryWord(u32 address)
+u64 CPU::RecompilerThunks::ReadMemoryWord(u32 address)
 {
   if (!Common::IsAlignedPow2(address, 4)) [[unlikely]]
   {
@@ -3528,7 +3527,7 @@ u64 CPU::Recompiler::Thunks::ReadMemoryWord(u32 address)
   return ZeroExtend64(value);
 }
 
-u32 CPU::Recompiler::Thunks::WriteMemoryByte(u32 address, u32 value)
+u32 CPU::RecompilerThunks::WriteMemoryByte(u32 address, u32 value)
 {
   MEMORY_BREAKPOINT(MemoryAccessType::Write, MemoryAccessSize::Byte, address, value);
 
@@ -3542,7 +3541,7 @@ u32 CPU::Recompiler::Thunks::WriteMemoryByte(u32 address, u32 value)
   return 0;
 }
 
-u32 CPU::Recompiler::Thunks::WriteMemoryHalfWord(u32 address, u32 value)
+u32 CPU::RecompilerThunks::WriteMemoryHalfWord(u32 address, u32 value)
 {
   MEMORY_BREAKPOINT(MemoryAccessType::Write, MemoryAccessSize::HalfWord, address, value);
 
@@ -3562,7 +3561,7 @@ u32 CPU::Recompiler::Thunks::WriteMemoryHalfWord(u32 address, u32 value)
   return 0;
 }
 
-u32 CPU::Recompiler::Thunks::WriteMemoryWord(u32 address, u32 value)
+u32 CPU::RecompilerThunks::WriteMemoryWord(u32 address, u32 value)
 {
   MEMORY_BREAKPOINT(MemoryAccessType::Write, MemoryAccessSize::Word, address, value);
 
@@ -3582,40 +3581,40 @@ u32 CPU::Recompiler::Thunks::WriteMemoryWord(u32 address, u32 value)
   return 0;
 }
 
-u32 CPU::Recompiler::Thunks::UncheckedReadMemoryByte(u32 address)
+u32 CPU::RecompilerThunks::UncheckedReadMemoryByte(u32 address)
 {
   const u32 value = GetMemoryReadHandler(address, MemoryAccessSize::Byte)(address);
   MEMORY_BREAKPOINT(MemoryAccessType::Read, MemoryAccessSize::Byte, address, value);
   return value;
 }
 
-u32 CPU::Recompiler::Thunks::UncheckedReadMemoryHalfWord(u32 address)
+u32 CPU::RecompilerThunks::UncheckedReadMemoryHalfWord(u32 address)
 {
   const u32 value = GetMemoryReadHandler(address, MemoryAccessSize::HalfWord)(address);
   MEMORY_BREAKPOINT(MemoryAccessType::Read, MemoryAccessSize::HalfWord, address, value);
   return value;
 }
 
-u32 CPU::Recompiler::Thunks::UncheckedReadMemoryWord(u32 address)
+u32 CPU::RecompilerThunks::UncheckedReadMemoryWord(u32 address)
 {
   const u32 value = GetMemoryReadHandler(address, MemoryAccessSize::Word)(address);
   MEMORY_BREAKPOINT(MemoryAccessType::Read, MemoryAccessSize::Word, address, value);
   return value;
 }
 
-void CPU::Recompiler::Thunks::UncheckedWriteMemoryByte(u32 address, u32 value)
+void CPU::RecompilerThunks::UncheckedWriteMemoryByte(u32 address, u32 value)
 {
   MEMORY_BREAKPOINT(MemoryAccessType::Write, MemoryAccessSize::Byte, address, value);
   GetMemoryWriteHandler(address, MemoryAccessSize::Byte)(address, value);
 }
 
-void CPU::Recompiler::Thunks::UncheckedWriteMemoryHalfWord(u32 address, u32 value)
+void CPU::RecompilerThunks::UncheckedWriteMemoryHalfWord(u32 address, u32 value)
 {
   MEMORY_BREAKPOINT(MemoryAccessType::Write, MemoryAccessSize::HalfWord, address, value);
   GetMemoryWriteHandler(address, MemoryAccessSize::HalfWord)(address, value);
 }
 
-void CPU::Recompiler::Thunks::UncheckedWriteMemoryWord(u32 address, u32 value)
+void CPU::RecompilerThunks::UncheckedWriteMemoryWord(u32 address, u32 value)
 {
   MEMORY_BREAKPOINT(MemoryAccessType::Write, MemoryAccessSize::Word, address, value);
   GetMemoryWriteHandler(address, MemoryAccessSize::Word)(address, value);
diff --git a/src/core/cpu_core_private.h b/src/core/cpu_core_private.h
index 1f49e71e8..921aa0cfd 100644
--- a/src/core/cpu_core_private.h
+++ b/src/core/cpu_core_private.h
@@ -132,4 +132,36 @@ ALWAYS_INLINE static void StallUntilGTEComplete()
 void HandleA0Syscall();
 void HandleB0Syscall();
 
+#ifdef ENABLE_RECOMPILER
+
+namespace RecompilerThunks {
+
+//////////////////////////////////////////////////////////////////////////
+// Trampolines for calling back from the JIT
+// Needed because we can't cast member functions to void*...
+// TODO: Abuse carry flag or something else for exception
+//////////////////////////////////////////////////////////////////////////
+bool InterpretInstruction();
+bool InterpretInstructionPGXP();
+
+// Memory access functions for the JIT - MSB is set on exception.
+u64 ReadMemoryByte(u32 address);
+u64 ReadMemoryHalfWord(u32 address);
+u64 ReadMemoryWord(u32 address);
+u32 WriteMemoryByte(u32 address, u32 value);
+u32 WriteMemoryHalfWord(u32 address, u32 value);
+u32 WriteMemoryWord(u32 address, u32 value);
+
+// Unchecked memory access variants. No alignment or bus exceptions.
+u32 UncheckedReadMemoryByte(u32 address);
+u32 UncheckedReadMemoryHalfWord(u32 address);
+u32 UncheckedReadMemoryWord(u32 address);
+void UncheckedWriteMemoryByte(u32 address, u32 value);
+void UncheckedWriteMemoryHalfWord(u32 address, u32 value);
+void UncheckedWriteMemoryWord(u32 address, u32 value);
+
+} // namespace RecompilerThunks
+
+#endif
+
 } // namespace CPU
\ No newline at end of file
diff --git a/src/core/cpu_recompiler.h b/src/core/cpu_recompiler.h
index 8d14d3592..625420767 100644
--- a/src/core/cpu_recompiler.h
+++ b/src/core/cpu_recompiler.h
@@ -4,7 +4,6 @@
 #pragma once
 
 #include "cpu_code_cache_private.h"
-#include "cpu_recompiler_types.h"
 #include "cpu_types.h"
 
 #include
@@ -13,36 +12,72 @@
 #include
 #include
 
-namespace CPU::Recompiler {
-
-// Global options
-static constexpr bool EMULATE_LOAD_DELAYS = true;
-static constexpr bool SWAP_BRANCH_DELAY_SLOTS = true;
-
-// Arch-specific options
-#if defined(CPU_ARCH_X64)
-static constexpr u32 NUM_HOST_REGS = 16;
-static constexpr bool HAS_MEMORY_OPERANDS = true;
-#elif defined(CPU_ARCH_ARM32)
-static constexpr u32 NUM_HOST_REGS = 16;
-static constexpr bool HAS_MEMORY_OPERANDS = false;
-#elif defined(CPU_ARCH_ARM64)
-static constexpr u32 NUM_HOST_REGS = 32;
-static constexpr bool HAS_MEMORY_OPERANDS = false;
-#elif defined(CPU_ARCH_RISCV64)
-static constexpr u32 NUM_HOST_REGS = 32;
-static constexpr bool HAS_MEMORY_OPERANDS = false;
-#endif
+namespace CPU {
 
 // TODO: Get rid of the virtuals... somehow.
 class Recompiler
 {
+public:
+  // Global options
+  static constexpr bool EMULATE_LOAD_DELAYS = true;
+  static constexpr bool SWAP_BRANCH_DELAY_SLOTS = true;
+
+  // Arch-specific options
+#if defined(CPU_ARCH_X64)
+
+  // A reasonable "maximum" number of bytes per instruction.
+  static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
+  static constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
+
+  // Number of host registers.
+  static constexpr u32 NUM_HOST_REGS = 16;
+  static constexpr bool HAS_MEMORY_OPERANDS = true;
+
+#elif defined(CPU_ARCH_ARM32)
+
+  // A reasonable "maximum" number of bytes per instruction.
+  static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
+  static constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
+
+  // Number of host registers.
+  static constexpr u32 NUM_HOST_REGS = 16;
+  static constexpr bool HAS_MEMORY_OPERANDS = false;
+
+#elif defined(CPU_ARCH_ARM64)
+
+  // Number of host registers.
+  static constexpr u32 NUM_HOST_REGS = 32;
+  static constexpr bool HAS_MEMORY_OPERANDS = false;
+
+  // A reasonable "maximum" number of bytes per instruction.
+  static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
+  static constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
+
+#elif defined(CPU_ARCH_RISCV64)
+
+  // Number of host registers.
+  static constexpr u32 NUM_HOST_REGS = 32;
+  static constexpr bool HAS_MEMORY_OPERANDS = false;
+
+  // A reasonable "maximum" number of bytes per instruction.
+  static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
+  static constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
+
+#endif
+
 public:
   Recompiler();
   virtual ~Recompiler();
 
   const void* CompileBlock(CodeCache::Block* block, u32* host_code_size, u32* host_far_code_size);
 
+  static void BackpatchLoadStore(void* exception_pc, const CodeCache::LoadstoreBackpatchInfo& info);
+
+  static u32 CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size,
+                                   TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,
+                                   u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
+                                   bool is_load);
+
 protected:
   enum FlushFlags : u32
   {
@@ -274,7 +309,7 @@ protected:
   void CompileTemplate(void (Recompiler::*const_func)(CompileFlags), void (Recompiler::*func)(CompileFlags),
                        const void* pgxp_cpu_func, u32 tflags);
   void CompileLoadStoreTemplate(void (Recompiler::*func)(CompileFlags, MemoryAccessSize, bool, bool,
-                                                         const std::optional<VirtualMemoryAddress>&),
+                                                       const std::optional<VirtualMemoryAddress>&),
                                 MemoryAccessSize size, bool store, bool sign, u32 tflags);
   void FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store, bool use_fastmem);
   void CompileMoveRegTemplate(Reg dst, Reg src, bool pgxp_move);
@@ -533,11 +568,5 @@ protected:
   static const std::array s_pgxp_mem_store_functions;
 };
 
-void BackpatchLoadStore(void* exception_pc, const CodeCache::LoadstoreBackpatchInfo& info);
-
-u32 CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size, TickCount cycles_to_add,
-                          TickCount cycles_to_remove, u32 gpr_bitmask, u8 address_register, u8 data_register,
-                          MemoryAccessSize size, bool is_signed, bool is_load);
-
 extern Recompiler* g_compiler;
-} // namespace CPU::Recompiler
+} // namespace CPU
diff --git a/src/core/cpu_recompiler_arm32.cpp b/src/core/cpu_recompiler_arm32.cpp
index 4745b870a..d480ce5e5 100644
--- a/src/core/cpu_recompiler_arm32.cpp
+++ b/src/core/cpu_recompiler_arm32.cpp
@@ -4,8 +4,6 @@
 #include "cpu_recompiler_arm32.h"
 #include "cpu_core_private.h"
 #include "cpu_pgxp.h"
-#include "cpu_recompiler_thunks.h"
-#include "cpu_recompiler_types.h"
 #include "gte.h"
 #include "settings.h"
 #include "timing_event.h"
@@ -20,6 +18,9 @@
 
 #ifdef CPU_ARCH_ARM32
 
+#include "vixl/aarch32/constants-aarch32.h"
+#include "vixl/aarch32/instructions-aarch32.h"
+
 #ifdef ENABLE_HOST_DISASSEMBLY
 #include "vixl/aarch32/disasm-aarch32.h"
 #include
@@ -30,43 +31,64 @@
 
 LOG_CHANNEL(Recompiler);
 
 #define PTR(x) vixl::aarch32::MemOperand(RSTATE, (((u8*)(x)) - ((u8*)&g_state)))
 #define RMEMBASE vixl::aarch32::r3
 
-namespace CPU::Recompiler {
+static constexpr u32 FUNCTION_CALLEE_SAVED_SPACE_RESERVE = 80;  // 8 registers
+static constexpr u32 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 bytes
+static constexpr u32 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE;
 
-using namespace vixl::aarch32;
+#define RRET vixl::aarch32::r0
+#define RRETHI vixl::aarch32::r1
+#define RARG1 vixl::aarch32::r0
+#define RARG2 vixl::aarch32::r1
+#define RARG3 vixl::aarch32::r2
+#define RSCRATCH vixl::aarch32::r12
+#define RSTATE vixl::aarch32::r4
 
-constexpr u32 FUNCTION_CALLEE_SAVED_SPACE_RESERVE = 80;  // 8 registers
-constexpr u32 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 bytes
-constexpr u32 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE;
+static bool armIsCallerSavedRegister(u32 id);
+static s32 armGetPCDisplacement(const void* current, const void* target);
+static bool armIsPCDisplacementInImmediateRange(s32 displacement);
+static void armMoveAddressToReg(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr);
+static void armEmitMov(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& rd, u32 imm);
+static void armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline);
+static void armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline);
+static void armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::aarch32::Condition cond, const void* ptr);
+static void armEmitFarLoad(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr);
+static void armEmitFarStore(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr,
+                            const vixl::aarch32::Register& tempreg = RSCRATCH);
+static u8* armGetJumpTrampoline(const void* target);
 
 static constexpr u32 TRAMPOLINE_AREA_SIZE = 4 * 1024;
 static std::unordered_map<const void*, u32> s_trampoline_targets;
 static u8* s_trampoline_start_ptr = nullptr;
 static u32 s_trampoline_used = 0;
 
+namespace CPU {
+
+using namespace vixl::aarch32;
+
 static ARM32Recompiler s_instance;
 Recompiler* g_compiler = &s_instance;
 
-} // namespace CPU::Recompiler
+} // namespace CPU
 
-bool CPU::Recompiler::armIsCallerSavedRegister(u32 id)
+bool armIsCallerSavedRegister(u32 id)
 {
   return ((id >= 0 && id <= 3) || // r0-r3
           (id == 12 || id == 14)); // sp, pc
 }
 
-s32 CPU::Recompiler::armGetPCDisplacement(const void* current, const void* target)
+s32 armGetPCDisplacement(const void* current, const void* target)
 {
   Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
   Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4));
   return static_cast<s32>((reinterpret_cast<ptrdiff_t>(target) - reinterpret_cast<ptrdiff_t>(current)));
 }
 
-bool CPU::Recompiler::armIsPCDisplacementInImmediateRange(s32 displacement)
+bool armIsPCDisplacementInImmediateRange(s32 displacement)
 {
   return (displacement >= -33554432 && displacement <= 33554428);
 }
 
-void CPU::Recompiler::armEmitMov(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& rd, u32 imm)
+void armEmitMov(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& rd, u32 imm)
 {
   if (vixl::IsUintN(16, imm))
   {
@@ -78,13 +100,12 @@ void CPU::Recompiler::armEmitMov(vixl::aarch32::Assembler* armAsm, const vixl::a
   armAsm->movt(al, rd, imm >> 16);
 }
 
-void CPU::Recompiler::armMoveAddressToReg(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg,
-                                          const void* addr)
+void armMoveAddressToReg(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr)
 {
   armEmitMov(armAsm, reg, static_cast<u32>(reinterpret_cast<uintptr_t>(addr)));
 }
 
-void CPU::Recompiler::armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline)
+void armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline)
 {
   const void* cur = armAsm->GetCursorAddress<const void*>();
   s32 displacement = armGetPCDisplacement(cur, ptr);
@@ -110,7 +131,7 @@ void CPU::Recompiler::armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* p
   }
 }
 
-void CPU::Recompiler::armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline)
+void armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline)
 {
   const void* cur = armAsm->GetCursorAddress<const void*>();
   s32 displacement = armGetPCDisplacement(cur, ptr);
@@ -136,8 +157,7 @@ void CPU::Recompiler::armEmitCall(vixl::aarch32::Assembler* armAsm, const void*
   }
 }
 
-void CPU::Recompiler::armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::aarch32::Condition cond,
-                                        const void* ptr)
+void armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::aarch32::Condition cond, const void* ptr)
 {
   const s32 displacement = armGetPCDisplacement(armAsm->GetCursorAddress<const void*>(), ptr);
   if (!armIsPCDisplacementInImmediateRange(displacement))
@@ -152,15 +172,14 @@ void CPU::Recompiler::armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::
   }
 }
 
-void CPU::Recompiler::armEmitFarLoad(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg,
-                                     const void* addr)
+void armEmitFarLoad(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr)
 {
   armMoveAddressToReg(armAsm, reg, addr);
   armAsm->ldr(reg, MemOperand(reg));
 }
 
-void CPU::Recompiler::armEmitFarStore(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg,
-                                      const void* addr, const vixl::aarch32::Register& tempreg)
+void armEmitFarStore(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr,
+                     const vixl::aarch32::Register& tempreg)
 {
   armMoveAddressToReg(armAsm, tempreg, addr);
   armAsm->str(reg, MemOperand(tempreg));
@@ -319,19 +338,19 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
 
   return static_cast<u32>(armAsm->GetCursorOffset()) /* + TRAMPOLINE_AREA_SIZE*/;
 }
 
-CPU::Recompiler::ARM32Recompiler::ARM32Recompiler() : m_emitter(A32), m_far_emitter(A32)
+CPU::ARM32Recompiler::ARM32Recompiler() : m_emitter(A32), m_far_emitter(A32)
 {
 }
 
-CPU::Recompiler::ARM32Recompiler::~ARM32Recompiler() = default;
+CPU::ARM32Recompiler::~ARM32Recompiler() = default;
 
-const void* CPU::Recompiler::ARM32Recompiler::GetCurrentCodePointer()
+const void* CPU::ARM32Recompiler::GetCurrentCodePointer()
 {
   return armAsm->GetCursorAddress<const void*>();
 }
 
-void CPU::Recompiler::ARM32Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space,
-                                             u8* far_code_buffer, u32 far_code_space)
+void CPU::ARM32Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,
+                                 u32 far_code_space)
 {
   Recompiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space);
@@ -369,7 +388,7 @@ void CPU::Recompiler::ARM32Recompiler::Reset(CodeCache::Block* block, u8* code_b
   }
 }
 
-void CPU::Recompiler::ARM32Recompiler::SwitchToFarCode(bool emit_jump, vixl::aarch32::ConditionType cond)
+void CPU::ARM32Recompiler::SwitchToFarCode(bool emit_jump, vixl::aarch32::ConditionType cond)
 {
   DebugAssert(armAsm == &m_emitter);
   if (emit_jump)
@@ -395,7 +414,7 @@ void CPU::Recompiler::ARM32Recompiler::SwitchToFarCode(bool emit_jump, vixl::aar
   armAsm = &m_far_emitter;
 }
 
-void CPU::Recompiler::ARM32Recompiler::SwitchToFarCodeIfBitSet(const vixl::aarch32::Register& reg, u32 bit)
+void CPU::ARM32Recompiler::SwitchToFarCodeIfBitSet(const vixl::aarch32::Register& reg, u32 bit)
 {
   armAsm->tst(reg, 1u << bit);
 
@@ -416,8 +435,7 @@ void CPU::Recompiler::ARM32Recompiler::SwitchToFarCodeIfBitSet(const vixl::aarch
   armAsm = &m_far_emitter;
 }
 
-void CPU::Recompiler::ARM32Recompiler::SwitchToFarCodeIfRegZeroOrNonZero(const vixl::aarch32::Register& reg,
-                                                                         bool nonzero)
+void CPU::ARM32Recompiler::SwitchToFarCodeIfRegZeroOrNonZero(const vixl::aarch32::Register& reg, bool nonzero)
 {
   armAsm->cmp(reg, 0);
 
@@ -438,7 +456,7 @@ void CPU::Recompiler::ARM32Recompiler::SwitchToFarCodeIfRegZeroOrNonZero(const v
   armAsm = &m_far_emitter;
 }
 
-void CPU::Recompiler::ARM32Recompiler::SwitchToNearCode(bool emit_jump, vixl::aarch32::ConditionType cond)
+void CPU::ARM32Recompiler::SwitchToNearCode(bool emit_jump, vixl::aarch32::ConditionType cond)
 {
   DebugAssert(armAsm == &m_far_emitter);
   if (emit_jump)
@@ -464,17 +482,17 @@ void CPU::Recompiler::ARM32Recompiler::SwitchToNearCode(bool emit_jump, vixl::aa
   armAsm = &m_emitter;
 }
 
-void CPU::Recompiler::ARM32Recompiler::EmitMov(const vixl::aarch32::Register& dst, u32 val)
+void CPU::ARM32Recompiler::EmitMov(const vixl::aarch32::Register& dst, u32 val)
 {
   armEmitMov(armAsm, dst, val);
 }
 
-void CPU::Recompiler::ARM32Recompiler::EmitCall(const void* ptr, bool force_inline /*= false*/)
+void CPU::ARM32Recompiler::EmitCall(const void* ptr, bool force_inline /*= false*/)
 {
   armEmitCall(armAsm, ptr, force_inline);
 }
 
-vixl::aarch32::Operand CPU::Recompiler::ARM32Recompiler::armCheckAddSubConstant(s32 val)
+vixl::aarch32::Operand CPU::ARM32Recompiler::armCheckAddSubConstant(s32 val)
 {
   if (ImmediateA32::IsImmediateA32(static_cast<u32>(val)))
     return vixl::aarch32::Operand(static_cast<s32>(val));
@@ -483,27 +501,27 @@ vixl::aarch32::Operand CPU::Recompiler::ARM32Recompiler::armCheckAddSubConstant(
   return vixl::aarch32::Operand(RSCRATCH);
 }
 
-vixl::aarch32::Operand CPU::Recompiler::ARM32Recompiler::armCheckAddSubConstant(u32 val)
+vixl::aarch32::Operand CPU::ARM32Recompiler::armCheckAddSubConstant(u32 val)
 {
   return armCheckAddSubConstant(static_cast<s32>(val));
 }
 
-vixl::aarch32::Operand CPU::Recompiler::ARM32Recompiler::armCheckCompareConstant(s32 val)
+vixl::aarch32::Operand CPU::ARM32Recompiler::armCheckCompareConstant(s32 val)
 {
   return armCheckAddSubConstant(val);
 }
 
-vixl::aarch32::Operand CPU::Recompiler::ARM32Recompiler::armCheckLogicalConstant(u32 val)
+vixl::aarch32::Operand CPU::ARM32Recompiler::armCheckLogicalConstant(u32 val)
 {
   return armCheckAddSubConstant(val);
 }
 
-void CPU::Recompiler::ARM32Recompiler::BeginBlock()
+void CPU::ARM32Recompiler::BeginBlock()
 {
   Recompiler::BeginBlock();
 }
 
-void CPU::Recompiler::ARM32Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
+void CPU::ARM32Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
 {
   // store it first to reduce code size, because we can offset
   armMoveAddressToReg(armAsm, RARG1, ram_ptr);
@@ -579,7 +597,7 @@ bool foo(const void* a, const void* b)
   armAsm->bind(&block_unchanged);
 }
 
-void CPU::Recompiler::ARM32Recompiler::GenerateICacheCheckAndUpdate()
+void CPU::ARM32Recompiler::GenerateICacheCheckAndUpdate()
 {
   if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
   {
@@ -635,8 +653,8 @@ void CPU::Recompiler::ARM32Recompiler::GenerateICacheCheckAndUpdate()
   }
 }
 
-void CPU::Recompiler::ARM32Recompiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/,
-                                                    s32 arg3reg /*= -1*/)
+void CPU::ARM32Recompiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/,
+                                        s32 arg3reg /*= -1*/)
 {
   if (arg1reg >= 0 && arg1reg != static_cast<s32>(RARG1.GetCode()))
     armAsm->mov(RARG1, Register(arg1reg));
@@ -647,7 +665,7 @@ void CPU::Recompiler::ARM32Recompiler::GenerateCall(const void* func, s32 arg1re
   EmitCall(func);
 }
 
-void CPU::Recompiler::ARM32Recompiler::EndBlock(const std::optional<u32>& newpc, bool do_event_test)
+void CPU::ARM32Recompiler::EndBlock(const std::optional<u32>& newpc, bool do_event_test)
 {
   if (newpc.has_value())
   {
@@ -664,7 +682,7 @@ void CPU::Recompiler::ARM32Recompiler::EndBlock(const std::optional<u32>& newpc,
 
   EndAndLinkBlock(newpc, do_event_test, false);
 }
 
-void CPU::Recompiler::ARM32Recompiler::EndBlockWithException(Exception excode)
+void CPU::ARM32Recompiler::EndBlockWithException(Exception excode)
 {
   // flush regs, but not pc, it's going to get overwritten
   // flush cycles because of the GTE instruction stuff...
@@ -682,8 +700,7 @@ void CPU::Recompiler::ARM32Recompiler::EndBlockWithException(Exception excode)
 
   EndAndLinkBlock(std::nullopt, true, false);
 }
 
-void CPU::Recompiler::ARM32Recompiler::EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test,
-                                                       bool force_run_events)
+void CPU::ARM32Recompiler::EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test, bool force_run_events)
 {
   // event test
   // pc should've been flushed
@@ -740,7 +757,7 @@ void CPU::Recompiler::ARM32Recompiler::EndAndLinkBlock(const std::optional<u32>&
   }
 }
 
-const void* CPU::Recompiler::ARM32Recompiler::EndCompile(u32* code_size, u32* far_code_size)
+const void* CPU::ARM32Recompiler::EndCompile(u32* code_size, u32* far_code_size)
 {
 #ifdef VIXL_DEBUG
   m_emitter_check.reset();
@@ -757,7 +774,7 @@ const void* CPU::Recompiler::ARM32Recompiler::EndCompile(u32* code_size, u32* fa
 
   return code;
 }
 
-const char* CPU::Recompiler::ARM32Recompiler::GetHostRegName(u32 reg) const
+const char* CPU::ARM32Recompiler::GetHostRegName(u32 reg) const
 {
   static constexpr std::array reg64_names = {
     {"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
@@ -765,80 +782,80 @@ const char* CPU::Recompiler::ARM32Recompiler::GetHostRegName(u32 reg) const
   return (reg < reg64_names.size()) ? reg64_names[reg] : "UNKNOWN";
 }
 
-void CPU::Recompiler::ARM32Recompiler::LoadHostRegWithConstant(u32 reg, u32 val)
+void CPU::ARM32Recompiler::LoadHostRegWithConstant(u32 reg, u32 val)
 {
   EmitMov(Register(reg), val);
 }
 
-void CPU::Recompiler::ARM32Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr)
+void CPU::ARM32Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr)
 {
   armAsm->ldr(Register(reg), PTR(ptr));
 }
 
-void CPU::Recompiler::ARM32Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr)
+void CPU::ARM32Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr)
 {
   armAsm->str(Register(reg), PTR(ptr));
 }
 
-void CPU::Recompiler::ARM32Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr)
+void CPU::ARM32Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr)
 {
   EmitMov(RSCRATCH, val);
   armAsm->str(RSCRATCH, PTR(ptr));
 }
 
-void CPU::Recompiler::ARM32Recompiler::CopyHostReg(u32 dst, u32 src)
+void CPU::ARM32Recompiler::CopyHostReg(u32 dst, u32 src)
 {
   if (src != dst)
     armAsm->mov(Register(dst), Register(src));
 }
 
-void CPU::Recompiler::ARM32Recompiler::AssertRegOrConstS(CompileFlags cf) const
+void CPU::ARM32Recompiler::AssertRegOrConstS(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_s || cf.const_s);
 }
 
-void CPU::Recompiler::ARM32Recompiler::AssertRegOrConstT(CompileFlags cf) const
+void CPU::ARM32Recompiler::AssertRegOrConstT(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_t || cf.const_t);
 }
 
-vixl::aarch32::MemOperand CPU::Recompiler::ARM32Recompiler::MipsPtr(Reg r) const
+vixl::aarch32::MemOperand CPU::ARM32Recompiler::MipsPtr(Reg r) const
 {
   DebugAssert(r < Reg::count);
   return PTR(&g_state.regs.r[static_cast<u8>(r)]);
 }
 
-vixl::aarch32::Register CPU::Recompiler::ARM32Recompiler::CFGetRegD(CompileFlags cf) const
+vixl::aarch32::Register CPU::ARM32Recompiler::CFGetRegD(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_d);
   return Register(cf.host_d);
 }
 
-vixl::aarch32::Register CPU::Recompiler::ARM32Recompiler::CFGetRegS(CompileFlags cf) const
+vixl::aarch32::Register CPU::ARM32Recompiler::CFGetRegS(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_s);
   return Register(cf.host_s);
 }
 
-vixl::aarch32::Register CPU::Recompiler::ARM32Recompiler::CFGetRegT(CompileFlags cf) const
+vixl::aarch32::Register CPU::ARM32Recompiler::CFGetRegT(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_t);
   return Register(cf.host_t);
 }
 
-vixl::aarch32::Register CPU::Recompiler::ARM32Recompiler::CFGetRegLO(CompileFlags cf) const
+vixl::aarch32::Register CPU::ARM32Recompiler::CFGetRegLO(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_lo);
   return Register(cf.host_lo);
 }
 
-vixl::aarch32::Register CPU::Recompiler::ARM32Recompiler::CFGetRegHI(CompileFlags cf) const
+vixl::aarch32::Register CPU::ARM32Recompiler::CFGetRegHI(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_hi);
   return Register(cf.host_hi);
 }
 
-vixl::aarch32::Register CPU::Recompiler::ARM32Recompiler::GetMembaseReg()
+vixl::aarch32::Register CPU::ARM32Recompiler::GetMembaseReg()
 {
   const u32 code = RMEMBASE.GetCode();
   if (!IsHostRegAllocated(code))
@@ -852,7 +869,7 @@ vixl::aarch32::Register CPU::Recompiler::ARM32Recompiler::GetMembaseReg()
 
   return RMEMBASE;
 }
 
-void CPU::Recompiler::ARM32Recompiler::MoveSToReg(const vixl::aarch32::Register& dst, CompileFlags cf)
+void CPU::ARM32Recompiler::MoveSToReg(const vixl::aarch32::Register& dst, CompileFlags cf)
 {
   if (cf.valid_host_s)
   {
@@ -871,7 +888,7 @@ void CPU::Recompiler::ARM32Recompiler::MoveSToReg(const vixl::aarch32::Register&
   }
 }
 
-void CPU::Recompiler::ARM32Recompiler::MoveTToReg(const vixl::aarch32::Register& dst, CompileFlags cf)
+void CPU::ARM32Recompiler::MoveTToReg(const vixl::aarch32::Register& dst, CompileFlags cf)
 {
   if (cf.valid_host_t)
   {
@@ -890,7 +907,7 @@ void CPU::Recompiler::ARM32Recompiler::MoveTToReg(const vixl::aarch32::Register&
   }
 }
 
-void CPU::Recompiler::ARM32Recompiler::MoveMIPSRegToReg(const vixl::aarch32::Register& dst, Reg reg)
+void CPU::ARM32Recompiler::MoveMIPSRegToReg(const vixl::aarch32::Register& dst, Reg reg)
 {
   DebugAssert(reg < Reg::count);
   if (const std::optional<u32> hreg = CheckHostReg(0, Recompiler::HR_TYPE_CPU_REG, reg))
@@ -901,9 +918,8 @@ void CPU::Recompiler::ARM32Recompiler::MoveMIPSRegToReg(const vixl::aarch32::Reg
     armAsm->ldr(dst, MipsPtr(reg));
 }
 
-void CPU::Recompiler::ARM32Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val,
-                                                                    Reg arg2reg /* = Reg::count */,
-                                                                    Reg arg3reg /* = Reg::count */)
+void CPU::ARM32Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg /* = Reg::count */,
+                                                        Reg arg3reg /* = Reg::count */)
 {
   DebugAssert(g_settings.gpu_pgxp_enable);
 
@@ -918,7 +934,7 @@ void CPU::Recompiler::ARM32Recompiler::GeneratePGXPCallWithMIPSRegs(const void*
   EmitCall(func);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Flush(u32 flags)
+void CPU::ARM32Recompiler::Flush(u32 flags)
 {
   Recompiler::Flush(flags);
 
@@ -1010,13 +1026,13 @@ void CPU::Recompiler::ARM32Recompiler::Flush(u32 flags)
   }
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_Fallback()
+void CPU::ARM32Recompiler::Compile_Fallback()
 {
   WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits);
 
   Flush(FLUSH_FOR_INTERPRETER);
 
-  EmitCall(reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::InterpretInstruction));
+  EmitCall(reinterpret_cast<const void*>(&CPU::RecompilerThunks::InterpretInstruction));
 
   // TODO: make me less garbage
   // TODO: this is wrong, it flushes the load delay on the same cycle when we return.
@@ -1035,7 +1051,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_Fallback()
   m_load_delay_dirty = EMULATE_LOAD_DELAYS;
 }
 
-void CPU::Recompiler::ARM32Recompiler::CheckBranchTarget(const vixl::aarch32::Register& pcreg)
+void CPU::ARM32Recompiler::CheckBranchTarget(const vixl::aarch32::Register& pcreg)
 {
   if (!g_settings.cpu_recompiler_memory_exceptions)
     return;
@@ -1050,7 +1066,7 @@ void CPU::Recompiler::ARM32Recompiler::CheckBranchTarget(const vixl::aarch32::Re
   SwitchToNearCode(false);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_jr(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_jr(CompileFlags cf)
 {
   const Register pcreg = CFGetRegS(cf);
   CheckBranchTarget(pcreg);
@@ -1061,7 +1077,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_jr(CompileFlags cf)
   EndBlock(std::nullopt, true);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_jalr(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_jalr(CompileFlags cf)
 {
   const Register pcreg = CFGetRegS(cf);
   if (MipsD() != Reg::zero)
@@ -1074,7 +1090,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_jalr(CompileFlags cf)
   EndBlock(std::nullopt, true);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond)
+void CPU::ARM32Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond)
 {
   AssertRegOrConstS(cf);
 
@@ -1148,7 +1164,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_bxx(CompileFlags cf, BranchCondit
   EndBlock(taken_pc, true);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_addi(CompileFlags cf, bool overflow)
+void CPU::ARM32Recompiler::Compile_addi(CompileFlags cf, bool overflow)
 {
   const Register rs = CFGetRegS(cf);
   const Register rt = CFGetRegT(cf);
@@ -1170,27 +1186,27 @@ void CPU::Recompiler::ARM32Recompiler::Compile_addi(CompileFlags cf, bool overfl
   }
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_addi(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_addi(CompileFlags cf)
 {
   Compile_addi(cf, g_settings.cpu_recompiler_memory_exceptions);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_addiu(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_addiu(CompileFlags cf)
 {
   Compile_addi(cf, false);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_slti(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_slti(CompileFlags cf)
 {
   Compile_slti(cf, true);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_sltiu(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_sltiu(CompileFlags cf)
 {
   Compile_slti(cf, false);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_slti(CompileFlags cf, bool sign)
+void CPU::ARM32Recompiler::Compile_slti(CompileFlags cf, bool sign)
 {
   const Register rs = CFGetRegS(cf);
   const Register rt = CFGetRegT(cf);
@@ -1199,7 +1215,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_slti(CompileFlags cf, bool sign)
   armAsm->mov(sign ? lt : lo, rt, 1);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_andi(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_andi(CompileFlags cf)
 {
   const Register rt = CFGetRegT(cf);
   if (const u32 imm = inst->i.imm_zext32(); imm != 0)
@@ -1208,7 +1224,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_andi(CompileFlags cf)
     EmitMov(rt, 0);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_ori(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_ori(CompileFlags cf)
 {
   const Register rt = CFGetRegT(cf);
   const Register rs = CFGetRegS(cf);
@@ -1218,7 +1234,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_ori(CompileFlags cf)
     armAsm->mov(rt, rs);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_xori(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_xori(CompileFlags cf)
 {
   const Register rt = CFGetRegT(cf);
   const Register rs = CFGetRegS(cf);
@@ -1228,10 +1244,9 @@ void CPU::Recompiler::ARM32Recompiler::Compile_xori(CompileFlags cf)
     armAsm->mov(rt, rs);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_shift(CompileFlags cf,
-                                                     void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register,
-                                                                                          vixl::aarch32::Register,
-                                                                                          const Operand&))
+void CPU::ARM32Recompiler::Compile_shift(CompileFlags cf,
+                                         void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register,
+                                                                              vixl::aarch32::Register, const Operand&))
 {
   const Register rd = CFGetRegD(cf);
   const Register rt = CFGetRegT(cf);
@@ -1241,24 +1256,25 @@ void CPU::Recompiler::ARM32Recompiler::Compile_shift(CompileFlags cf,
     armAsm->mov(rd, rt);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_sll(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_sll(CompileFlags cf)
 {
   Compile_shift(cf, &Assembler::lsl);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_srl(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_srl(CompileFlags cf)
 {
   Compile_shift(cf, &Assembler::lsr);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_sra(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_sra(CompileFlags cf)
 {
   Compile_shift(cf, &Assembler::asr);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_variable_shift(
-  CompileFlags cf,
-  void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register, vixl::aarch32::Register, const Operand&))
+void CPU::ARM32Recompiler::Compile_variable_shift(CompileFlags cf,
+                                                  void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register,
+                                                                                       vixl::aarch32::Register,
+                                                                                       const Operand&))
 {
   const Register rd = CFGetRegD(cf);
 
@@ -1283,22 +1299,22 @@ void CPU::Recompiler::ARM32Recompiler::Compile_variable_shift(
   }
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_sllv(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_sllv(CompileFlags cf)
 {
   Compile_variable_shift(cf, &Assembler::lsl);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_srlv(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_srlv(CompileFlags cf)
 {
   Compile_variable_shift(cf, &Assembler::lsr);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_srav(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_srav(CompileFlags cf)
 {
   Compile_variable_shift(cf, &Assembler::asr);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_mult(CompileFlags cf, bool sign)
+void CPU::ARM32Recompiler::Compile_mult(CompileFlags cf, bool sign)
 {
   const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
   if (!cf.valid_host_s)
@@ -1315,17 +1331,17 @@ void CPU::Recompiler::ARM32Recompiler::Compile_mult(CompileFlags cf, bool sign)
   (sign) ? armAsm->smull(lo, hi, rs, rt) : armAsm->umull(lo, hi, rs, rt);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_mult(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_mult(CompileFlags cf)
 {
   Compile_mult(cf, true);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_multu(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_multu(CompileFlags cf)
 {
   Compile_mult(cf, false);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_div(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_div(CompileFlags cf)
 {
   const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
   if (!cf.valid_host_s)
@@ -1371,7 +1387,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_div(CompileFlags cf)
   armAsm->bind(&done);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_divu(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_divu(CompileFlags cf)
 {
   const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
   if (!cf.valid_host_s)
@@ -1402,7 +1418,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_divu(CompileFlags cf)
   armAsm->bind(&done);
 }
 
-void CPU::Recompiler::ARM32Recompiler::TestOverflow(const vixl::aarch32::Register& result)
+void CPU::ARM32Recompiler::TestOverflow(const vixl::aarch32::Register& result)
 {
   SwitchToFarCode(true, vs);
 
@@ -1418,11 +1434,10 @@ void CPU::Recompiler::ARM32Recompiler::TestOverflow(const vixl::aarch32::Registe
   SwitchToNearCode(false);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_dst_op(CompileFlags cf,
-                                                      void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register,
-                                                                                           vixl::aarch32::Register,
-                                                                                           const Operand&),
-                                                      bool commutative, bool logical, bool overflow)
+void CPU::ARM32Recompiler::Compile_dst_op(CompileFlags cf,
+                                          void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register,
+                                                                               vixl::aarch32::Register, const Operand&),
+                                          bool commutative, bool logical, bool overflow)
 {
   AssertRegOrConstS(cf);
   AssertRegOrConstT(cf);
@@ -1470,7 +1485,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_dst_op(CompileFlags cf,
     TestOverflow(rd);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_add(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_add(CompileFlags cf)
 {
   if (g_settings.cpu_recompiler_memory_exceptions)
     Compile_dst_op(cf, &Assembler::adds, true, false, true);
@@ -1478,12 +1493,12 @@ void CPU::Recompiler::ARM32Recompiler::Compile_add(CompileFlags cf)
     Compile_dst_op(cf, &Assembler::add, true, false, false);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_addu(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_addu(CompileFlags cf)
 {
   Compile_dst_op(cf, &Assembler::add, true, false, false);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_sub(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_sub(CompileFlags cf)
 {
   if (g_settings.cpu_recompiler_memory_exceptions)
    Compile_dst_op(cf, &Assembler::subs, false, false, true);
@@ -1491,12 +1506,12 @@ void CPU::Recompiler::ARM32Recompiler::Compile_sub(CompileFlags cf)
     Compile_dst_op(cf, &Assembler::sub, false, false, false);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_subu(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_subu(CompileFlags cf)
 {
   Compile_dst_op(cf, &Assembler::sub, false, false, false);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_and(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_and(CompileFlags cf)
 {
   AssertRegOrConstS(cf);
   AssertRegOrConstT(cf);
@@ -1517,7 +1532,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_and(CompileFlags cf)
   Compile_dst_op(cf, &Assembler::and_, true, true, false);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_or(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_or(CompileFlags cf)
 {
   AssertRegOrConstS(cf);
   AssertRegOrConstT(cf);
@@ -1533,7 +1548,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_or(CompileFlags cf)
   Compile_dst_op(cf, &Assembler::orr, true, true, false);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_xor(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_xor(CompileFlags cf)
 {
   AssertRegOrConstS(cf);
   AssertRegOrConstT(cf);
@@ -1555,23 +1570,23 @@ void CPU::Recompiler::ARM32Recompiler::Compile_xor(CompileFlags cf)
   Compile_dst_op(cf, &Assembler::eor, true, true, false);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_nor(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_nor(CompileFlags cf)
 {
   Compile_or(cf);
   armAsm->mvn(CFGetRegD(cf), CFGetRegD(cf));
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_slt(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_slt(CompileFlags cf)
 {
   Compile_slt(cf, true);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_sltu(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_sltu(CompileFlags cf)
 {
   Compile_slt(cf, false);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_slt(CompileFlags cf, bool sign)
+void CPU::ARM32Recompiler::Compile_slt(CompileFlags cf, bool sign)
 {
   AssertRegOrConstS(cf);
   AssertRegOrConstT(cf);
@@ -1597,9 +1612,8 @@ void CPU::Recompiler::ARM32Recompiler::Compile_slt(CompileFlags cf, bool sign)
 }
 
 vixl::aarch32::Register
-CPU::Recompiler::ARM32Recompiler::ComputeLoadStoreAddressArg(CompileFlags cf,
-                                                             const std::optional<VirtualMemoryAddress>& address,
-                                                             const std::optional<const vixl::aarch32::Register>& reg)
+CPU::ARM32Recompiler::ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
+                                                 const std::optional<const vixl::aarch32::Register>& reg)
 {
   const u32 imm = inst->i.imm_sext32();
   if (cf.valid_host_s && imm == 0 && !reg.has_value())
@@ -1639,9 +1653,9 @@ CPU::Recompiler::ARM32Recompiler::ComputeLoadStoreAddressArg(CompileFlags cf,
 }
 
 template<typename RegAllocFn>
-vixl::aarch32::Register
-CPU::Recompiler::ARM32Recompiler::GenerateLoad(const vixl::aarch32::Register& addr_reg, MemoryAccessSize size,
-                                               bool sign, bool use_fastmem, const RegAllocFn& dst_reg_alloc)
+vixl::aarch32::Register CPU::ARM32Recompiler::GenerateLoad(const vixl::aarch32::Register& addr_reg,
+                                                           MemoryAccessSize size, bool sign, bool use_fastmem,
+                                                           const RegAllocFn& dst_reg_alloc)
 {
   if (use_fastmem)
   {
@@ -1683,20 +1697,20 @@ CPU::Recompiler::ARM32Recompiler::GenerateLoad(const vixl::aarch32::Register& ad
   {
     case MemoryAccessSize::Byte:
    {
-      EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::ReadMemoryByte) :
-                         reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryByte));
+      EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryByte) :
+                         reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryByte));
    }
    break;
     case MemoryAccessSize::HalfWord:
    {
-      EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::ReadMemoryHalfWord) :
-                         reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryHalfWord));
+      EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryHalfWord) :
+                         reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryHalfWord));
    }
    break;
    case MemoryAccessSize::Word:
    {
-      EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::ReadMemoryWord) :
-                         reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryWord));
+      EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryWord) :
+                         reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryWord));
    }
    break;
   }
@@ -1751,9 +1765,9 @@ CPU::Recompiler::ARM32Recompiler::GenerateLoad(const vixl::aarch32::Register& ad
   return dst_reg;
 }
 
-void CPU::Recompiler::ARM32Recompiler::GenerateStore(const vixl::aarch32::Register& addr_reg,
-                                                     const vixl::aarch32::Register& value_reg, MemoryAccessSize size,
-                                                     bool use_fastmem)
+void CPU::ARM32Recompiler::GenerateStore(const vixl::aarch32::Register& addr_reg,
+                                         const vixl::aarch32::Register& value_reg, MemoryAccessSize size,
+                                         bool use_fastmem)
 {
   if (use_fastmem)
   {
@@ -1793,20 +1807,20 @@ void CPU::Recompiler::ARM32Recompiler::GenerateStore(const vixl::aarch32::Regist
   {
     case MemoryAccessSize::Byte:
    {
-      EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::WriteMemoryByte) :
-                         reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryByte));
+      EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryByte) :
+                         reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryByte));
    }
    break;
     case MemoryAccessSize::HalfWord:
    {
-      EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::WriteMemoryHalfWord) :
-                         reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryHalfWord));
+      EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryHalfWord) :
+                         reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryHalfWord));
    }
    break;
    case MemoryAccessSize::Word:
    {
-      EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::WriteMemoryWord) :
-                         reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryWord));
+      EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryWord) :
+                         reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryWord));
    }
    break;
   }
@@ -1837,8 +1851,8 @@ void CPU::Recompiler::ARM32Recompiler::GenerateStore(const vixl::aarch32::Regist
   }
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
-                                                   const std::optional<VirtualMemoryAddress>& address)
+void CPU::ARM32Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
+                                       const std::optional<VirtualMemoryAddress>& address)
 {
   const std::optional<Register> addr_reg = g_settings.gpu_pgxp_enable ?
                                              std::optional<Register>(Register(AllocateTempHostReg(HR_CALLEE_SAVED))) :
@@ -1865,8 +1879,8 @@ void CPU::Recompiler::ARM32Recompiler::Compile_lxx(CompileFlags cf, MemoryAccess
  }
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
-                                                   const std::optional<VirtualMemoryAddress>& address)
+void CPU::ARM32Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
+                                       const std::optional<VirtualMemoryAddress>& address)
 {
   DebugAssert(size == MemoryAccessSize::Word && !sign);
 
@@ -1959,8 +1973,8 @@ void CPU::Recompiler::ARM32Recompiler::Compile_lwx(CompileFlags cf, MemoryAccess
   }
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
-                                                    const std::optional<VirtualMemoryAddress>& address)
+void CPU::ARM32Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
+                                        const std::optional<VirtualMemoryAddress>& address)
 {
   const u32 index = static_cast<u32>(inst->r.rt.GetValue());
   const auto [ptr, action] = GetGTERegisterPointer(index, true);
@@ -2045,8 +2059,8 @@ void CPU::Recompiler::ARM32Recompiler::Compile_lwc2(CompileFlags cf, MemoryAcces
   }
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
-                                                   const std::optional<VirtualMemoryAddress>& address)
+void CPU::ARM32Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
+                                       const std::optional<VirtualMemoryAddress>& address)
 {
   AssertRegOrConstS(cf);
   AssertRegOrConstT(cf);
@@ -2073,8 +2087,8 @@ void CPU::Recompiler::ARM32Recompiler::Compile_sxx(CompileFlags cf, MemoryAccess
   }
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
-                                                   const std::optional<VirtualMemoryAddress>& address)
+void CPU::ARM32Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
+                                       const std::optional<VirtualMemoryAddress>& address)
 {
   DebugAssert(size == MemoryAccessSize::Word && !sign);
 
@@ -2147,8 +2161,8 @@ void CPU::Recompiler::ARM32Recompiler::Compile_swx(CompileFlags cf, MemoryAccess
   }
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
-                                                    const std::optional<VirtualMemoryAddress>& address)
+void CPU::ARM32Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
+                                        const std::optional<VirtualMemoryAddress>& address)
 {
   const u32 index = static_cast<u32>(inst->r.rt.GetValue());
   const auto [ptr, action] = GetGTERegisterPointer(index, false);
@@ -2203,7 +2217,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_swc2(CompileFlags cf, MemoryAcces
  }
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_mtc0(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_mtc0(CompileFlags cf)
 {
   // TODO: we need better constant setting here.. which will need backprop
   AssertRegOrConstT(cf);
@@ -2281,7 +2295,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_mtc0(CompileFlags cf)
   }
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_rfe(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_rfe(CompileFlags cf)
 {
   // shift mode bits right two, preserving upper bits
   armAsm->ldr(RARG1, PTR(&g_state.cop0_regs.sr.bits));
@@ -2293,7 +2307,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_rfe(CompileFlags cf)
 
   TestInterrupts(RARG1);
 }
 
-void CPU::Recompiler::ARM32Recompiler::TestInterrupts(const vixl::aarch32::Register& sr)
+void CPU::ARM32Recompiler::TestInterrupts(const vixl::aarch32::Register& sr)
 {
   // if Iec == 0 then goto no_interrupt
   Label no_interrupt;
@@ -2344,7 +2358,7 @@ void CPU::Recompiler::ARM32Recompiler::TestInterrupts(const vixl::aarch32::Regis
   armAsm->bind(&no_interrupt);
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_mfc2(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_mfc2(CompileFlags cf)
 {
   const u32 index = inst->cop.Cop2Index();
   const Reg rt = inst->r.rt;
@@ -2385,7 +2399,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_mfc2(CompileFlags cf)
   }
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_mtc2(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_mtc2(CompileFlags cf)
 {
   const u32 index = inst->cop.Cop2Index();
   const auto [ptr, action] = GetGTERegisterPointer(index, true);
@@ -2447,7 +2461,7 @@ void CPU::Recompiler::ARM32Recompiler::Compile_mtc2(CompileFlags cf)
   }
 }
 
-void CPU::Recompiler::ARM32Recompiler::Compile_cop2(CompileFlags cf)
+void CPU::ARM32Recompiler::Compile_cop2(CompileFlags cf)
 {
   TickCount func_ticks;
   GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks);
@@ -2514,24 +2528,24 @@ u32 CPU::Recompiler::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, vo
     case MemoryAccessSize::Byte:
    {
       armEmitCall(armAsm,
-                  is_load ? reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryByte) :
-                            reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryByte),
+                  is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryByte) :
+                            reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryByte),
                   false);
    }
    break;
     case MemoryAccessSize::HalfWord:
    {
       armEmitCall(armAsm,
-                  is_load ? reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryHalfWord) :
-                            reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryHalfWord),
+                  is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryHalfWord) :
+                            reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryHalfWord),
                   false);
    }
    break;
    case MemoryAccessSize::Word:
    {
       armEmitCall(armAsm,
-                  is_load ? reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryWord) :
-                            reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryWord),
+                  is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryWord) :
+                            reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryWord),
                   false);
    }
    break;
diff --git a/src/core/cpu_recompiler_arm32.h b/src/core/cpu_recompiler_arm32.h
index 91fe4e98d..18e0ea675 100644
--- a/src/core/cpu_recompiler_arm32.h
+++ b/src/core/cpu_recompiler_arm32.h
@@ -12,7 +12,7 @@
 #include "vixl/aarch32/assembler-aarch32.h"
 #include "vixl/aarch32/operands-aarch32.h"
 
-namespace CPU::Recompiler {
+namespace CPU {
 
 class ARM32Recompiler final : public Recompiler
 {
@@ -165,6 +165,6 @@ private:
 #endif
 };
 
-} // namespace CPU::Recompiler
+} // namespace CPU
 
 #endif // CPU_ARCH_ARM32
diff --git a/src/core/cpu_recompiler_arm64.cpp b/src/core/cpu_recompiler_arm64.cpp
index 125857695..7ceb6531c 100644
--- a/src/core/cpu_recompiler_arm64.cpp
+++ b/src/core/cpu_recompiler_arm64.cpp
@@ -4,8 +4,6 @@
 #include "cpu_recompiler_arm64.h"
 #include "cpu_core_private.h"
 #include "cpu_pgxp.h"
-#include "cpu_recompiler_thunks.h"
-#include "cpu_recompiler_types.h"
 #include "gte.h"
 #include "settings.h"
 #include "timing_event.h"
@@ -20,6 +18,8 @@
 
 #ifdef CPU_ARCH_ARM64
 
+#include "vixl/aarch64/constants-aarch64.h"
+
 #ifdef ENABLE_HOST_DISASSEMBLY
 #include "vixl/aarch64/disasm-aarch64.h"
 #endif
@@ -28,39 +28,66 @@
 
 LOG_CHANNEL(Recompiler);
 
 #define PTR(x) vixl::aarch64::MemOperand(RSTATE, (((u8*)(x)) - ((u8*)&g_state)))
 
-namespace CPU::Recompiler {
+static constexpr u64 FUNCTION_CALLEE_SAVED_SPACE_RESERVE = 80;  // 8 registers
+static constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 bytes
+static constexpr u64 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE;
 
-using namespace vixl::aarch64;
+#define RWRET vixl::aarch64::w0
+#define RXRET vixl::aarch64::x0
+#define RWARG1 vixl::aarch64::w0
+#define RXARG1 vixl::aarch64::x0
+#define RWARG2 vixl::aarch64::w1
+#define RXARG2 vixl::aarch64::x1
+#define RWARG3 vixl::aarch64::w2
+#define RXARG3 vixl::aarch64::x2
+#define RWSCRATCH vixl::aarch64::w16
+#define RXSCRATCH vixl::aarch64::x16
+#define RSTATE vixl::aarch64::x19
+#define RMEMBASE vixl::aarch64::x20
 
-constexpr u64 FUNCTION_CALLEE_SAVED_SPACE_RESERVE = 80;  // 8 registers
-constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 bytes
-constexpr u64 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE;
+bool armIsCallerSavedRegister(u32 id);
+s64 armGetPCDisplacement(const void* current, const void* target);
+bool armIsInAdrpRange(vixl::aarch64::Assembler* armAsm, const void* addr);
+void armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr);
+void armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& rd, u64 imm);
+void armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
+void armEmitCall(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
+void armEmitCondBranch(vixl::aarch64::Assembler* armAsm, vixl::aarch64::Condition cond, const void* ptr);
+void armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
+                    bool sign_extend_word = false);
+void armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
+                     const vixl::aarch64::Register& tempreg = RXSCRATCH);
+u8* armGetJumpTrampoline(const void* target);
 
 static constexpr u32 TRAMPOLINE_AREA_SIZE = 4 * 1024;
 static std::unordered_map<const void*, u32> s_trampoline_targets;
 static u8* s_trampoline_start_ptr = nullptr;
 static u32 s_trampoline_used = 0;
 
+namespace CPU {
+
+using namespace vixl::aarch64;
+
 static ARM64Recompiler s_instance;
 Recompiler* g_compiler = &s_instance;
 
-} // namespace CPU::Recompiler
+} // namespace CPU
 
-bool CPU::Recompiler::armIsCallerSavedRegister(u32 id)
+bool armIsCallerSavedRegister(u32 id)
 {
   // same on both linux and windows
   return (id <= 18);
 }
 
-void CPU::Recompiler::armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& rd, u64 imm)
+void armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& rd, u64 imm)
 {
   // From vixl macro assembler.
   DebugAssert(vixl::IsUint32(imm) || vixl::IsInt32(imm) || rd.Is64Bits());
-  DebugAssert(rd.GetCode() != sp.GetCode());
+  DebugAssert(rd.GetCode() != vixl::aarch64::sp.GetCode());
 
   if (imm == 0)
   {
-    armAsm->mov(rd, Assembler::AppropriateZeroRegFor(rd));
+    armAsm->mov(rd, vixl::aarch64::Assembler::AppropriateZeroRegFor(rd));
     return;
   }
@@ -87,25 +114,25 @@ void CPU::Recompiler::armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::a
   // using multiple instructions.
   const unsigned reg_size = rd.GetSizeInBits();
 
-  if (Assembler::IsImmMovz(imm, reg_size) && !rd.IsSP())
+  if (vixl::aarch64::Assembler::IsImmMovz(imm, reg_size) && !rd.IsSP())
   {
     // Immediate can be represented in a move zero instruction. Movz can't write
     // to the stack pointer.
     armAsm->movz(rd, imm);
     return;
   }
-  else if (Assembler::IsImmMovn(imm, reg_size) && !rd.IsSP())
+  else if (vixl::aarch64::Assembler::IsImmMovn(imm, reg_size) && !rd.IsSP())
   {
     // Immediate can be represented in a move negative instruction. Movn can't
     // write to the stack pointer.
-    armAsm->movn(rd, rd.Is64Bits() ? ~imm : (~imm & kWRegMask));
+    armAsm->movn(rd, rd.Is64Bits() ? ~imm : (~imm & vixl::aarch64::kWRegMask));
     return;
   }
-  else if (Assembler::IsImmLogical(imm, reg_size))
+  else if (vixl::aarch64::Assembler::IsImmLogical(imm, reg_size))
   {
     // Immediate can be represented in a logical orr instruction.
DebugAssert(!rd.IsZero()); - armAsm->orr(rd, Assembler::AppropriateZeroRegFor(rd), imm); + armAsm->orr(rd, vixl::aarch64::Assembler::AppropriateZeroRegFor(rd), imm); return; } @@ -152,14 +179,14 @@ void CPU::Recompiler::armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::a DebugAssert(first_mov_done); } -s64 CPU::Recompiler::armGetPCDisplacement(const void* current, const void* target) +s64 armGetPCDisplacement(const void* current, const void* target) { // pxAssert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4)); // pxAssert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4)); return static_cast<s64>((reinterpret_cast<ptrdiff_t>(target) - reinterpret_cast<ptrdiff_t>(current)) >> 2); } -bool CPU::Recompiler::armIsInAdrpRange(vixl::aarch64::Assembler* armAsm, const void* addr) +bool armIsInAdrpRange(vixl::aarch64::Assembler* armAsm, const void* addr) { const void* cur = armAsm->GetCursorAddress<const void*>(); const void* current_code_ptr_page = @@ -169,12 +196,11 @@ bool CPU::Recompiler::armIsInAdrpRange(vixl::aarch64::Assembler* armAsm, const v const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10; const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu); - return (vixl::IsInt21(page_displacement) && - (Assembler::IsImmAddSub(page_offset) || Assembler::IsImmLogical(page_offset, 64))); + return (vixl::IsInt21(page_displacement) && (vixl::aarch64::Assembler::IsImmAddSub(page_offset) || + vixl::aarch64::Assembler::IsImmLogical(page_offset, 64))); } -void CPU::Recompiler::armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, - const void* addr) +void armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr) { DebugAssert(reg.IsX()); @@ -185,12 +211,12 @@ void CPU::Recompiler::armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, cons reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF)); const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10; const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu); - if (vixl::IsInt21(page_displacement) && Assembler::IsImmAddSub(page_offset)) + if (vixl::IsInt21(page_displacement) && vixl::aarch64::Assembler::IsImmAddSub(page_offset)) { armAsm->adrp(reg, page_displacement); armAsm->add(reg, reg, page_offset); } - else if (vixl::IsInt21(page_displacement) && Assembler::IsImmLogical(page_offset, 64)) + else if (vixl::IsInt21(page_displacement) && vixl::aarch64::Assembler::IsImmLogical(page_offset, 64)) { armAsm->adrp(reg, page_displacement); armAsm->orr(reg, reg, page_offset); @@ -200,7 +226,8 @@ void CPU::Recompiler::armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, cons armEmitMov(armAsm, reg, reinterpret_cast<uintptr_t>(addr)); } } -void CPU::Recompiler::armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline) + +void armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline) { const void* cur = armAsm->GetCursorAddress<const void*>(); s64 displacement = armGetPCDisplacement(cur, ptr); @@ -226,7 +253,7 @@ void CPU::Recompiler::armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* p } } -void CPU::Recompiler::armEmitCall(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline) +void armEmitCall(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline) { const void* cur = armAsm->GetCursorAddress<const void*>(); s64 displacement = armGetPCDisplacement(cur, ptr); @@ -252,20 +279,19 @@ void CPU::Recompiler::armEmitCall(vixl::aarch64::Assembler* armAsm, const void*
} } -void CPU::Recompiler::armEmitCondBranch(vixl::aarch64::Assembler* armAsm, vixl::aarch64::Condition cond, - const void* ptr) +void armEmitCondBranch(vixl::aarch64::Assembler* armAsm, vixl::aarch64::Condition cond, const void* ptr) { const s64 jump_distance = static_cast<s64>(reinterpret_cast<intptr_t>(ptr) - reinterpret_cast<intptr_t>(armAsm->GetCursorAddress<const void*>())); // pxAssert(Common::IsAligned(jump_distance, 4)); - if (vixl::aarch64::Instruction::IsValidImmPCOffset(CondBranchType, jump_distance >> 2)) + if (vixl::aarch64::Instruction::IsValidImmPCOffset(vixl::aarch64::CondBranchType, jump_distance >> 2)) { armAsm->b(jump_distance >> 2, cond); } else { - Label branch_not_taken; + vixl::aarch64::Label branch_not_taken; armAsm->b(&branch_not_taken, InvertCondition(cond)); const s64 new_jump_distance = static_cast<s64>(reinterpret_cast<intptr_t>(ptr) - @@ -275,8 +301,8 @@ void CPU::Recompiler::armEmitCondBranch(vixl::aarch64::Assembler* armAsm, vixl:: } } -void CPU::Recompiler::armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, - const void* addr, bool sign_extend_word) +void armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr, + bool sign_extend_word) { const void* cur = armAsm->GetCursorAddress<const void*>(); const void* current_code_ptr_page = @@ -285,7 +311,7 @@ void CPU::Recompiler::armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vix reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF)); const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10; const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu); - MemOperand memop; + vixl::aarch64::MemOperand memop; const vixl::aarch64::Register xreg = reg.X(); if (vixl::IsInt21(page_displacement)) @@ -305,8 +331,8 @@ void CPU::Recompiler::armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vix armAsm->ldr(reg, memop); } -void CPU::Recompiler::armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, - const void* addr, const vixl::aarch64::Register& tempreg) +void armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr, + const vixl::aarch64::Register& tempreg) { DebugAssert(tempreg.IsX()); @@ -321,16 +347,16 @@ void CPU::Recompiler::armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vi if (vixl::IsInt21(page_displacement)) { armAsm->adrp(tempreg, page_displacement); - armAsm->str(reg, MemOperand(tempreg, static_cast<s64>(page_offset))); + armAsm->str(reg, vixl::aarch64::MemOperand(tempreg, static_cast<s64>(page_offset))); } else { armMoveAddressToReg(armAsm, tempreg, addr); - armAsm->str(reg, MemOperand(tempreg)); + armAsm->str(reg, vixl::aarch64::MemOperand(tempreg)); } } -u8* CPU::Recompiler::armGetJumpTrampoline(const void* target) +u8* armGetJumpTrampoline(const void* target) { auto it = s_trampoline_targets.find(target); if (it != s_trampoline_targets.end()) @@ -347,7 +373,7 @@ u8* CPU::Recompiler::armGetJumpTrampoline(const void* target) } u8* start = s_trampoline_start_ptr + offset; - Assembler armAsm(start, TRAMPOLINE_AREA_SIZE - offset); + vixl::aarch64::Assembler armAsm(start, TRAMPOLINE_AREA_SIZE - offset); #ifdef VIXL_DEBUG vixl::CodeBufferCheckScope armAsmCheck(&armAsm, TRAMPOLINE_AREA_SIZE - offset, vixl::CodeBufferCheckScope::kDontReserveBufferSpace); @@ -395,7 +421,6 @@ u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size) u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache) { using namespace vixl::aarch64; - using
namespace CPU::Recompiler; const s64 disp = armGetPCDisplacement(code, dst); DebugAssert(vixl::IsInt26(disp)); @@ -411,7 +436,6 @@ u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache) u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size) { using namespace vixl::aarch64; - using namespace CPU::Recompiler; Assembler actual_asm(static_cast(code), code_size); Assembler* armAsm = &actual_asm; @@ -425,7 +449,7 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size) g_enter_recompiler = armAsm->GetCursorAddress(); { // reserve some space for saving caller-saved registers - armAsm->sub(sp, sp, CPU::Recompiler::FUNCTION_STACK_SIZE); + armAsm->sub(sp, sp, FUNCTION_STACK_SIZE); // Need the CPU state for basically everything :-) armMoveAddressToReg(armAsm, RSTATE, &g_state); @@ -499,20 +523,19 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size) return static_cast(armAsm->GetCursorOffset()) + TRAMPOLINE_AREA_SIZE; } -CPU::Recompiler::ARM64Recompiler::ARM64Recompiler() - : m_emitter(PositionDependentCode), m_far_emitter(PositionIndependentCode) +CPU::ARM64Recompiler::ARM64Recompiler() : m_emitter(PositionDependentCode), m_far_emitter(PositionIndependentCode) { } -CPU::Recompiler::ARM64Recompiler::~ARM64Recompiler() = default; +CPU::ARM64Recompiler::~ARM64Recompiler() = default; -const void* CPU::Recompiler::ARM64Recompiler::GetCurrentCodePointer() +const void* CPU::ARM64Recompiler::GetCurrentCodePointer() { return armAsm->GetCursorAddress(); } -void CPU::Recompiler::ARM64Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, - u8* far_code_buffer, u32 far_code_space) +void CPU::ARM64Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, + u32 far_code_space) { Recompiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space); @@ -547,7 +570,7 @@ void CPU::Recompiler::ARM64Recompiler::Reset(CodeCache::Block* block, u8* code_b } } -void CPU::Recompiler::ARM64Recompiler::SwitchToFarCode(bool emit_jump, vixl::aarch64::Condition cond) +void CPU::ARM64Recompiler::SwitchToFarCode(bool emit_jump, vixl::aarch64::Condition cond) { DebugAssert(armAsm == &m_emitter); if (emit_jump) @@ -575,7 +598,7 @@ void CPU::Recompiler::ARM64Recompiler::SwitchToFarCode(bool emit_jump, vixl::aar armAsm = &m_far_emitter; } -void CPU::Recompiler::ARM64Recompiler::SwitchToFarCodeIfBitSet(const vixl::aarch64::Register& reg, u32 bit) +void CPU::ARM64Recompiler::SwitchToFarCodeIfBitSet(const vixl::aarch64::Register& reg, u32 bit) { const s64 disp = armGetPCDisplacement(GetCurrentCodePointer(), m_far_emitter.GetCursorAddress()); if (vixl::IsInt14(disp)) @@ -593,8 +616,7 @@ void CPU::Recompiler::ARM64Recompiler::SwitchToFarCodeIfBitSet(const vixl::aarch armAsm = &m_far_emitter; } -void CPU::Recompiler::ARM64Recompiler::SwitchToFarCodeIfRegZeroOrNonZero(const vixl::aarch64::Register& reg, - bool nonzero) +void CPU::ARM64Recompiler::SwitchToFarCodeIfRegZeroOrNonZero(const vixl::aarch64::Register& reg, bool nonzero) { const s64 disp = armGetPCDisplacement(GetCurrentCodePointer(), m_far_emitter.GetCursorAddress()); if (vixl::IsInt19(disp)) @@ -612,7 +634,7 @@ void CPU::Recompiler::ARM64Recompiler::SwitchToFarCodeIfRegZeroOrNonZero(const v armAsm = &m_far_emitter; } -void CPU::Recompiler::ARM64Recompiler::SwitchToNearCode(bool emit_jump, vixl::aarch64::Condition cond) +void CPU::ARM64Recompiler::SwitchToNearCode(bool emit_jump, vixl::aarch64::Condition cond) { DebugAssert(armAsm == 
&m_far_emitter); if (emit_jump) @@ -623,17 +645,17 @@ void CPU::Recompiler::ARM64Recompiler::SwitchToNearCode(bool emit_jump, vixl::aa armAsm = &m_emitter; } -void CPU::Recompiler::ARM64Recompiler::EmitMov(const vixl::aarch64::Register& dst, u32 val) +void CPU::ARM64Recompiler::EmitMov(const vixl::aarch64::Register& dst, u32 val) { armEmitMov(armAsm, dst, val); } -void CPU::Recompiler::ARM64Recompiler::EmitCall(const void* ptr, bool force_inline /*= false*/) +void CPU::ARM64Recompiler::EmitCall(const void* ptr, bool force_inline /*= false*/) { armEmitCall(armAsm, ptr, force_inline); } -vixl::aarch64::Operand CPU::Recompiler::ARM64Recompiler::armCheckAddSubConstant(s32 val) +vixl::aarch64::Operand CPU::ARM64Recompiler::armCheckAddSubConstant(s32 val) { if (Assembler::IsImmAddSub(val)) return vixl::aarch64::Operand(static_cast(val)); @@ -642,12 +664,12 @@ vixl::aarch64::Operand CPU::Recompiler::ARM64Recompiler::armCheckAddSubConstant( return vixl::aarch64::Operand(RWSCRATCH); } -vixl::aarch64::Operand CPU::Recompiler::ARM64Recompiler::armCheckAddSubConstant(u32 val) +vixl::aarch64::Operand CPU::ARM64Recompiler::armCheckAddSubConstant(u32 val) { return armCheckAddSubConstant(static_cast(val)); } -vixl::aarch64::Operand CPU::Recompiler::ARM64Recompiler::armCheckCompareConstant(s32 val) +vixl::aarch64::Operand CPU::ARM64Recompiler::armCheckCompareConstant(s32 val) { if (Assembler::IsImmConditionalCompare(val)) return vixl::aarch64::Operand(static_cast(val)); @@ -656,7 +678,7 @@ vixl::aarch64::Operand CPU::Recompiler::ARM64Recompiler::armCheckCompareConstant return vixl::aarch64::Operand(RWSCRATCH); } -vixl::aarch64::Operand CPU::Recompiler::ARM64Recompiler::armCheckLogicalConstant(u32 val) +vixl::aarch64::Operand CPU::ARM64Recompiler::armCheckLogicalConstant(u32 val) { if (Assembler::IsImmLogical(val, 32)) return vixl::aarch64::Operand(static_cast(static_cast(val))); @@ -665,12 +687,12 @@ vixl::aarch64::Operand CPU::Recompiler::ARM64Recompiler::armCheckLogicalConstant return vixl::aarch64::Operand(RWSCRATCH); } -void CPU::Recompiler::ARM64Recompiler::BeginBlock() +void CPU::ARM64Recompiler::BeginBlock() { Recompiler::BeginBlock(); } -void CPU::Recompiler::ARM64Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) +void CPU::ARM64Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) { // store it first to reduce code size, because we can offset armMoveAddressToReg(armAsm, RXARG1, ram_ptr); @@ -733,7 +755,7 @@ void CPU::Recompiler::ARM64Recompiler::GenerateBlockProtectCheck(const u8* ram_p armAsm->bind(&block_unchanged); } -void CPU::Recompiler::ARM64Recompiler::GenerateICacheCheckAndUpdate() +void CPU::ARM64Recompiler::GenerateICacheCheckAndUpdate() { if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache)) { @@ -789,8 +811,8 @@ void CPU::Recompiler::ARM64Recompiler::GenerateICacheCheckAndUpdate() } } -void CPU::Recompiler::ARM64Recompiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/, - s32 arg3reg /*= -1*/) +void CPU::ARM64Recompiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/, + s32 arg3reg /*= -1*/) { if (arg1reg >= 0 && arg1reg != static_cast(RXARG1.GetCode())) armAsm->mov(RXARG1, XRegister(arg1reg)); @@ -801,7 +823,7 @@ void CPU::Recompiler::ARM64Recompiler::GenerateCall(const void* func, s32 arg1re EmitCall(func); } -void CPU::Recompiler::ARM64Recompiler::EndBlock(const std::optional& newpc, bool do_event_test) +void CPU::ARM64Recompiler::EndBlock(const 
std::optional<u32>& newpc, bool do_event_test) { if (newpc.has_value()) { @@ -818,7 +840,7 @@ void CPU::Recompiler::ARM64Recompiler::EndBlock(const std::optional<u32>& newpc, EndAndLinkBlock(newpc, do_event_test, false); } -void CPU::Recompiler::ARM64Recompiler::EndBlockWithException(Exception excode) +void CPU::ARM64Recompiler::EndBlockWithException(Exception excode) { // flush regs, but not pc, it's going to get overwritten // flush cycles because of the GTE instruction stuff... @@ -836,8 +858,7 @@ void CPU::Recompiler::ARM64Recompiler::EndBlockWithException(Exception excode) EndAndLinkBlock(std::nullopt, true, false); } -void CPU::Recompiler::ARM64Recompiler::EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test, - bool force_run_events) +void CPU::ARM64Recompiler::EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test, bool force_run_events) { // event test // pc should've been flushed @@ -894,7 +915,7 @@ void CPU::Recompiler::ARM64Recompiler::EndAndLinkBlock(const std::optional<u32>& } } -const void* CPU::Recompiler::ARM64Recompiler::EndCompile(u32* code_size, u32* far_code_size) +const void* CPU::ARM64Recompiler::EndCompile(u32* code_size, u32* far_code_size) { #ifdef VIXL_DEBUG m_emitter_check.reset(); @@ -911,7 +932,7 @@ const void* CPU::Recompiler::ARM64Recompiler::EndCompile(u32* code_size, u32* fa return code; } -const char* CPU::Recompiler::ARM64Recompiler::GetHostRegName(u32 reg) const +const char* CPU::ARM64Recompiler::GetHostRegName(u32 reg) const { static constexpr std::array<const char*, 32> reg64_names = { {"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", @@ -919,22 +940,22 @@ const char* CPU::Recompiler::ARM64Recompiler::GetHostRegName(u32 reg) const return (reg < reg64_names.size()) ? reg64_names[reg] : "UNKNOWN"; } -void CPU::Recompiler::ARM64Recompiler::LoadHostRegWithConstant(u32 reg, u32 val) +void CPU::ARM64Recompiler::LoadHostRegWithConstant(u32 reg, u32 val) { EmitMov(WRegister(reg), val); } -void CPU::Recompiler::ARM64Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr) +void CPU::ARM64Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr) { armAsm->ldr(WRegister(reg), PTR(ptr)); } -void CPU::Recompiler::ARM64Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr) +void CPU::ARM64Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr) { armAsm->str(WRegister(reg), PTR(ptr)); } -void CPU::Recompiler::ARM64Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr) +void CPU::ARM64Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr) { if (val == 0) { @@ -946,59 +967,59 @@ void CPU::Recompiler::ARM64Recompiler::StoreConstantToCPUPointer(u32 val, const armAsm->str(RWSCRATCH, PTR(ptr)); } -void CPU::Recompiler::ARM64Recompiler::CopyHostReg(u32 dst, u32 src) +void CPU::ARM64Recompiler::CopyHostReg(u32 dst, u32 src) { if (src != dst) armAsm->mov(WRegister(dst), WRegister(src)); } -void CPU::Recompiler::ARM64Recompiler::AssertRegOrConstS(CompileFlags cf) const +void CPU::ARM64Recompiler::AssertRegOrConstS(CompileFlags cf) const { DebugAssert(cf.valid_host_s || cf.const_s); } -void CPU::Recompiler::ARM64Recompiler::AssertRegOrConstT(CompileFlags cf) const +void CPU::ARM64Recompiler::AssertRegOrConstT(CompileFlags cf) const { DebugAssert(cf.valid_host_t || cf.const_t); } -vixl::aarch64::MemOperand CPU::Recompiler::ARM64Recompiler::MipsPtr(Reg r) const +vixl::aarch64::MemOperand CPU::ARM64Recompiler::MipsPtr(Reg r) const { DebugAssert(r < Reg::count); return
PTR(&g_state.regs.r[static_cast<size_t>(r)]); } -vixl::aarch64::Register CPU::Recompiler::ARM64Recompiler::CFGetRegD(CompileFlags cf) const +vixl::aarch64::Register CPU::ARM64Recompiler::CFGetRegD(CompileFlags cf) const { DebugAssert(cf.valid_host_d); return WRegister(cf.host_d); } -vixl::aarch64::Register CPU::Recompiler::ARM64Recompiler::CFGetRegS(CompileFlags cf) const +vixl::aarch64::Register CPU::ARM64Recompiler::CFGetRegS(CompileFlags cf) const { DebugAssert(cf.valid_host_s); return WRegister(cf.host_s); } -vixl::aarch64::Register CPU::Recompiler::ARM64Recompiler::CFGetRegT(CompileFlags cf) const +vixl::aarch64::Register CPU::ARM64Recompiler::CFGetRegT(CompileFlags cf) const { DebugAssert(cf.valid_host_t); return WRegister(cf.host_t); } -vixl::aarch64::Register CPU::Recompiler::ARM64Recompiler::CFGetRegLO(CompileFlags cf) const +vixl::aarch64::Register CPU::ARM64Recompiler::CFGetRegLO(CompileFlags cf) const { DebugAssert(cf.valid_host_lo); return WRegister(cf.host_lo); } -vixl::aarch64::Register CPU::Recompiler::ARM64Recompiler::CFGetRegHI(CompileFlags cf) const +vixl::aarch64::Register CPU::ARM64Recompiler::CFGetRegHI(CompileFlags cf) const { DebugAssert(cf.valid_host_hi); return WRegister(cf.host_hi); } -void CPU::Recompiler::ARM64Recompiler::MoveSToReg(const vixl::aarch64::Register& dst, CompileFlags cf) +void CPU::ARM64Recompiler::MoveSToReg(const vixl::aarch64::Register& dst, CompileFlags cf) { DebugAssert(dst.IsW()); if (cf.valid_host_s) @@ -1021,7 +1042,7 @@ void CPU::Recompiler::ARM64Recompiler::MoveSToReg(const vixl::aarch64::Register& } } -void CPU::Recompiler::ARM64Recompiler::MoveTToReg(const vixl::aarch64::Register& dst, CompileFlags cf) +void CPU::ARM64Recompiler::MoveTToReg(const vixl::aarch64::Register& dst, CompileFlags cf) { DebugAssert(dst.IsW()); if (cf.valid_host_t) @@ -1044,7 +1065,7 @@ void CPU::Recompiler::ARM64Recompiler::MoveTToReg(const vixl::aarch64::Register& } } -void CPU::Recompiler::ARM64Recompiler::MoveMIPSRegToReg(const vixl::aarch64::Register& dst, Reg reg) +void CPU::ARM64Recompiler::MoveMIPSRegToReg(const vixl::aarch64::Register& dst, Reg reg) { DebugAssert(reg < Reg::count && dst.IsW()); if (const std::optional<u32> hreg = CheckHostReg(0, Recompiler::HR_TYPE_CPU_REG, reg)) @@ -1055,9 +1076,8 @@ void CPU::Recompiler::ARM64Recompiler::MoveMIPSRegToReg(const vixl::aarch64::Reg armAsm->ldr(dst, MipsPtr(reg)); } -void CPU::Recompiler::ARM64Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, - Reg arg2reg /* = Reg::count */, - Reg arg3reg /* = Reg::count */) +void CPU::ARM64Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg /* = Reg::count */, + Reg arg3reg /* = Reg::count */) { DebugAssert(g_settings.gpu_pgxp_enable); @@ -1072,7 +1092,7 @@ void CPU::Recompiler::ARM64Recompiler::GeneratePGXPCallWithMIPSRegs(const void* EmitCall(func); } -void CPU::Recompiler::ARM64Recompiler::Flush(u32 flags) +void CPU::ARM64Recompiler::Flush(u32 flags) { Recompiler::Flush(flags); @@ -1164,13 +1184,13 @@ void CPU::Recompiler::ARM64Recompiler::Flush(u32 flags) } } -void CPU::Recompiler::ARM64Recompiler::Compile_Fallback() +void CPU::ARM64Recompiler::Compile_Fallback() { WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits); Flush(FLUSH_FOR_INTERPRETER); - EmitCall(reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::InterpretInstruction)); + EmitCall(reinterpret_cast<const void*>(&CPU::RecompilerThunks::InterpretInstruction)); // TODO: make me less garbage // TODO: this is wrong, it flushes the load
delay on the same cycle when we return. @@ -1189,7 +1209,7 @@ void CPU::Recompiler::ARM64Recompiler::Compile_Fallback() m_load_delay_dirty = EMULATE_LOAD_DELAYS; } -void CPU::Recompiler::ARM64Recompiler::CheckBranchTarget(const vixl::aarch64::Register& pcreg) +void CPU::ARM64Recompiler::CheckBranchTarget(const vixl::aarch64::Register& pcreg) { DebugAssert(pcreg.IsW()); if (!g_settings.cpu_recompiler_memory_exceptions) @@ -1205,7 +1225,7 @@ void CPU::Recompiler::ARM64Recompiler::CheckBranchTarget(const vixl::aarch64::Re SwitchToNearCode(false); } -void CPU::Recompiler::ARM64Recompiler::Compile_jr(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_jr(CompileFlags cf) { const Register pcreg = CFGetRegS(cf); CheckBranchTarget(pcreg); @@ -1216,7 +1236,7 @@ void CPU::Recompiler::ARM64Recompiler::Compile_jr(CompileFlags cf) EndBlock(std::nullopt, true); } -void CPU::Recompiler::ARM64Recompiler::Compile_jalr(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_jalr(CompileFlags cf) { const Register pcreg = CFGetRegS(cf); if (MipsD() != Reg::zero) @@ -1229,7 +1249,7 @@ void CPU::Recompiler::ARM64Recompiler::Compile_jalr(CompileFlags cf) EndBlock(std::nullopt, true); } -void CPU::Recompiler::ARM64Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond) +void CPU::ARM64Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond) { AssertRegOrConstS(cf); @@ -1310,7 +1330,7 @@ void CPU::Recompiler::ARM64Recompiler::Compile_bxx(CompileFlags cf, BranchCondit EndBlock(taken_pc, true); } -void CPU::Recompiler::ARM64Recompiler::Compile_addi(CompileFlags cf, bool overflow) +void CPU::ARM64Recompiler::Compile_addi(CompileFlags cf, bool overflow) { const Register rs = CFGetRegS(cf); const Register rt = CFGetRegT(cf); @@ -1332,33 +1352,33 @@ void CPU::Recompiler::ARM64Recompiler::Compile_addi(CompileFlags cf, bool overfl } } -void CPU::Recompiler::ARM64Recompiler::Compile_addi(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_addi(CompileFlags cf) { Compile_addi(cf, g_settings.cpu_recompiler_memory_exceptions); } -void CPU::Recompiler::ARM64Recompiler::Compile_addiu(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_addiu(CompileFlags cf) { Compile_addi(cf, false); } -void CPU::Recompiler::ARM64Recompiler::Compile_slti(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_slti(CompileFlags cf) { Compile_slti(cf, true); } -void CPU::Recompiler::ARM64Recompiler::Compile_sltiu(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_sltiu(CompileFlags cf) { Compile_slti(cf, false); } -void CPU::Recompiler::ARM64Recompiler::Compile_slti(CompileFlags cf, bool sign) +void CPU::ARM64Recompiler::Compile_slti(CompileFlags cf, bool sign) { armAsm->cmp(CFGetRegS(cf), armCheckCompareConstant(static_cast(inst->i.imm_sext32()))); armAsm->cset(CFGetRegT(cf), sign ? 
lt : lo); } -void CPU::Recompiler::ARM64Recompiler::Compile_andi(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_andi(CompileFlags cf) { const Register rt = CFGetRegT(cf); if (const u32 imm = inst->i.imm_zext32(); imm != 0) @@ -1367,7 +1387,7 @@ void CPU::Recompiler::ARM64Recompiler::Compile_andi(CompileFlags cf) armAsm->mov(rt, wzr); } -void CPU::Recompiler::ARM64Recompiler::Compile_ori(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_ori(CompileFlags cf) { const Register rt = CFGetRegT(cf); const Register rs = CFGetRegS(cf); @@ -1377,7 +1397,7 @@ void CPU::Recompiler::ARM64Recompiler::Compile_ori(CompileFlags cf) armAsm->mov(rt, rs); } -void CPU::Recompiler::ARM64Recompiler::Compile_xori(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_xori(CompileFlags cf) { const Register rt = CFGetRegT(cf); const Register rs = CFGetRegS(cf); @@ -1387,9 +1407,9 @@ void CPU::Recompiler::ARM64Recompiler::Compile_xori(CompileFlags cf) armAsm->mov(rt, rs); } -void CPU::Recompiler::ARM64Recompiler::Compile_shift( - CompileFlags cf, - void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&, const vixl::aarch64::Register&, unsigned)) +void CPU::ARM64Recompiler::Compile_shift(CompileFlags cf, + void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&, + const vixl::aarch64::Register&, unsigned)) { const Register rd = CFGetRegD(cf); const Register rt = CFGetRegT(cf); @@ -1399,22 +1419,22 @@ void CPU::Recompiler::ARM64Recompiler::Compile_shift( armAsm->mov(rd, rt); } -void CPU::Recompiler::ARM64Recompiler::Compile_sll(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_sll(CompileFlags cf) { Compile_shift(cf, &Assembler::lsl); } -void CPU::Recompiler::ARM64Recompiler::Compile_srl(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_srl(CompileFlags cf) { Compile_shift(cf, &Assembler::lsr); } -void CPU::Recompiler::ARM64Recompiler::Compile_sra(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_sra(CompileFlags cf) { Compile_shift(cf, &Assembler::asr); } -void CPU::Recompiler::ARM64Recompiler::Compile_variable_shift( +void CPU::ARM64Recompiler::Compile_variable_shift( CompileFlags cf, void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&, const vixl::aarch64::Register&, const vixl::aarch64::Register&), @@ -1442,22 +1462,22 @@ void CPU::Recompiler::ARM64Recompiler::Compile_variable_shift( } } -void CPU::Recompiler::ARM64Recompiler::Compile_sllv(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_sllv(CompileFlags cf) { Compile_variable_shift(cf, &Assembler::lslv, &Assembler::lsl); } -void CPU::Recompiler::ARM64Recompiler::Compile_srlv(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_srlv(CompileFlags cf) { Compile_variable_shift(cf, &Assembler::lsrv, &Assembler::lsr); } -void CPU::Recompiler::ARM64Recompiler::Compile_srav(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_srav(CompileFlags cf) { Compile_variable_shift(cf, &Assembler::asrv, &Assembler::asr); } -void CPU::Recompiler::ARM64Recompiler::Compile_mult(CompileFlags cf, bool sign) +void CPU::ARM64Recompiler::Compile_mult(CompileFlags cf, bool sign) { const Register rs = cf.valid_host_s ? 
CFGetRegS(cf) : RWARG1; if (!cf.valid_host_s) @@ -1475,17 +1495,17 @@ void CPU::Recompiler::ARM64Recompiler::Compile_mult(CompileFlags cf, bool sign) armAsm->lsr(hi.X(), lo.X(), 32); } -void CPU::Recompiler::ARM64Recompiler::Compile_mult(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_mult(CompileFlags cf) { Compile_mult(cf, true); } -void CPU::Recompiler::ARM64Recompiler::Compile_multu(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_multu(CompileFlags cf) { Compile_mult(cf, false); } -void CPU::Recompiler::ARM64Recompiler::Compile_div(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_div(CompileFlags cf) { const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1; if (!cf.valid_host_s) @@ -1530,7 +1550,7 @@ void CPU::Recompiler::ARM64Recompiler::Compile_div(CompileFlags cf) armAsm->bind(&done); } -void CPU::Recompiler::ARM64Recompiler::Compile_divu(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_divu(CompileFlags cf) { const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1; if (!cf.valid_host_s) @@ -1560,7 +1580,7 @@ void CPU::Recompiler::ARM64Recompiler::Compile_divu(CompileFlags cf) armAsm->bind(&done); } -void CPU::Recompiler::ARM64Recompiler::TestOverflow(const vixl::aarch64::Register& result) +void CPU::ARM64Recompiler::TestOverflow(const vixl::aarch64::Register& result) { DebugAssert(result.IsW()); SwitchToFarCode(true, vs); @@ -1577,11 +1597,11 @@ void CPU::Recompiler::ARM64Recompiler::TestOverflow(const vixl::aarch64::Registe SwitchToNearCode(false); } -void CPU::Recompiler::ARM64Recompiler::Compile_dst_op( - CompileFlags cf, - void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&, const vixl::aarch64::Register&, - const vixl::aarch64::Operand&), - bool commutative, bool logical, bool overflow) +void CPU::ARM64Recompiler::Compile_dst_op(CompileFlags cf, + void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&, + const vixl::aarch64::Register&, + const vixl::aarch64::Operand&), + bool commutative, bool logical, bool overflow) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1630,7 +1650,7 @@ void CPU::Recompiler::ARM64Recompiler::Compile_dst_op( TestOverflow(rd); } -void CPU::Recompiler::ARM64Recompiler::Compile_add(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_add(CompileFlags cf) { if (g_settings.cpu_recompiler_memory_exceptions) Compile_dst_op(cf, &Assembler::adds, true, false, true); @@ -1638,12 +1658,12 @@ void CPU::Recompiler::ARM64Recompiler::Compile_add(CompileFlags cf) Compile_dst_op(cf, &Assembler::add, true, false, false); } -void CPU::Recompiler::ARM64Recompiler::Compile_addu(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_addu(CompileFlags cf) { Compile_dst_op(cf, &Assembler::add, true, false, false); } -void CPU::Recompiler::ARM64Recompiler::Compile_sub(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_sub(CompileFlags cf) { if (g_settings.cpu_recompiler_memory_exceptions) Compile_dst_op(cf, &Assembler::subs, false, false, true); @@ -1651,12 +1671,12 @@ void CPU::Recompiler::ARM64Recompiler::Compile_sub(CompileFlags cf) Compile_dst_op(cf, &Assembler::sub, false, false, false); } -void CPU::Recompiler::ARM64Recompiler::Compile_subu(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_subu(CompileFlags cf) { Compile_dst_op(cf, &Assembler::sub, false, false, false); } -void CPU::Recompiler::ARM64Recompiler::Compile_and(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_and(CompileFlags cf) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1677,7 +1697,7 @@ void 
CPU::Recompiler::ARM64Recompiler::Compile_and(CompileFlags cf) Compile_dst_op(cf, &Assembler::and_, true, true, false); } -void CPU::Recompiler::ARM64Recompiler::Compile_or(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_or(CompileFlags cf) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1693,7 +1713,7 @@ void CPU::Recompiler::ARM64Recompiler::Compile_or(CompileFlags cf) Compile_dst_op(cf, &Assembler::orr, true, true, false); } -void CPU::Recompiler::ARM64Recompiler::Compile_xor(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_xor(CompileFlags cf) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1715,23 +1735,23 @@ void CPU::Recompiler::ARM64Recompiler::Compile_xor(CompileFlags cf) Compile_dst_op(cf, &Assembler::eor, true, true, false); } -void CPU::Recompiler::ARM64Recompiler::Compile_nor(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_nor(CompileFlags cf) { Compile_or(cf); armAsm->mvn(CFGetRegD(cf), CFGetRegD(cf)); } -void CPU::Recompiler::ARM64Recompiler::Compile_slt(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_slt(CompileFlags cf) { Compile_slt(cf, true); } -void CPU::Recompiler::ARM64Recompiler::Compile_sltu(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_sltu(CompileFlags cf) { Compile_slt(cf, false); } -void CPU::Recompiler::ARM64Recompiler::Compile_slt(CompileFlags cf, bool sign) +void CPU::ARM64Recompiler::Compile_slt(CompileFlags cf, bool sign) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1755,9 +1775,8 @@ void CPU::Recompiler::ARM64Recompiler::Compile_slt(CompileFlags cf, bool sign) } vixl::aarch64::Register -CPU::Recompiler::ARM64Recompiler::ComputeLoadStoreAddressArg(CompileFlags cf, - const std::optional<VirtualMemoryAddress>& address, - const std::optional<const vixl::aarch64::Register>& reg) +CPU::ARM64Recompiler::ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address, + const std::optional<const vixl::aarch64::Register>& reg) { const u32 imm = inst->i.imm_sext32(); if (cf.valid_host_s && imm == 0 && !reg.has_value()) @@ -1797,9 +1816,9 @@ CPU::Recompiler::ARM64Recompiler::ComputeLoadStoreAddressArg(CompileFlags cf, } template<typename RegAllocFn> -vixl::aarch64::Register -CPU::Recompiler::ARM64Recompiler::GenerateLoad(const vixl::aarch64::Register& addr_reg, MemoryAccessSize size, - bool sign, bool use_fastmem, const RegAllocFn& dst_reg_alloc) +vixl::aarch64::Register CPU::ARM64Recompiler::GenerateLoad(const vixl::aarch64::Register& addr_reg, + MemoryAccessSize size, bool sign, bool use_fastmem, + const RegAllocFn& dst_reg_alloc) { DebugAssert(addr_reg.IsW()); if (use_fastmem) @@ -1845,20 +1864,20 @@ { case MemoryAccessSize::Byte: { - EmitCall(checked ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::ReadMemoryByte) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedReadMemoryByte)); + EmitCall(checked ? reinterpret_cast<const void*>(&CPU::RecompilerThunks::ReadMemoryByte) : + reinterpret_cast<const void*>(&CPU::RecompilerThunks::UncheckedReadMemoryByte)); } break; case MemoryAccessSize::HalfWord: { - EmitCall(checked ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::ReadMemoryHalfWord) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedReadMemoryHalfWord)); + EmitCall(checked ? reinterpret_cast<const void*>(&CPU::RecompilerThunks::ReadMemoryHalfWord) : + reinterpret_cast<const void*>(&CPU::RecompilerThunks::UncheckedReadMemoryHalfWord)); } break; case MemoryAccessSize::Word: { - EmitCall(checked ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::ReadMemoryWord) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedReadMemoryWord)); + EmitCall(checked ?
reinterpret_cast<const void*>(&CPU::RecompilerThunks::ReadMemoryWord) : + reinterpret_cast<const void*>(&CPU::RecompilerThunks::UncheckedReadMemoryWord)); } break; } @@ -1913,9 +1932,9 @@ CPU::Recompiler::ARM64Recompiler::GenerateLoad(const vixl::aarch64::Register& ad return dst_reg; } -void CPU::Recompiler::ARM64Recompiler::GenerateStore(const vixl::aarch64::Register& addr_reg, - const vixl::aarch64::Register& value_reg, MemoryAccessSize size, - bool use_fastmem) +void CPU::ARM64Recompiler::GenerateStore(const vixl::aarch64::Register& addr_reg, + const vixl::aarch64::Register& value_reg, MemoryAccessSize size, + bool use_fastmem) { DebugAssert(addr_reg.IsW() && value_reg.IsW()); if (use_fastmem) @@ -1958,20 +1977,20 @@ { case MemoryAccessSize::Byte: { - EmitCall(checked ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::WriteMemoryByte) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedWriteMemoryByte)); + EmitCall(checked ? reinterpret_cast<const void*>(&CPU::RecompilerThunks::WriteMemoryByte) : - reinterpret_cast<const void*>(&CPU::RecompilerThunks::UncheckedWriteMemoryByte)); } break; case MemoryAccessSize::HalfWord: { - EmitCall(checked ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::WriteMemoryHalfWord) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedWriteMemoryHalfWord)); + EmitCall(checked ? reinterpret_cast<const void*>(&CPU::RecompilerThunks::WriteMemoryHalfWord) : + reinterpret_cast<const void*>(&CPU::RecompilerThunks::UncheckedWriteMemoryHalfWord)); } break; case MemoryAccessSize::Word: { - EmitCall(checked ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::WriteMemoryWord) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedWriteMemoryWord)); + EmitCall(checked ? reinterpret_cast<const void*>(&CPU::RecompilerThunks::WriteMemoryWord) : + reinterpret_cast<const void*>(&CPU::RecompilerThunks::UncheckedWriteMemoryWord)); } break; } @@ -2002,8 +2021,8 @@ void CPU::Recompiler::ARM64Recompiler::GenerateStore(const vixl::aarch64::Regist } } -void CPU::Recompiler::ARM64Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional<VirtualMemoryAddress>& address) +void CPU::ARM64Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional<VirtualMemoryAddress>& address) { const std::optional<WRegister> addr_reg = g_settings.gpu_pgxp_enable ?
std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) : @@ -2031,8 +2050,8 @@ void CPU::Recompiler::ARM64Recompiler::Compile_lxx(CompileFlags cf, MemoryAccess } } -void CPU::Recompiler::ARM64Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional<VirtualMemoryAddress>& address) +void CPU::ARM64Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional<VirtualMemoryAddress>& address) { DebugAssert(size == MemoryAccessSize::Word && !sign); @@ -2125,8 +2144,8 @@ void CPU::Recompiler::ARM64Recompiler::Compile_lwx(CompileFlags cf, MemoryAccess } } -void CPU::Recompiler::ARM64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional<VirtualMemoryAddress>& address) +void CPU::ARM64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional<VirtualMemoryAddress>& address) { const u32 index = static_cast<u32>(inst->r.rt.GetValue()); const auto [ptr, action] = GetGTERegisterPointer(index, true); @@ -2211,8 +2230,8 @@ void CPU::Recompiler::ARM64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAcces } } -void CPU::Recompiler::ARM64Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional<VirtualMemoryAddress>& address) +void CPU::ARM64Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional<VirtualMemoryAddress>& address) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -2239,8 +2258,8 @@ void CPU::Recompiler::ARM64Recompiler::Compile_sxx(CompileFlags cf, MemoryAccess } } -void CPU::Recompiler::ARM64Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional<VirtualMemoryAddress>& address) +void CPU::ARM64Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional<VirtualMemoryAddress>& address) { DebugAssert(size == MemoryAccessSize::Word && !sign); @@ -2313,8 +2332,8 @@ void CPU::Recompiler::ARM64Recompiler::Compile_swx(CompileFlags cf, MemoryAccess } } -void CPU::Recompiler::ARM64Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional<VirtualMemoryAddress>& address) +void CPU::ARM64Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional<VirtualMemoryAddress>& address) { const u32 index = static_cast<u32>(inst->r.rt.GetValue()); const auto [ptr, action] = GetGTERegisterPointer(index, false); @@ -2369,7 +2388,7 @@ void CPU::Recompiler::ARM64Recompiler::Compile_swc2(CompileFlags cf, MemoryAcces } } -void CPU::Recompiler::ARM64Recompiler::Compile_mtc0(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_mtc0(CompileFlags cf) { // TODO: we need better constant setting here..
which will need backprop AssertRegOrConstT(cf); @@ -2445,7 +2464,7 @@ void CPU::Recompiler::ARM64Recompiler::Compile_mtc0(CompileFlags cf) } } -void CPU::Recompiler::ARM64Recompiler::Compile_rfe(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_rfe(CompileFlags cf) { // shift mode bits right two, preserving upper bits armAsm->ldr(RWARG1, PTR(&g_state.cop0_regs.sr.bits)); @@ -2455,7 +2474,7 @@ void CPU::Recompiler::ARM64Recompiler::Compile_rfe(CompileFlags cf) TestInterrupts(RWARG1); } -void CPU::Recompiler::ARM64Recompiler::TestInterrupts(const vixl::aarch64::Register& sr) +void CPU::ARM64Recompiler::TestInterrupts(const vixl::aarch64::Register& sr) { DebugAssert(sr.IsW()); @@ -2506,7 +2525,7 @@ void CPU::Recompiler::ARM64Recompiler::TestInterrupts(const vixl::aarch64::Regis armAsm->bind(&no_interrupt); } -void CPU::Recompiler::ARM64Recompiler::Compile_mfc2(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_mfc2(CompileFlags cf) { const u32 index = inst->cop.Cop2Index(); const Reg rt = inst->r.rt; @@ -2547,7 +2566,7 @@ void CPU::Recompiler::ARM64Recompiler::Compile_mfc2(CompileFlags cf) } } -void CPU::Recompiler::ARM64Recompiler::Compile_mtc2(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_mtc2(CompileFlags cf) { const u32 index = inst->cop.Cop2Index(); const auto [ptr, action] = GetGTERegisterPointer(index, true); @@ -2609,7 +2628,7 @@ void CPU::Recompiler::ARM64Recompiler::Compile_mtc2(CompileFlags cf) } } -void CPU::Recompiler::ARM64Recompiler::Compile_cop2(CompileFlags cf) +void CPU::ARM64Recompiler::Compile_cop2(CompileFlags cf) { TickCount func_ticks; GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks); @@ -2686,24 +2705,24 @@ u32 CPU::Recompiler::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, vo case MemoryAccessSize::Byte: { armEmitCall(armAsm, - is_load ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedReadMemoryByte) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedWriteMemoryByte), + is_load ? reinterpret_cast<const void*>(&CPU::RecompilerThunks::UncheckedReadMemoryByte) : + reinterpret_cast<const void*>(&CPU::RecompilerThunks::UncheckedWriteMemoryByte), false); } break; case MemoryAccessSize::HalfWord: { armEmitCall(armAsm, - is_load ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedReadMemoryHalfWord) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedWriteMemoryHalfWord), + is_load ? reinterpret_cast<const void*>(&CPU::RecompilerThunks::UncheckedReadMemoryHalfWord) : + reinterpret_cast<const void*>(&CPU::RecompilerThunks::UncheckedWriteMemoryHalfWord), false); } break; case MemoryAccessSize::Word: { armEmitCall(armAsm, - is_load ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedReadMemoryWord) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedWriteMemoryWord), + is_load ?
reinterpret_cast<const void*>(&CPU::RecompilerThunks::UncheckedReadMemoryWord) : + reinterpret_cast<const void*>(&CPU::RecompilerThunks::UncheckedWriteMemoryWord), false); } break; diff --git a/src/core/cpu_recompiler_arm64.h b/src/core/cpu_recompiler_arm64.h index 44bb4001a..55011f145 100644 --- a/src/core/cpu_recompiler_arm64.h +++ b/src/core/cpu_recompiler_arm64.h @@ -11,7 +11,7 @@ #include "vixl/aarch64/assembler-aarch64.h" -namespace CPU::Recompiler { +namespace CPU { class ARM64Recompiler final : public Recompiler { @@ -166,6 +166,6 @@ private: #endif }; -} // namespace CPU::Recompiler +} // namespace CPU #endif // CPU_ARCH_ARM64 diff --git a/src/core/cpu_recompiler_riscv64.cpp b/src/core/cpu_recompiler_riscv64.cpp index 35b42290f..c3fad7499 100644 --- a/src/core/cpu_recompiler_riscv64.cpp +++ b/src/core/cpu_recompiler_riscv64.cpp @@ -5,7 +5,6 @@ #include "cpu_code_cache_private.h" #include "cpu_core_private.h" #include "cpu_pgxp.h" -#include "cpu_recompiler_thunks.h" #include "gte.h" #include "settings.h" #include "timing_event.h" @@ -33,42 +32,53 @@ extern "C" { static constexpr u32 BLOCK_LINK_SIZE = 8; // auipc+jr -namespace CPU::Recompiler { +#define RRET biscuit::a0 +#define RARG1 biscuit::a0 +#define RARG2 biscuit::a1 +#define RARG3 biscuit::a2 +#define RSCRATCH biscuit::t6 +#define RSTATE biscuit::s10 +#define RMEMBASE biscuit::s11 + +static bool rvIsCallerSavedRegister(u32 id); +static bool rvIsValidSExtITypeImm(u32 imm); +static std::pair<s32, s32> rvGetAddressImmediates(const void* cur, const void* target); +static void rvMoveAddressToReg(biscuit::Assembler* armAsm, const biscuit::GPR& reg, const void* addr); +static void rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm); +static void rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, u64 imm); +static u32 rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg = biscuit::zero); +static u32 rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr); +static void rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr, + bool sign_extend_word = false); +static void rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr, + const biscuit::GPR& tempreg = RSCRATCH); +static void rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word +static void rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word +static void rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word +static void rvEmitUExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word +static void rvEmitDSExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> doubleword +static void rvEmitDUExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> doubleword + +namespace CPU { using namespace biscuit; -using CPU::Recompiler::rvEmitCall; -using CPU::Recompiler::rvEmitDSExtW; -using CPU::Recompiler::rvEmitDUExtW; -using CPU::Recompiler::rvEmitFarLoad; -using CPU::Recompiler::rvEmitJmp; -using CPU::Recompiler::rvEmitMov; -using CPU::Recompiler::rvEmitMov64; -using CPU::Recompiler::rvEmitSExtB; -using CPU::Recompiler::rvEmitSExtH; -using CPU::Recompiler::rvEmitUExtB; -using CPU::Recompiler::rvEmitUExtH; -using CPU::Recompiler::rvGetAddressImmediates; -using CPU::Recompiler::rvIsCallerSavedRegister; -using CPU::Recompiler::rvIsValidSExtITypeImm;
-using CPU::Recompiler::rvMoveAddressToReg; - RISCV64Recompiler s_instance; Recompiler* g_compiler = &s_instance; -} // namespace CPU::Recompiler +} // namespace CPU -bool CPU::Recompiler::rvIsCallerSavedRegister(u32 id) +bool rvIsCallerSavedRegister(u32 id) { return (id == 1 || (id >= 3 && id < 8) || (id >= 10 && id <= 17) || (id >= 28 && id <= 31)); } -bool CPU::Recompiler::rvIsValidSExtITypeImm(u32 imm) +bool rvIsValidSExtITypeImm(u32 imm) { return (static_cast<u32>((static_cast<s32>(imm) << 20) >> 20) == imm); } -std::pair<s32, s32> CPU::Recompiler::rvGetAddressImmediates(const void* cur, const void* target) +std::pair<s32, s32> rvGetAddressImmediates(const void* cur, const void* target) { const s64 disp = static_cast<s64>(reinterpret_cast<intptr_t>(target) - reinterpret_cast<intptr_t>(cur)); Assert(disp >= static_cast<s64>(std::numeric_limits<s32>::min()) && @@ -79,14 +89,14 @@ std::pair<s32, s32> CPU::Recompiler::rvGetAddressImmediates(const void* cur, con return std::make_pair(static_cast<s32>(hi >> 12), static_cast<s32>((lo << 52) >> 52)); } -void CPU::Recompiler::rvMoveAddressToReg(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr) +void rvMoveAddressToReg(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr) { const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr); rvAsm->AUIPC(reg, hi); rvAsm->ADDI(reg, reg, lo); } -void CPU::Recompiler::rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm) +void rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm) { // Borrowed from biscuit, but doesn't emit an ADDI if the lower 12 bits are zero. const u32 lower = imm & 0xFFF; @@ -105,8 +115,7 @@ void CPU::Recompiler::rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& r } } -void CPU::Recompiler::rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, - u64 imm) +void rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, u64 imm) { // TODO: Make better..
rvEmitMov(rvAsm, rd, static_cast<u32>(imm >> 32)); @@ -117,7 +126,7 @@ void CPU::Recompiler::rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rvAsm->ADD(rd, rd, scratch); } -u32 CPU::Recompiler::rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg) +u32 rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg) { // TODO: use J if displacement is <1MB, needs a bool because backpatch must be 8 bytes const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), ptr); @@ -126,13 +135,12 @@ u32 CPU::Recompiler::rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const return 8; } -u32 CPU::Recompiler::rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr) +u32 rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr) { return rvEmitJmp(rvAsm, ptr, biscuit::ra); } -void CPU::Recompiler::rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr, - bool sign_extend_word) +void rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr, bool sign_extend_word) { const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr); rvAsm->AUIPC(reg, hi); @@ -142,43 +150,42 @@ void CPU::Recompiler::rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GP rvAsm->LWU(reg, lo, reg); } -void CPU::Recompiler::rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr, - const biscuit::GPR& tempreg) +[[maybe_unused]] void rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr, + const biscuit::GPR& tempreg) { const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr); rvAsm->AUIPC(tempreg, hi); rvAsm->SW(reg, lo, tempreg); } -void CPU::Recompiler::rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs) +void rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs) { rvAsm->SLLI(rd, rs, 24); rvAsm->SRAIW(rd, rd, 24); } -void CPU::Recompiler::rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs) +void rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs) { rvAsm->ANDI(rd, rs, 0xFF); } -void CPU::Recompiler::rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs) +void rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs) { rvAsm->SLLI(rd, rs, 16); rvAsm->SRAIW(rd, rd, 16); } -void CPU::Recompiler::rvEmitUExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs) +void rvEmitUExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs) { rvAsm->SLLI(rd, rs, 16); rvAsm->SRLI(rd, rd, 16); } -void CPU::Recompiler::rvEmitDSExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs) +void rvEmitDSExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs) { rvAsm->ADDIW(rd, rs, 0); } -void CPU::Recompiler::rvEmitDUExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs) +void rvEmitDUExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs) { rvAsm->SLLI64(rd, rs, 32); rvAsm->SRLI64(rd, rd, 32); @@ -227,7 +234,6 @@ u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size) u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size) { - using namespace CPU::Recompiler; using namespace biscuit; Assembler actual_asm(static_cast<u8*>(code), code_size); @@ -246,12 +252,6 @@ u32
CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size) if (IsUsingFastmem()) rvAsm->LD(RMEMBASE, PTR(&g_state.fastmem_base)); - // Downcount isn't set on entry, so we need to initialize it - rvMoveAddressToReg(rvAsm, RARG1, TimingEvents::GetHeadEventPtr()); - rvAsm->LD(RARG1, 0, RARG1); - rvAsm->LW(RARG1, OFFSETOF(TimingEvent, m_downcount), RARG1); - rvAsm->SW(RARG1, PTR(&g_state.downcount)); - // Fall through to event dispatcher } @@ -319,7 +319,7 @@ u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache) // TODO: get rid of assembler construction here { biscuit::Assembler assembler(static_cast(code), BLOCK_LINK_SIZE); - CPU::Recompiler::rvEmitCall(&assembler, dst); + rvEmitCall(&assembler, dst); DebugAssert(assembler.GetCodeBuffer().GetSizeInBytes() <= BLOCK_LINK_SIZE); if (assembler.GetCodeBuffer().GetRemainingBytes() > 0) @@ -332,17 +332,17 @@ u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache) return BLOCK_LINK_SIZE; } -CPU::Recompiler::RISCV64Recompiler::RISCV64Recompiler() = default; +CPU::RISCV64Recompiler::RISCV64Recompiler() = default; -CPU::Recompiler::RISCV64Recompiler::~RISCV64Recompiler() = default; +CPU::RISCV64Recompiler::~RISCV64Recompiler() = default; -const void* CPU::Recompiler::RISCV64Recompiler::GetCurrentCodePointer() +const void* CPU::RISCV64Recompiler::GetCurrentCodePointer() { return rvAsm->GetCursorPointer(); } -void CPU::Recompiler::RISCV64Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, - u8* far_code_buffer, u32 far_code_space) +void CPU::RISCV64Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, + u32 far_code_space) { Recompiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space); @@ -370,10 +370,11 @@ void CPU::Recompiler::RISCV64Recompiler::Reset(CodeCache::Block* block, u8* code } } -void CPU::Recompiler::RISCV64Recompiler::SwitchToFarCode( - bool emit_jump, - void (biscuit::Assembler::*inverted_cond)(biscuit::GPR, biscuit::GPR, biscuit::Label*) /* = nullptr */, - const biscuit::GPR& rs1 /* = biscuit::zero */, const biscuit::GPR& rs2 /* = biscuit::zero */) +void CPU::RISCV64Recompiler::SwitchToFarCode(bool emit_jump, + void (biscuit::Assembler::*inverted_cond)(biscuit::GPR, biscuit::GPR, + biscuit::Label*) /* = nullptr */, + const biscuit::GPR& rs1 /* = biscuit::zero */, + const biscuit::GPR& rs2 /* = biscuit::zero */) { DebugAssert(rvAsm == m_emitter.get()); if (emit_jump) @@ -394,7 +395,7 @@ void CPU::Recompiler::RISCV64Recompiler::SwitchToFarCode( rvAsm = m_far_emitter.get(); } -void CPU::Recompiler::RISCV64Recompiler::SwitchToNearCode(bool emit_jump) +void CPU::RISCV64Recompiler::SwitchToNearCode(bool emit_jump) { DebugAssert(rvAsm == m_far_emitter.get()); if (emit_jump) @@ -402,19 +403,19 @@ void CPU::Recompiler::RISCV64Recompiler::SwitchToNearCode(bool emit_jump) rvAsm = m_emitter.get(); } -void CPU::Recompiler::RISCV64Recompiler::EmitMov(const biscuit::GPR& dst, u32 val) +void CPU::RISCV64Recompiler::EmitMov(const biscuit::GPR& dst, u32 val) { rvEmitMov(rvAsm, dst, val); } -void CPU::Recompiler::RISCV64Recompiler::EmitCall(const void* ptr) +void CPU::RISCV64Recompiler::EmitCall(const void* ptr) { rvEmitCall(rvAsm, ptr); } -void CPU::Recompiler::RISCV64Recompiler::SafeImmSExtIType(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm, - void (biscuit::Assembler::*iop)(GPR, GPR, u32), - void (biscuit::Assembler::*rop)(GPR, GPR, GPR)) +void CPU::RISCV64Recompiler::SafeImmSExtIType(const 
biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm, + void (biscuit::Assembler::*iop)(GPR, GPR, u32), + void (biscuit::Assembler::*rop)(GPR, GPR, GPR)) { DebugAssert(rd != RSCRATCH && rs != RSCRATCH); @@ -428,83 +429,83 @@ void CPU::Recompiler::RISCV64Recompiler::SafeImmSExtIType(const biscuit::GPR& rd (rvAsm->*rop)(rd, rs, RSCRATCH); } -void CPU::Recompiler::RISCV64Recompiler::SafeADDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) +void CPU::RISCV64Recompiler::SafeADDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) { SafeImmSExtIType(rd, rs, imm, reinterpret_cast(&Assembler::ADDI), &Assembler::ADD); } -void CPU::Recompiler::RISCV64Recompiler::SafeADDIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) +void CPU::RISCV64Recompiler::SafeADDIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) { SafeImmSExtIType(rd, rs, imm, reinterpret_cast(&Assembler::ADDIW), &Assembler::ADDW); } -void CPU::Recompiler::RISCV64Recompiler::SafeSUBIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) +void CPU::RISCV64Recompiler::SafeSUBIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) { const u32 nimm = static_cast(-static_cast(imm)); SafeImmSExtIType(rd, rs, nimm, reinterpret_cast(&Assembler::ADDIW), &Assembler::ADDW); } -void CPU::Recompiler::RISCV64Recompiler::SafeANDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) +void CPU::RISCV64Recompiler::SafeANDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) { SafeImmSExtIType(rd, rs, imm, &Assembler::ANDI, &Assembler::AND); } -void CPU::Recompiler::RISCV64Recompiler::SafeORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) +void CPU::RISCV64Recompiler::SafeORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) { SafeImmSExtIType(rd, rs, imm, &Assembler::ORI, &Assembler::OR); } -void CPU::Recompiler::RISCV64Recompiler::SafeXORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) +void CPU::RISCV64Recompiler::SafeXORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) { SafeImmSExtIType(rd, rs, imm, &Assembler::XORI, &Assembler::XOR); } -void CPU::Recompiler::RISCV64Recompiler::SafeSLTI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) +void CPU::RISCV64Recompiler::SafeSLTI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) { SafeImmSExtIType(rd, rs, imm, reinterpret_cast(&Assembler::SLTI), &Assembler::SLT); } -void CPU::Recompiler::RISCV64Recompiler::SafeSLTIU(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) +void CPU::RISCV64Recompiler::SafeSLTIU(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) { SafeImmSExtIType(rd, rs, imm, reinterpret_cast(&Assembler::SLTIU), &Assembler::SLTU); } -void CPU::Recompiler::RISCV64Recompiler::EmitSExtB(const biscuit::GPR& rd, const biscuit::GPR& rs) +void CPU::RISCV64Recompiler::EmitSExtB(const biscuit::GPR& rd, const biscuit::GPR& rs) { rvEmitSExtB(rvAsm, rd, rs); } -void CPU::Recompiler::RISCV64Recompiler::EmitUExtB(const biscuit::GPR& rd, const biscuit::GPR& rs) +void CPU::RISCV64Recompiler::EmitUExtB(const biscuit::GPR& rd, const biscuit::GPR& rs) { rvEmitUExtB(rvAsm, rd, rs); } -void CPU::Recompiler::RISCV64Recompiler::EmitSExtH(const biscuit::GPR& rd, const biscuit::GPR& rs) +void CPU::RISCV64Recompiler::EmitSExtH(const biscuit::GPR& rd, const biscuit::GPR& rs) { rvEmitSExtH(rvAsm, rd, rs); } -void CPU::Recompiler::RISCV64Recompiler::EmitUExtH(const biscuit::GPR& rd, const biscuit::GPR& rs) +void CPU::RISCV64Recompiler::EmitUExtH(const biscuit::GPR& rd, const biscuit::GPR& rs) { rvEmitUExtH(rvAsm, rd, 
rs); } -void CPU::Recompiler::RISCV64Recompiler::EmitDSExtW(const biscuit::GPR& rd, const biscuit::GPR& rs) +void CPU::RISCV64Recompiler::EmitDSExtW(const biscuit::GPR& rd, const biscuit::GPR& rs) { rvEmitDSExtW(rvAsm, rd, rs); } -void CPU::Recompiler::RISCV64Recompiler::EmitDUExtW(const biscuit::GPR& rd, const biscuit::GPR& rs) +void CPU::RISCV64Recompiler::EmitDUExtW(const biscuit::GPR& rd, const biscuit::GPR& rs) { rvEmitDUExtW(rvAsm, rd, rs); } -void CPU::Recompiler::RISCV64Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) +void CPU::RISCV64Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) { // store it first to reduce code size, because we can offset // TODO: 64-bit displacement is needed :/ @@ -543,7 +544,7 @@ void CPU::Recompiler::RISCV64Recompiler::GenerateBlockProtectCheck(const u8* ram rvAsm->Bind(&block_unchanged); } -void CPU::Recompiler::RISCV64Recompiler::GenerateICacheCheckAndUpdate() +void CPU::RISCV64Recompiler::GenerateICacheCheckAndUpdate() { if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache)) { @@ -599,8 +600,8 @@ void CPU::Recompiler::RISCV64Recompiler::GenerateICacheCheckAndUpdate() } } -void CPU::Recompiler::RISCV64Recompiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/, - s32 arg3reg /*= -1*/) +void CPU::RISCV64Recompiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/, + s32 arg3reg /*= -1*/) { if (arg1reg >= 0 && arg1reg != static_cast(RARG1.Index())) rvAsm->MV(RARG1, GPR(arg1reg)); @@ -611,7 +612,7 @@ void CPU::Recompiler::RISCV64Recompiler::GenerateCall(const void* func, s32 arg1 EmitCall(func); } -void CPU::Recompiler::RISCV64Recompiler::EndBlock(const std::optional& newpc, bool do_event_test) +void CPU::RISCV64Recompiler::EndBlock(const std::optional& newpc, bool do_event_test) { if (newpc.has_value()) { @@ -628,7 +629,7 @@ void CPU::Recompiler::RISCV64Recompiler::EndBlock(const std::optional& newp EndAndLinkBlock(newpc, do_event_test, false); } -void CPU::Recompiler::RISCV64Recompiler::EndBlockWithException(Exception excode) +void CPU::RISCV64Recompiler::EndBlockWithException(Exception excode) { // flush regs, but not pc, it's going to get overwritten // flush cycles because of the GTE instruction stuff... @@ -646,8 +647,7 @@ void CPU::Recompiler::RISCV64Recompiler::EndBlockWithException(Exception excode) EndAndLinkBlock(std::nullopt, true, false); } -void CPU::Recompiler::RISCV64Recompiler::EndAndLinkBlock(const std::optional& newpc, bool do_event_test, - bool force_run_events) +void CPU::RISCV64Recompiler::EndAndLinkBlock(const std::optional& newpc, bool do_event_test, bool force_run_events) { // event test // pc should've been flushed @@ -711,7 +711,7 @@ void CPU::Recompiler::RISCV64Recompiler::EndAndLinkBlock(const std::optionalGetBufferPointer(0); *code_size = static_cast(m_emitter->GetCodeBuffer().GetSizeInBytes()); @@ -722,7 +722,7 @@ const void* CPU::Recompiler::RISCV64Recompiler::EndCompile(u32* code_size, u32* return code; } -const char* CPU::Recompiler::RISCV64Recompiler::GetHostRegName(u32 reg) const +const char* CPU::RISCV64Recompiler::GetHostRegName(u32 reg) const { static constexpr std::array reg64_names = { {"zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5", @@ -730,22 +730,22 @@ const char* CPU::Recompiler::RISCV64Recompiler::GetHostRegName(u32 reg) const return (reg < reg64_names.size()) ? 
reg64_names[reg] : "UNKNOWN"; } -void CPU::Recompiler::RISCV64Recompiler::LoadHostRegWithConstant(u32 reg, u32 val) +void CPU::RISCV64Recompiler::LoadHostRegWithConstant(u32 reg, u32 val) { EmitMov(GPR(reg), val); } -void CPU::Recompiler::RISCV64Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr) +void CPU::RISCV64Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr) { rvAsm->LW(GPR(reg), PTR(ptr)); } -void CPU::Recompiler::RISCV64Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr) +void CPU::RISCV64Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr) { rvAsm->SW(GPR(reg), PTR(ptr)); } -void CPU::Recompiler::RISCV64Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr) +void CPU::RISCV64Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr) { if (val == 0) { @@ -757,23 +757,23 @@ void CPU::Recompiler::RISCV64Recompiler::StoreConstantToCPUPointer(u32 val, cons rvAsm->SW(RSCRATCH, PTR(ptr)); } -void CPU::Recompiler::RISCV64Recompiler::CopyHostReg(u32 dst, u32 src) +void CPU::RISCV64Recompiler::CopyHostReg(u32 dst, u32 src) { if (src != dst) rvAsm->MV(GPR(dst), GPR(src)); } -void CPU::Recompiler::RISCV64Recompiler::AssertRegOrConstS(CompileFlags cf) const +void CPU::RISCV64Recompiler::AssertRegOrConstS(CompileFlags cf) const { DebugAssert(cf.valid_host_s || cf.const_s); } -void CPU::Recompiler::RISCV64Recompiler::AssertRegOrConstT(CompileFlags cf) const +void CPU::RISCV64Recompiler::AssertRegOrConstT(CompileFlags cf) const { DebugAssert(cf.valid_host_t || cf.const_t); } -biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetSafeRegS(CompileFlags cf, const biscuit::GPR& temp_reg) +biscuit::GPR CPU::RISCV64Recompiler::CFGetSafeRegS(CompileFlags cf, const biscuit::GPR& temp_reg) { if (cf.valid_host_s) { @@ -795,7 +795,7 @@ biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetSafeRegS(CompileFlags cf, } } -biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetSafeRegT(CompileFlags cf, const biscuit::GPR& temp_reg) +biscuit::GPR CPU::RISCV64Recompiler::CFGetSafeRegT(CompileFlags cf, const biscuit::GPR& temp_reg) { if (cf.valid_host_t) { @@ -817,37 +817,37 @@ biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetSafeRegT(CompileFlags cf, } } -biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetRegD(CompileFlags cf) const +biscuit::GPR CPU::RISCV64Recompiler::CFGetRegD(CompileFlags cf) const { DebugAssert(cf.valid_host_d); return GPR(cf.host_d); } -biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetRegS(CompileFlags cf) const +biscuit::GPR CPU::RISCV64Recompiler::CFGetRegS(CompileFlags cf) const { DebugAssert(cf.valid_host_s); return GPR(cf.host_s); } -biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetRegT(CompileFlags cf) const +biscuit::GPR CPU::RISCV64Recompiler::CFGetRegT(CompileFlags cf) const { DebugAssert(cf.valid_host_t); return GPR(cf.host_t); } -biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetRegLO(CompileFlags cf) const +biscuit::GPR CPU::RISCV64Recompiler::CFGetRegLO(CompileFlags cf) const { DebugAssert(cf.valid_host_lo); return GPR(cf.host_lo); } -biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetRegHI(CompileFlags cf) const +biscuit::GPR CPU::RISCV64Recompiler::CFGetRegHI(CompileFlags cf) const { DebugAssert(cf.valid_host_hi); return GPR(cf.host_hi); } -void CPU::Recompiler::RISCV64Recompiler::MoveSToReg(const biscuit::GPR& dst, CompileFlags cf) +void CPU::RISCV64Recompiler::MoveSToReg(const biscuit::GPR& dst, CompileFlags cf) { if (cf.valid_host_s) { @@ -865,7 +865,7 @@ void 
CPU::Recompiler::RISCV64Recompiler::MoveSToReg(const biscuit::GPR& dst, Com } } -void CPU::Recompiler::RISCV64Recompiler::MoveTToReg(const biscuit::GPR& dst, CompileFlags cf) +void CPU::RISCV64Recompiler::MoveTToReg(const biscuit::GPR& dst, CompileFlags cf) { if (cf.valid_host_t) { @@ -883,7 +883,7 @@ void CPU::Recompiler::RISCV64Recompiler::MoveTToReg(const biscuit::GPR& dst, Com } } -void CPU::Recompiler::RISCV64Recompiler::MoveMIPSRegToReg(const biscuit::GPR& dst, Reg reg) +void CPU::RISCV64Recompiler::MoveMIPSRegToReg(const biscuit::GPR& dst, Reg reg) { DebugAssert(reg < Reg::count); if (const std::optional hreg = CheckHostReg(0, Recompiler::HR_TYPE_CPU_REG, reg)) @@ -894,9 +894,8 @@ void CPU::Recompiler::RISCV64Recompiler::MoveMIPSRegToReg(const biscuit::GPR& ds rvAsm->LW(dst, PTR(&g_state.regs.r[static_cast(reg)])); } -void CPU::Recompiler::RISCV64Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, - Reg arg2reg /* = Reg::count */, - Reg arg3reg /* = Reg::count */) +void CPU::RISCV64Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg /* = Reg::count */, + Reg arg3reg /* = Reg::count */) { DebugAssert(g_settings.gpu_pgxp_enable); @@ -911,7 +910,7 @@ void CPU::Recompiler::RISCV64Recompiler::GeneratePGXPCallWithMIPSRegs(const void EmitCall(func); } -void CPU::Recompiler::RISCV64Recompiler::Flush(u32 flags) +void CPU::RISCV64Recompiler::Flush(u32 flags) { Recompiler::Flush(flags); @@ -1000,14 +999,14 @@ void CPU::Recompiler::RISCV64Recompiler::Flush(u32 flags) } } -void CPU::Recompiler::RISCV64Recompiler::Compile_Fallback() +void CPU::RISCV64Recompiler::Compile_Fallback() { WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits); Flush(FLUSH_FOR_INTERPRETER); #if 0 - cg->call(&CPU::Recompiler::Thunks::InterpretInstruction); + cg->call(&CPU::RecompilerThunks::InterpretInstruction); // TODO: make me less garbage // TODO: this is wrong, it flushes the load delay on the same cycle when we return. 
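// ---------------------------------------------------------------------------------------------
// Editor's aside (not part of the diff): CheckBranchTarget below guards indirect jumps when
// cpu_recompiler_memory_exceptions is enabled. The rule it enforces is a MIPS architectural one:
// instruction fetches must be 4-byte aligned, and a misaligned target raises an address-error
// exception instead of executing. A minimal self-contained sketch of that predicate, with a
// hypothetical name (the real check is emitted as host code, not called as a function):
#include <cstdint>

bool IsValidBranchTargetSketch(uint32_t pc)
{
  return (pc & 3u) == 0; // low two bits clear; otherwise the CPU raises an address-error exception
}
// ---------------------------------------------------------------------------------------------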
@@ -1028,7 +1027,7 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_Fallback() #endif } -void CPU::Recompiler::RISCV64Recompiler::CheckBranchTarget(const biscuit::GPR& pcreg) +void CPU::RISCV64Recompiler::CheckBranchTarget(const biscuit::GPR& pcreg) { if (!g_settings.cpu_recompiler_memory_exceptions) return; @@ -1044,7 +1043,7 @@ void CPU::Recompiler::RISCV64Recompiler::CheckBranchTarget(const biscuit::GPR& p SwitchToNearCode(false); } -void CPU::Recompiler::RISCV64Recompiler::Compile_jr(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_jr(CompileFlags cf) { const GPR pcreg = CFGetRegS(cf); CheckBranchTarget(pcreg); @@ -1055,7 +1054,7 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_jr(CompileFlags cf) EndBlock(std::nullopt, true); } -void CPU::Recompiler::RISCV64Recompiler::Compile_jalr(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_jalr(CompileFlags cf) { const GPR pcreg = CFGetRegS(cf); if (MipsD() != Reg::zero) @@ -1068,7 +1067,7 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_jalr(CompileFlags cf) EndBlock(std::nullopt, true); } -void CPU::Recompiler::RISCV64Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond) +void CPU::RISCV64Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond) { AssertRegOrConstS(cf); @@ -1146,7 +1145,7 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_bxx(CompileFlags cf, BranchCond EndBlock(taken_pc, true); } -void CPU::Recompiler::RISCV64Recompiler::Compile_addi(CompileFlags cf, bool overflow) +void CPU::RISCV64Recompiler::Compile_addi(CompileFlags cf, bool overflow) { const GPR rs = CFGetRegS(cf); const GPR rt = CFGetRegT(cf); @@ -1169,27 +1168,27 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_addi(CompileFlags cf, bool over } } -void CPU::Recompiler::RISCV64Recompiler::Compile_addi(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_addi(CompileFlags cf) { Compile_addi(cf, g_settings.cpu_recompiler_memory_exceptions); } -void CPU::Recompiler::RISCV64Recompiler::Compile_addiu(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_addiu(CompileFlags cf) { Compile_addi(cf, false); } -void CPU::Recompiler::RISCV64Recompiler::Compile_slti(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_slti(CompileFlags cf) { Compile_slti(cf, true); } -void CPU::Recompiler::RISCV64Recompiler::Compile_sltiu(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_sltiu(CompileFlags cf) { Compile_slti(cf, false); } -void CPU::Recompiler::RISCV64Recompiler::Compile_slti(CompileFlags cf, bool sign) +void CPU::RISCV64Recompiler::Compile_slti(CompileFlags cf, bool sign) { if (sign) SafeSLTI(CFGetRegT(cf), CFGetRegS(cf), inst->i.imm_sext32()); @@ -1197,7 +1196,7 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_slti(CompileFlags cf, bool sign SafeSLTIU(CFGetRegT(cf), CFGetRegS(cf), inst->i.imm_sext32()); } -void CPU::Recompiler::RISCV64Recompiler::Compile_andi(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_andi(CompileFlags cf) { const GPR rt = CFGetRegT(cf); if (const u32 imm = inst->i.imm_zext32(); imm != 0) @@ -1206,7 +1205,7 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_andi(CompileFlags cf) EmitMov(rt, 0); } -void CPU::Recompiler::RISCV64Recompiler::Compile_ori(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_ori(CompileFlags cf) { const GPR rt = CFGetRegT(cf); const GPR rs = CFGetRegS(cf); @@ -1216,7 +1215,7 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_ori(CompileFlags cf) rvAsm->MV(rt, rs); } -void CPU::Recompiler::RISCV64Recompiler::Compile_xori(CompileFlags cf) +void 
CPU::RISCV64Recompiler::Compile_xori(CompileFlags cf) { const GPR rt = CFGetRegT(cf); const GPR rs = CFGetRegS(cf); @@ -1226,9 +1225,9 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_xori(CompileFlags cf) rvAsm->MV(rt, rs); } -void CPU::Recompiler::RISCV64Recompiler::Compile_shift( - CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR), - void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned)) +void CPU::RISCV64Recompiler::Compile_shift(CompileFlags cf, + void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR), + void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned)) { const GPR rd = CFGetRegD(cf); const GPR rt = CFGetRegT(cf); @@ -1238,22 +1237,22 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_shift( rvAsm->MV(rd, rt); } -void CPU::Recompiler::RISCV64Recompiler::Compile_sll(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_sll(CompileFlags cf) { Compile_shift(cf, &Assembler::SLLW, &Assembler::SLLIW); } -void CPU::Recompiler::RISCV64Recompiler::Compile_srl(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_srl(CompileFlags cf) { Compile_shift(cf, &Assembler::SRLW, &Assembler::SRLIW); } -void CPU::Recompiler::RISCV64Recompiler::Compile_sra(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_sra(CompileFlags cf) { Compile_shift(cf, &Assembler::SRAW, &Assembler::SRAIW); } -void CPU::Recompiler::RISCV64Recompiler::Compile_variable_shift( +void CPU::RISCV64Recompiler::Compile_variable_shift( CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR), void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned)) { @@ -1279,22 +1278,22 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_variable_shift( } } -void CPU::Recompiler::RISCV64Recompiler::Compile_sllv(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_sllv(CompileFlags cf) { Compile_variable_shift(cf, &Assembler::SLLW, &Assembler::SLLIW); } -void CPU::Recompiler::RISCV64Recompiler::Compile_srlv(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_srlv(CompileFlags cf) { Compile_variable_shift(cf, &Assembler::SRLW, &Assembler::SRLIW); } -void CPU::Recompiler::RISCV64Recompiler::Compile_srav(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_srav(CompileFlags cf) { Compile_variable_shift(cf, &Assembler::SRAW, &Assembler::SRAIW); } -void CPU::Recompiler::RISCV64Recompiler::Compile_mult(CompileFlags cf, bool sign) +void CPU::RISCV64Recompiler::Compile_mult(CompileFlags cf, bool sign) { const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1; if (!cf.valid_host_s) @@ -1325,17 +1324,17 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_mult(CompileFlags cf, bool sign } } -void CPU::Recompiler::RISCV64Recompiler::Compile_mult(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_mult(CompileFlags cf) { Compile_mult(cf, true); } -void CPU::Recompiler::RISCV64Recompiler::Compile_multu(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_multu(CompileFlags cf) { Compile_mult(cf, false); } -void CPU::Recompiler::RISCV64Recompiler::Compile_div(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_div(CompileFlags cf) { // 36 Volume I: RISC-V User-Level ISA V2.2 const GPR rs = cf.valid_host_s ? 
CFGetRegS(cf) : RARG1; @@ -1375,7 +1374,7 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_div(CompileFlags cf) rvAsm->Bind(&done); } -void CPU::Recompiler::RISCV64Recompiler::Compile_divu(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_divu(CompileFlags cf) { const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1; if (!cf.valid_host_s) @@ -1393,8 +1392,8 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_divu(CompileFlags cf) rvAsm->REMUW(rhi, rs, rt); } -void CPU::Recompiler::RISCV64Recompiler::TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res, - const biscuit::GPR& reg_to_discard) +void CPU::RISCV64Recompiler::TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res, + const biscuit::GPR& reg_to_discard) { SwitchToFarCode(true, &Assembler::BEQ, long_res, res); @@ -1410,7 +1409,7 @@ void CPU::Recompiler::RISCV64Recompiler::TestOverflow(const biscuit::GPR& long_r SwitchToNearCode(false); } -void CPU::Recompiler::RISCV64Recompiler::Compile_dst_op( +void CPU::RISCV64Recompiler::Compile_dst_op( CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR), void (RISCV64Recompiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm), void (biscuit::Assembler::*op_long)(biscuit::GPR, biscuit::GPR, biscuit::GPR), bool commutative, bool overflow) @@ -1476,29 +1475,29 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_dst_op( } } -void CPU::Recompiler::RISCV64Recompiler::Compile_add(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_add(CompileFlags cf) { Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Recompiler::SafeADDIW, &Assembler::ADD, true, g_settings.cpu_recompiler_memory_exceptions); } -void CPU::Recompiler::RISCV64Recompiler::Compile_addu(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_addu(CompileFlags cf) { Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Recompiler::SafeADDIW, &Assembler::ADD, true, false); } -void CPU::Recompiler::RISCV64Recompiler::Compile_sub(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_sub(CompileFlags cf) { Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Recompiler::SafeSUBIW, &Assembler::SUB, false, g_settings.cpu_recompiler_memory_exceptions); } -void CPU::Recompiler::RISCV64Recompiler::Compile_subu(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_subu(CompileFlags cf) { Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Recompiler::SafeSUBIW, &Assembler::SUB, false, false); } -void CPU::Recompiler::RISCV64Recompiler::Compile_and(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_and(CompileFlags cf) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1519,7 +1518,7 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_and(CompileFlags cf) Compile_dst_op(cf, &Assembler::AND, &RISCV64Recompiler::SafeANDI, &Assembler::AND, true, false); } -void CPU::Recompiler::RISCV64Recompiler::Compile_or(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_or(CompileFlags cf) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1535,7 +1534,7 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_or(CompileFlags cf) Compile_dst_op(cf, &Assembler::OR, &RISCV64Recompiler::SafeORI, &Assembler::OR, true, false); } -void CPU::Recompiler::RISCV64Recompiler::Compile_xor(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_xor(CompileFlags cf) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1557,23 +1556,23 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_xor(CompileFlags cf) Compile_dst_op(cf, &Assembler::XOR, &RISCV64Recompiler::SafeXORI, &Assembler::XOR, true, false); } -void 
CPU::Recompiler::RISCV64Recompiler::Compile_nor(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_nor(CompileFlags cf) { Compile_or(cf); rvAsm->NOT(CFGetRegD(cf), CFGetRegD(cf)); } -void CPU::Recompiler::RISCV64Recompiler::Compile_slt(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_slt(CompileFlags cf) { Compile_slt(cf, true); } -void CPU::Recompiler::RISCV64Recompiler::Compile_sltu(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_sltu(CompileFlags cf) { Compile_slt(cf, false); } -void CPU::Recompiler::RISCV64Recompiler::Compile_slt(CompileFlags cf, bool sign) +void CPU::RISCV64Recompiler::Compile_slt(CompileFlags cf, bool sign) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1598,8 +1597,9 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_slt(CompileFlags cf, bool sign) } } -biscuit::GPR CPU::Recompiler::RISCV64Recompiler::ComputeLoadStoreAddressArg( - CompileFlags cf, const std::optional& address, const std::optional& reg) +biscuit::GPR CPU::RISCV64Recompiler::ComputeLoadStoreAddressArg(CompileFlags cf, + const std::optional& address, + const std::optional& reg) { const u32 imm = inst->i.imm_sext32(); if (cf.valid_host_s && imm == 0 && !reg.has_value()) @@ -1639,9 +1639,8 @@ biscuit::GPR CPU::Recompiler::RISCV64Recompiler::ComputeLoadStoreAddressArg( } template -biscuit::GPR CPU::Recompiler::RISCV64Recompiler::GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, - bool sign, bool use_fastmem, - const RegAllocFn& dst_reg_alloc) +biscuit::GPR CPU::RISCV64Recompiler::GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign, + bool use_fastmem, const RegAllocFn& dst_reg_alloc) { if (use_fastmem) { @@ -1698,20 +1697,20 @@ biscuit::GPR CPU::Recompiler::RISCV64Recompiler::GenerateLoad(const biscuit::GPR { case MemoryAccessSize::Byte: { - EmitCall(checked ? reinterpret_cast(&Recompiler::Thunks::ReadMemoryByte) : - reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryByte)); + EmitCall(checked ? reinterpret_cast(&RecompilerThunks::ReadMemoryByte) : + reinterpret_cast(&RecompilerThunks::UncheckedReadMemoryByte)); } break; case MemoryAccessSize::HalfWord: { - EmitCall(checked ? reinterpret_cast(&Recompiler::Thunks::ReadMemoryHalfWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryHalfWord)); + EmitCall(checked ? reinterpret_cast(&RecompilerThunks::ReadMemoryHalfWord) : + reinterpret_cast(&RecompilerThunks::UncheckedReadMemoryHalfWord)); } break; case MemoryAccessSize::Word: { - EmitCall(checked ? reinterpret_cast(&Recompiler::Thunks::ReadMemoryWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryWord)); + EmitCall(checked ? reinterpret_cast(&RecompilerThunks::ReadMemoryWord) : + reinterpret_cast(&RecompilerThunks::UncheckedReadMemoryWord)); } break; } @@ -1770,8 +1769,8 @@ biscuit::GPR CPU::Recompiler::RISCV64Recompiler::GenerateLoad(const biscuit::GPR return dst_reg; } -void CPU::Recompiler::RISCV64Recompiler::GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg, - MemoryAccessSize size, bool use_fastmem) +void CPU::RISCV64Recompiler::GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg, + MemoryAccessSize size, bool use_fastmem) { if (use_fastmem) { @@ -1826,20 +1825,20 @@ void CPU::Recompiler::RISCV64Recompiler::GenerateStore(const biscuit::GPR& addr_ { case MemoryAccessSize::Byte: { - EmitCall(checked ? reinterpret_cast(&Recompiler::Thunks::WriteMemoryByte) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryByte)); + EmitCall(checked ? 
reinterpret_cast(&RecompilerThunks::WriteMemoryByte) : + reinterpret_cast(&RecompilerThunks::UncheckedWriteMemoryByte)); } break; case MemoryAccessSize::HalfWord: { - EmitCall(checked ? reinterpret_cast(&Recompiler::Thunks::WriteMemoryHalfWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryHalfWord)); + EmitCall(checked ? reinterpret_cast(&RecompilerThunks::WriteMemoryHalfWord) : + reinterpret_cast(&RecompilerThunks::UncheckedWriteMemoryHalfWord)); } break; case MemoryAccessSize::Word: { - EmitCall(checked ? reinterpret_cast(&Recompiler::Thunks::WriteMemoryWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryWord)); + EmitCall(checked ? reinterpret_cast(&RecompilerThunks::WriteMemoryWord) : + reinterpret_cast(&RecompilerThunks::UncheckedWriteMemoryWord)); } break; } @@ -1870,9 +1869,8 @@ void CPU::Recompiler::RISCV64Recompiler::GenerateStore(const biscuit::GPR& addr_ } } -void CPU::Recompiler::RISCV64Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, - bool use_fastmem, - const std::optional& address) +void CPU::RISCV64Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { const std::optional addr_reg = (g_settings.gpu_pgxp_enable && cf.MipsT() != Reg::zero) ? std::optional(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) : @@ -1899,9 +1897,8 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_lxx(CompileFlags cf, MemoryAcce } } -void CPU::Recompiler::RISCV64Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, - bool use_fastmem, - const std::optional& address) +void CPU::RISCV64Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { DebugAssert(size == MemoryAccessSize::Word && !sign); @@ -1994,9 +1991,8 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_lwx(CompileFlags cf, MemoryAcce } } -void CPU::Recompiler::RISCV64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, - bool use_fastmem, - const std::optional& address) +void CPU::RISCV64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { const u32 index = static_cast(inst->r.rt.GetValue()); const auto [ptr, action] = GetGTERegisterPointer(index, true); @@ -2080,9 +2076,8 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAcc } } -void CPU::Recompiler::RISCV64Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, - bool use_fastmem, - const std::optional& address) +void CPU::RISCV64Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -2108,9 +2103,8 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_sxx(CompileFlags cf, MemoryAcce } } -void CPU::Recompiler::RISCV64Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, - bool use_fastmem, - const std::optional& address) +void CPU::RISCV64Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { DebugAssert(size == MemoryAccessSize::Word && !sign); @@ -2183,9 +2177,8 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_swx(CompileFlags cf, MemoryAcce } } -void CPU::Recompiler::RISCV64Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, - bool use_fastmem, - const std::optional& address) +void 
CPU::RISCV64Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { const u32 index = static_cast(inst->r.rt.GetValue()); const auto [ptr, action] = GetGTERegisterPointer(index, false); @@ -2241,7 +2234,7 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_swc2(CompileFlags cf, MemoryAcc } } -void CPU::Recompiler::RISCV64Recompiler::Compile_mtc0(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_mtc0(CompileFlags cf) { // TODO: we need better constant setting here.. which will need backprop AssertRegOrConstT(cf); @@ -2321,7 +2314,7 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_mtc0(CompileFlags cf) } } -void CPU::Recompiler::RISCV64Recompiler::Compile_rfe(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_rfe(CompileFlags cf) { // shift mode bits right two, preserving upper bits rvAsm->LW(RARG1, PTR(&g_state.cop0_regs.sr.bits)); @@ -2334,7 +2327,7 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_rfe(CompileFlags cf) TestInterrupts(RARG1); } -void CPU::Recompiler::RISCV64Recompiler::TestInterrupts(const biscuit::GPR& sr) +void CPU::RISCV64Recompiler::TestInterrupts(const biscuit::GPR& sr) { DebugAssert(sr != RSCRATCH); @@ -2387,7 +2380,7 @@ void CPU::Recompiler::RISCV64Recompiler::TestInterrupts(const biscuit::GPR& sr) rvAsm->Bind(&no_interrupt); } -void CPU::Recompiler::RISCV64Recompiler::Compile_mfc2(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_mfc2(CompileFlags cf) { const u32 index = inst->cop.Cop2Index(); const Reg rt = inst->r.rt; @@ -2427,7 +2420,7 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_mfc2(CompileFlags cf) } } -void CPU::Recompiler::RISCV64Recompiler::Compile_mtc2(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_mtc2(CompileFlags cf) { const u32 index = inst->cop.Cop2Index(); const auto [ptr, action] = GetGTERegisterPointer(index, true); @@ -2489,7 +2482,7 @@ void CPU::Recompiler::RISCV64Recompiler::Compile_mtc2(CompileFlags cf) } } -void CPU::Recompiler::RISCV64Recompiler::Compile_cop2(CompileFlags cf) +void CPU::RISCV64Recompiler::Compile_cop2(CompileFlags cf) { TickCount func_ticks; GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks); @@ -2559,20 +2552,20 @@ u32 CPU::Recompiler::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, vo { case MemoryAccessSize::Byte: { - rvEmitCall(rvAsm, is_load ? reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryByte) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryByte)); + rvEmitCall(rvAsm, is_load ? reinterpret_cast(&RecompilerThunks::UncheckedReadMemoryByte) : + reinterpret_cast(&RecompilerThunks::UncheckedWriteMemoryByte)); } break; case MemoryAccessSize::HalfWord: { - rvEmitCall(rvAsm, is_load ? reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryHalfWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryHalfWord)); + rvEmitCall(rvAsm, is_load ? reinterpret_cast(&RecompilerThunks::UncheckedReadMemoryHalfWord) : + reinterpret_cast(&RecompilerThunks::UncheckedWriteMemoryHalfWord)); } break; case MemoryAccessSize::Word: { - rvEmitCall(rvAsm, is_load ? reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryWord)); + rvEmitCall(rvAsm, is_load ? 
reinterpret_cast(&RecompilerThunks::UncheckedReadMemoryWord) : + reinterpret_cast(&RecompilerThunks::UncheckedWriteMemoryWord)); } break; } diff --git a/src/core/cpu_recompiler_riscv64.h b/src/core/cpu_recompiler_riscv64.h index 58f0a860e..69b7a2236 100644 --- a/src/core/cpu_recompiler_riscv64.h +++ b/src/core/cpu_recompiler_riscv64.h @@ -9,7 +9,9 @@ #ifdef CPU_ARCH_RISCV64 -namespace CPU::Recompiler { +#include "biscuit/assembler.hpp" + +namespace CPU { class RISCV64Recompiler final : public Recompiler { @@ -171,6 +173,6 @@ private: biscuit::Assembler* rvAsm; }; -} // namespace CPU::Recompiler +} // namespace CPU #endif // CPU_ARCH_RISCV64 diff --git a/src/core/cpu_recompiler_thunks.h b/src/core/cpu_recompiler_thunks.h deleted file mode 100644 index 61272c708..000000000 --- a/src/core/cpu_recompiler_thunks.h +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-License-Identifier: CC-BY-NC-ND-4.0 - -#pragma once - -#include "cpu_code_cache.h" -#include "cpu_types.h" - -namespace CPU::Recompiler::Thunks { - -////////////////////////////////////////////////////////////////////////// -// Trampolines for calling back from the JIT -// Needed because we can't cast member functions to void*... -// TODO: Abuse carry flag or something else for exception -////////////////////////////////////////////////////////////////////////// -bool InterpretInstruction(); -bool InterpretInstructionPGXP(); - -// Memory access functions for the JIT - MSB is set on exception. -u64 ReadMemoryByte(u32 address); -u64 ReadMemoryHalfWord(u32 address); -u64 ReadMemoryWord(u32 address); -u32 WriteMemoryByte(u32 address, u32 value); -u32 WriteMemoryHalfWord(u32 address, u32 value); -u32 WriteMemoryWord(u32 address, u32 value); - -// Unchecked memory access variants. No alignment or bus exceptions. -u32 UncheckedReadMemoryByte(u32 address); -u32 UncheckedReadMemoryHalfWord(u32 address); -u32 UncheckedReadMemoryWord(u32 address); -void UncheckedWriteMemoryByte(u32 address, u32 value); -void UncheckedWriteMemoryHalfWord(u32 address, u32 value); -void UncheckedWriteMemoryWord(u32 address, u32 value); - -} // namespace CPU::Recompiler::Thunks diff --git a/src/core/cpu_recompiler_types.h b/src/core/cpu_recompiler_types.h deleted file mode 100644 index 5a2f9ad51..000000000 --- a/src/core/cpu_recompiler_types.h +++ /dev/null @@ -1,182 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-License-Identifier: CC-BY-NC-ND-4.0 - -// Shared code between recompiler backends. - -#pragma once - -#include "cpu_types.h" - -#include - -#if defined(CPU_ARCH_X64) - -// We need to include windows.h before xbyak does.. -#ifdef _WIN32 -#include "common/windows_headers.h" -#endif - -#define XBYAK_NO_OP_NAMES 1 -#include "xbyak.h" - -namespace CPU::Recompiler { - -// A reasonable "maximum" number of bytes per instruction. 
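// ---------------------------------------------------------------------------------------------
// Editor's aside (not part of the diff): the deleted cpu_recompiler_thunks.h above documents that
// the checked read thunks return a u64 with the MSB set on exception, so generated code can test
// a single bit instead of reloading a separate flag. A self-contained sketch of the caller-side
// decode; the names here are hypothetical:
#include <cstdint>

constexpr uint64_t READ_EXCEPTION_BIT = 1ULL << 63;

bool ReadRaisedExceptionSketch(uint64_t thunk_result)
{
  return (thunk_result & READ_EXCEPTION_BIT) != 0; // MSB set => the access raised an exception
}

uint32_t ReadValueSketch(uint64_t thunk_result)
{
  return static_cast<uint32_t>(thunk_result); // low 32 bits carry the loaded value
}
// ---------------------------------------------------------------------------------------------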
-constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64; -constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128; - -// ABI selection -#if defined(_WIN32) -#define ABI_WIN64 1 - -#define RWRET Xbyak::Reg32(Xbyak::Operand::EAX) -#define RWARG1 Xbyak::Reg32(Xbyak::Operand::RCX) -#define RWARG2 Xbyak::Reg32(Xbyak::Operand::RDX) -#define RWARG3 Xbyak::Reg32(Xbyak::Operand::R8D) -#define RWARG4 Xbyak::Reg32(Xbyak::Operand::R9D) -#define RXRET Xbyak::Reg64(Xbyak::Operand::RAX) -#define RXARG1 Xbyak::Reg64(Xbyak::Operand::RCX) -#define RXARG2 Xbyak::Reg64(Xbyak::Operand::RDX) -#define RXARG3 Xbyak::Reg64(Xbyak::Operand::R8) -#define RXARG4 Xbyak::Reg64(Xbyak::Operand::R9) - -static constexpr u32 FUNCTION_CALL_SHADOW_SPACE = 32; - -#elif defined(__linux__) || defined(__ANDROID__) || defined(__APPLE__) || defined(__FreeBSD__) -#define ABI_SYSV 1 - -#define RWRET Xbyak::Reg32(Xbyak::Operand::EAX) -#define RWARG1 Xbyak::Reg32(Xbyak::Operand::EDI) -#define RWARG2 Xbyak::Reg32(Xbyak::Operand::ESI) -#define RWARG3 Xbyak::Reg32(Xbyak::Operand::EDX) -#define RWARG4 Xbyak::Reg32(Xbyak::Operand::ECX) -#define RXRET Xbyak::Reg64(Xbyak::Operand::RAX) -#define RXARG1 Xbyak::Reg64(Xbyak::Operand::RDI) -#define RXARG2 Xbyak::Reg64(Xbyak::Operand::RSI) -#define RXARG3 Xbyak::Reg64(Xbyak::Operand::RDX) -#define RXARG4 Xbyak::Reg64(Xbyak::Operand::RCX) - -static constexpr u32 FUNCTION_CALL_SHADOW_SPACE = 0; - -#else -#error Unknown ABI. -#endif - -bool IsCallerSavedRegister(u32 id); - -} // namespace CPU::Recompiler - -#elif defined(CPU_ARCH_ARM32) - -#include "vixl/aarch32/assembler-aarch32.h" -#include "vixl/aarch32/constants-aarch32.h" -#include "vixl/aarch32/instructions-aarch32.h" - -namespace CPU::Recompiler { - -// A reasonable "maximum" number of bytes per instruction. -constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64; -constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128; - -#define RRET vixl::aarch32::r0 -#define RRETHI vixl::aarch32::r1 -#define RARG1 vixl::aarch32::r0 -#define RARG2 vixl::aarch32::r1 -#define RARG3 vixl::aarch32::r2 -#define RSCRATCH vixl::aarch32::r12 -#define RSTATE vixl::aarch32::r4 - -bool armIsCallerSavedRegister(u32 id); -s32 armGetPCDisplacement(const void* current, const void* target); -bool armIsPCDisplacementInImmediateRange(s32 displacement); -void armMoveAddressToReg(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr); -void armEmitMov(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& rd, u32 imm); -void armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline); -void armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline); -void armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::aarch32::Condition cond, const void* ptr); -void armEmitFarLoad(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr); -void armEmitFarStore(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr, - const vixl::aarch32::Register& tempreg = RSCRATCH); -u8* armGetJumpTrampoline(const void* target); - -} // namespace CPU::Recompiler - -#elif defined(CPU_ARCH_ARM64) - -#include "vixl/aarch64/assembler-aarch64.h" -#include "vixl/aarch64/constants-aarch64.h" - -namespace CPU::Recompiler { - -// A reasonable "maximum" number of bytes per instruction. 
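// ---------------------------------------------------------------------------------------------
// Editor's aside (not part of the diff): helpers like armGetPCDisplacement() and
// armIsPCDisplacementInImmediateRange() declared above decide whether a direct B/BL can reach a
// target, or whether a trampoline/indirect call is needed. A sketch under the assumption that the
// limit being tested is the ARM32 direct-branch range (a signed 26-bit byte offset, roughly
// +/-32MB); the names and exact bounds here are the editor's reconstruction:
#include <cstdint>

int32_t GetPCDisplacementSketch(const void* current, const void* target)
{
  return static_cast<int32_t>(reinterpret_cast<intptr_t>(target) - reinterpret_cast<intptr_t>(current));
}

bool IsInDirectBranchRangeSketch(int32_t displacement)
{
  return displacement >= -33554432 && displacement <= 33554428; // signed 26-bit, word-aligned
}
// ---------------------------------------------------------------------------------------------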
-constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64; -constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128; - -#define RWRET vixl::aarch64::w0 -#define RXRET vixl::aarch64::x0 -#define RWARG1 vixl::aarch64::w0 -#define RXARG1 vixl::aarch64::x0 -#define RWARG2 vixl::aarch64::w1 -#define RXARG2 vixl::aarch64::x1 -#define RWARG3 vixl::aarch64::w2 -#define RXARG3 vixl::aarch64::x2 -#define RWSCRATCH vixl::aarch64::w16 -#define RXSCRATCH vixl::aarch64::x16 -#define RSTATE vixl::aarch64::x19 -#define RMEMBASE vixl::aarch64::x20 - -bool armIsCallerSavedRegister(u32 id); -s64 armGetPCDisplacement(const void* current, const void* target); -bool armIsInAdrpRange(vixl::aarch64::Assembler* armAsm, const void* addr); -void armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr); -void armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& rd, u64 imm); -void armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline); -void armEmitCall(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline); -void armEmitCondBranch(vixl::aarch64::Assembler* armAsm, vixl::aarch64::Condition cond, const void* ptr); -void armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr, - bool sign_extend_word = false); -void armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr, - const vixl::aarch64::Register& tempreg = RXSCRATCH); -u8* armGetJumpTrampoline(const void* target); - -} // namespace CPU::Recompiler - -#elif defined(CPU_ARCH_RISCV64) - -#include "biscuit/assembler.hpp" - -namespace CPU::Recompiler { - -// A reasonable "maximum" number of bytes per instruction. -constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64; -constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128; - -#define RRET biscuit::a0 -#define RARG1 biscuit::a0 -#define RARG2 biscuit::a1 -#define RARG3 biscuit::a2 -#define RSCRATCH biscuit::t6 -#define RSTATE biscuit::s10 -#define RMEMBASE biscuit::s11 - -bool rvIsCallerSavedRegister(u32 id); -bool rvIsValidSExtITypeImm(u32 imm); -std::pair rvGetAddressImmediates(const void* cur, const void* target); -void rvMoveAddressToReg(biscuit::Assembler* armAsm, const biscuit::GPR& reg, const void* addr); -void rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm); -void rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, u64 imm); -u32 rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg = biscuit::zero); -u32 rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr); -void rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr, bool sign_extend_word = false); -void rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr, - const biscuit::GPR& tempreg = RSCRATCH); -void rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word -void rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word -void rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word -void rvEmitUExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word -void rvEmitDSExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> doubleword -void rvEmitDUExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const 
biscuit::GPR& rs); // -> doubleword - -} // namespace CPU::Recompiler - -#endif diff --git a/src/core/cpu_recompiler_x64.cpp b/src/core/cpu_recompiler_x64.cpp index ad534a3a1..7351eb8ed 100644 --- a/src/core/cpu_recompiler_x64.cpp +++ b/src/core/cpu_recompiler_x64.cpp @@ -5,8 +5,6 @@ #include "cpu_code_cache_private.h" #include "cpu_core_private.h" #include "cpu_pgxp.h" -#include "cpu_recompiler_thunks.h" -#include "cpu_recompiler_types.h" #include "gte.h" #include "settings.h" #include "timing_event.h" @@ -37,25 +35,58 @@ LOG_CHANNEL(Recompiler); // PGXP TODO: LWL etc, MFC0 // PGXP TODO: Spyro 1 level gates have issues. -namespace CPU::Recompiler { - -using namespace Xbyak; - static constexpr u32 BACKPATCH_JMP_SIZE = 5; +static bool IsCallerSavedRegister(u32 id); + +// ABI selection +#if defined(_WIN32) + +#define RWRET Xbyak::Reg32(Xbyak::Operand::EAX) +#define RWARG1 Xbyak::Reg32(Xbyak::Operand::RCX) +#define RWARG2 Xbyak::Reg32(Xbyak::Operand::RDX) +#define RWARG3 Xbyak::Reg32(Xbyak::Operand::R8D) +#define RWARG4 Xbyak::Reg32(Xbyak::Operand::R9D) +#define RXRET Xbyak::Reg64(Xbyak::Operand::RAX) +#define RXARG1 Xbyak::Reg64(Xbyak::Operand::RCX) +#define RXARG2 Xbyak::Reg64(Xbyak::Operand::RDX) +#define RXARG3 Xbyak::Reg64(Xbyak::Operand::R8) +#define RXARG4 Xbyak::Reg64(Xbyak::Operand::R9) + // on win32, we need to reserve an additional 32 bytes shadow space when calling out to C -#ifdef _WIN32 static constexpr u32 STACK_SHADOW_SIZE = 32; -#else + +#elif defined(__linux__) || defined(__ANDROID__) || defined(__APPLE__) || defined(__FreeBSD__) + +#define RWRET Xbyak::Reg32(Xbyak::Operand::EAX) +#define RWARG1 Xbyak::Reg32(Xbyak::Operand::EDI) +#define RWARG2 Xbyak::Reg32(Xbyak::Operand::ESI) +#define RWARG3 Xbyak::Reg32(Xbyak::Operand::EDX) +#define RWARG4 Xbyak::Reg32(Xbyak::Operand::ECX) +#define RXRET Xbyak::Reg64(Xbyak::Operand::RAX) +#define RXARG1 Xbyak::Reg64(Xbyak::Operand::RDI) +#define RXARG2 Xbyak::Reg64(Xbyak::Operand::RSI) +#define RXARG3 Xbyak::Reg64(Xbyak::Operand::RDX) +#define RXARG4 Xbyak::Reg64(Xbyak::Operand::RCX) + static constexpr u32 STACK_SHADOW_SIZE = 0; + +#else + +#error Unknown ABI. + #endif +namespace CPU { + +using namespace Xbyak; + static X64Recompiler s_instance; Recompiler* g_compiler = &s_instance; -} // namespace CPU::Recompiler +} // namespace CPU -bool CPU::Recompiler::IsCallerSavedRegister(u32 id) +bool IsCallerSavedRegister(u32 id) { #ifdef _WIN32 // The x64 ABI considers the registers RAX, RCX, RDX, R8, R9, R10, R11, and XMM0-XMM5 volatile. 
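// ---------------------------------------------------------------------------------------------
// Editor's aside (not part of the diff): one plausible body for the Win64 branch of
// IsCallerSavedRegister() above, assuming Xbyak's GPR indices (RAX=0, RCX=1, RDX=2, R8..R11=8..11).
// The SysV branch would additionally treat RSI (6) and RDI (7) as volatile. Sketch only, not the
// project's exact code:
#include <cstdint>

static bool IsCallerSavedRegisterSketch(uint32_t id)
{
  return (id <= 2) ||            // RAX, RCX, RDX
         (id >= 8 && id <= 11);  // R8, R9, R10, R11
}
// ---------------------------------------------------------------------------------------------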
@@ -330,12 +361,12 @@ u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size) #endif // ENABLE_HOST_DISASSEMBLY -CPU::Recompiler::X64Recompiler::X64Recompiler() = default; +CPU::X64Recompiler::X64Recompiler() = default; -CPU::Recompiler::X64Recompiler::~X64Recompiler() = default; +CPU::X64Recompiler::~X64Recompiler() = default; -void CPU::Recompiler::X64Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, - u8* far_code_buffer, u32 far_code_space) +void CPU::X64Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, + u32 far_code_space) { Recompiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space); @@ -366,7 +397,7 @@ void CPU::Recompiler::X64Recompiler::Reset(CodeCache::Block* block, u8* code_buf } } -void CPU::Recompiler::X64Recompiler::SwitchToFarCode(bool emit_jump, void (Xbyak::CodeGenerator::*jump_op)(const void*)) +void CPU::X64Recompiler::SwitchToFarCode(bool emit_jump, void (Xbyak::CodeGenerator::*jump_op)(const void*)) { DebugAssert(cg == m_emitter.get()); if (emit_jump) @@ -377,8 +408,7 @@ void CPU::Recompiler::X64Recompiler::SwitchToFarCode(bool emit_jump, void (Xbyak cg = m_far_emitter.get(); } -void CPU::Recompiler::X64Recompiler::SwitchToNearCode(bool emit_jump, - void (Xbyak::CodeGenerator::*jump_op)(const void*)) +void CPU::X64Recompiler::SwitchToNearCode(bool emit_jump, void (Xbyak::CodeGenerator::*jump_op)(const void*)) { DebugAssert(cg == m_far_emitter.get()); if (emit_jump) @@ -389,7 +419,7 @@ void CPU::Recompiler::X64Recompiler::SwitchToNearCode(bool emit_jump, cg = m_emitter.get(); } -void CPU::Recompiler::X64Recompiler::BeginBlock() +void CPU::X64Recompiler::BeginBlock() { Recompiler::BeginBlock(); @@ -408,7 +438,7 @@ void CPU::Recompiler::X64Recompiler::BeginBlock() #endif } -void CPU::Recompiler::X64Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) +void CPU::X64Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) { // store it first to reduce code size, because we can offset cg->mov(RXARG1, static_cast(reinterpret_cast(ram_ptr))); @@ -459,7 +489,7 @@ void CPU::Recompiler::X64Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr DebugAssert(size == 0); } -void CPU::Recompiler::X64Recompiler::GenerateICacheCheckAndUpdate() +void CPU::X64Recompiler::GenerateICacheCheckAndUpdate() { if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache)) { @@ -500,8 +530,8 @@ void CPU::Recompiler::X64Recompiler::GenerateICacheCheckAndUpdate() } } -void CPU::Recompiler::X64Recompiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/, - s32 arg3reg /*= -1*/) +void CPU::X64Recompiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/, + s32 arg3reg /*= -1*/) { if (arg1reg >= 0 && arg1reg != static_cast(RXARG1.getIdx())) cg->mov(RXARG1, Reg64(arg1reg)); @@ -512,7 +542,7 @@ void CPU::Recompiler::X64Recompiler::GenerateCall(const void* func, s32 arg1reg cg->call(func); } -void CPU::Recompiler::X64Recompiler::EndBlock(const std::optional& newpc, bool do_event_test) +void CPU::X64Recompiler::EndBlock(const std::optional& newpc, bool do_event_test) { if (newpc.has_value()) { @@ -526,7 +556,7 @@ void CPU::Recompiler::X64Recompiler::EndBlock(const std::optional& newpc, b EndAndLinkBlock(newpc, do_event_test, false); } -void CPU::Recompiler::X64Recompiler::EndBlockWithException(Exception excode) +void 
CPU::X64Recompiler::EndBlockWithException(Exception excode) { // flush regs, but not pc, it's going to get overwritten // flush cycles because of the GTE instruction stuff... @@ -544,8 +574,7 @@ void CPU::Recompiler::X64Recompiler::EndBlockWithException(Exception excode) EndAndLinkBlock(std::nullopt, true, false); } -void CPU::Recompiler::X64Recompiler::EndAndLinkBlock(const std::optional& newpc, bool do_event_test, - bool force_run_events) +void CPU::X64Recompiler::EndAndLinkBlock(const std::optional& newpc, bool do_event_test, bool force_run_events) { // event test // pc should've been flushed @@ -614,7 +643,7 @@ void CPU::Recompiler::X64Recompiler::EndAndLinkBlock(const std::optional& n } } -const void* CPU::Recompiler::X64Recompiler::EndCompile(u32* code_size, u32* far_code_size) +const void* CPU::X64Recompiler::EndCompile(u32* code_size, u32* far_code_size) { const void* code = m_emitter->getCode(); *code_size = static_cast(m_emitter->getSize()); @@ -625,81 +654,81 @@ const void* CPU::Recompiler::X64Recompiler::EndCompile(u32* code_size, u32* far_ return code; } -const void* CPU::Recompiler::X64Recompiler::GetCurrentCodePointer() +const void* CPU::X64Recompiler::GetCurrentCodePointer() { return cg->getCurr(); } -const char* CPU::Recompiler::X64Recompiler::GetHostRegName(u32 reg) const +const char* CPU::X64Recompiler::GetHostRegName(u32 reg) const { static constexpr std::array reg64_names = { {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"}}; return (reg < reg64_names.size()) ? reg64_names[reg] : "UNKNOWN"; } -void CPU::Recompiler::X64Recompiler::LoadHostRegWithConstant(u32 reg, u32 val) +void CPU::X64Recompiler::LoadHostRegWithConstant(u32 reg, u32 val) { cg->mov(Reg32(reg), val); } -void CPU::Recompiler::X64Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr) +void CPU::X64Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr) { cg->mov(Reg32(reg), cg->dword[PTR(ptr)]); } -void CPU::Recompiler::X64Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr) +void CPU::X64Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr) { cg->mov(cg->dword[PTR(ptr)], Reg32(reg)); } -void CPU::Recompiler::X64Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr) +void CPU::X64Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr) { cg->mov(cg->dword[PTR(ptr)], val); } -void CPU::Recompiler::X64Recompiler::CopyHostReg(u32 dst, u32 src) +void CPU::X64Recompiler::CopyHostReg(u32 dst, u32 src) { if (src != dst) cg->mov(Reg32(dst), Reg32(src)); } -Xbyak::Address CPU::Recompiler::X64Recompiler::MipsPtr(Reg r) const +Xbyak::Address CPU::X64Recompiler::MipsPtr(Reg r) const { DebugAssert(r < Reg::count); return cg->dword[PTR(&g_state.regs.r[static_cast(r)])]; } -Xbyak::Reg32 CPU::Recompiler::X64Recompiler::CFGetRegD(CompileFlags cf) const +Xbyak::Reg32 CPU::X64Recompiler::CFGetRegD(CompileFlags cf) const { DebugAssert(cf.valid_host_d); return Reg32(cf.host_d); } -Xbyak::Reg32 CPU::Recompiler::X64Recompiler::CFGetRegS(CompileFlags cf) const +Xbyak::Reg32 CPU::X64Recompiler::CFGetRegS(CompileFlags cf) const { DebugAssert(cf.valid_host_s); return Reg32(cf.host_s); } -Xbyak::Reg32 CPU::Recompiler::X64Recompiler::CFGetRegT(CompileFlags cf) const +Xbyak::Reg32 CPU::X64Recompiler::CFGetRegT(CompileFlags cf) const { DebugAssert(cf.valid_host_t); return Reg32(cf.host_t); } -Xbyak::Reg32 CPU::Recompiler::X64Recompiler::CFGetRegLO(CompileFlags cf) const +Xbyak::Reg32 
CPU::X64Recompiler::CFGetRegLO(CompileFlags cf) const { DebugAssert(cf.valid_host_lo); return Reg32(cf.host_lo); } -Xbyak::Reg32 CPU::Recompiler::X64Recompiler::CFGetRegHI(CompileFlags cf) const +Xbyak::Reg32 CPU::X64Recompiler::CFGetRegHI(CompileFlags cf) const { DebugAssert(cf.valid_host_hi); return Reg32(cf.host_hi); } -Xbyak::Reg32 CPU::Recompiler::X64Recompiler::MoveSToD(CompileFlags cf) +Xbyak::Reg32 CPU::X64Recompiler::MoveSToD(CompileFlags cf) { DebugAssert(cf.valid_host_d); DebugAssert(!cf.valid_host_t || cf.host_t != cf.host_d); @@ -710,7 +739,7 @@ Xbyak::Reg32 CPU::Recompiler::X64Recompiler::MoveSToD(CompileFlags cf) return rd; } -Xbyak::Reg32 CPU::Recompiler::X64Recompiler::MoveSToT(CompileFlags cf) +Xbyak::Reg32 CPU::X64Recompiler::MoveSToT(CompileFlags cf) { DebugAssert(cf.valid_host_t); @@ -736,7 +765,7 @@ Xbyak::Reg32 CPU::Recompiler::X64Recompiler::MoveSToT(CompileFlags cf) return rt; } -Xbyak::Reg32 CPU::Recompiler::X64Recompiler::MoveTToD(CompileFlags cf) +Xbyak::Reg32 CPU::X64Recompiler::MoveTToD(CompileFlags cf) { DebugAssert(cf.valid_host_d); DebugAssert(!cf.valid_host_s || cf.host_s != cf.host_d); @@ -746,7 +775,7 @@ Xbyak::Reg32 CPU::Recompiler::X64Recompiler::MoveTToD(CompileFlags cf) return rd; } -void CPU::Recompiler::X64Recompiler::MoveSToReg(const Xbyak::Reg32& dst, CompileFlags cf) +void CPU::X64Recompiler::MoveSToReg(const Xbyak::Reg32& dst, CompileFlags cf) { if (cf.valid_host_s) { @@ -767,7 +796,7 @@ void CPU::Recompiler::X64Recompiler::MoveSToReg(const Xbyak::Reg32& dst, Compile } } -void CPU::Recompiler::X64Recompiler::MoveTToReg(const Xbyak::Reg32& dst, CompileFlags cf) +void CPU::X64Recompiler::MoveTToReg(const Xbyak::Reg32& dst, CompileFlags cf) { if (cf.valid_host_t) { @@ -788,7 +817,7 @@ void CPU::Recompiler::X64Recompiler::MoveTToReg(const Xbyak::Reg32& dst, Compile } } -void CPU::Recompiler::X64Recompiler::MoveMIPSRegToReg(const Xbyak::Reg32& dst, Reg reg) +void CPU::X64Recompiler::MoveMIPSRegToReg(const Xbyak::Reg32& dst, Reg reg) { DebugAssert(reg < Reg::count); if (const std::optional hreg = CheckHostReg(0, Recompiler::HR_TYPE_CPU_REG, reg)) @@ -799,9 +828,8 @@ void CPU::Recompiler::X64Recompiler::MoveMIPSRegToReg(const Xbyak::Reg32& dst, R cg->mov(dst, MipsPtr(reg)); } -void CPU::Recompiler::X64Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, - Reg arg2reg /* = Reg::count */, - Reg arg3reg /* = Reg::count */) +void CPU::X64Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg /* = Reg::count */, + Reg arg3reg /* = Reg::count */) { DebugAssert(g_settings.gpu_pgxp_enable); @@ -816,7 +844,7 @@ void CPU::Recompiler::X64Recompiler::GeneratePGXPCallWithMIPSRegs(const void* fu cg->call(func); } -void CPU::Recompiler::X64Recompiler::Flush(u32 flags) +void CPU::X64Recompiler::Flush(u32 flags) { Recompiler::Flush(flags); @@ -899,13 +927,13 @@ void CPU::Recompiler::X64Recompiler::Flush(u32 flags) } } -void CPU::Recompiler::X64Recompiler::Compile_Fallback() +void CPU::X64Recompiler::Compile_Fallback() { WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits); Flush(FLUSH_FOR_INTERPRETER); - cg->call(&CPU::Recompiler::Thunks::InterpretInstruction); + cg->call(&CPU::RecompilerThunks::InterpretInstruction); // TODO: make me less garbage // TODO: this is wrong, it flushes the load delay on the same cycle when we return. 
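// ---------------------------------------------------------------------------------------------
// Editor's aside (not part of the diff): the TODO above refers to the R3000A's load delay slot --
// the value loaded by lw is not visible to the immediately following instruction, so the emulator
// keeps a pending (register, value) pair and applies it one instruction later (see
// m_load_delay_dirty / EMULATE_LOAD_DELAYS below). A self-contained sketch of that mechanism with
// hypothetical names:
#include <cstdint>

struct LoadDelaySketch
{
  uint8_t reg = 0;    // 0 = nothing pending (r0 is the always-zero register)
  uint32_t value = 0;
};

void ApplyPendingLoadSketch(uint32_t regs[32], LoadDelaySketch& pending)
{
  if (pending.reg != 0)
    regs[pending.reg] = pending.value; // becomes visible one instruction after the load
  pending.reg = 0;
}
// ---------------------------------------------------------------------------------------------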
@@ -923,7 +951,7 @@ void CPU::Recompiler::X64Recompiler::Compile_Fallback() m_load_delay_dirty = EMULATE_LOAD_DELAYS; } -void CPU::Recompiler::X64Recompiler::CheckBranchTarget(const Xbyak::Reg32& pcreg) +void CPU::X64Recompiler::CheckBranchTarget(const Xbyak::Reg32& pcreg) { if (!g_settings.cpu_recompiler_memory_exceptions) return; @@ -938,7 +966,7 @@ void CPU::Recompiler::X64Recompiler::CheckBranchTarget(const Xbyak::Reg32& pcreg SwitchToNearCode(false); } -void CPU::Recompiler::X64Recompiler::Compile_jr(CompileFlags cf) +void CPU::X64Recompiler::Compile_jr(CompileFlags cf) { if (!cf.valid_host_s) cg->mov(RWARG1, MipsPtr(cf.MipsS())); @@ -952,7 +980,7 @@ void CPU::Recompiler::X64Recompiler::Compile_jr(CompileFlags cf) EndBlock(std::nullopt, true); } -void CPU::Recompiler::X64Recompiler::Compile_jalr(CompileFlags cf) +void CPU::X64Recompiler::Compile_jalr(CompileFlags cf) { if (!cf.valid_host_s) cg->mov(RWARG1, MipsPtr(cf.MipsS())); @@ -969,7 +997,7 @@ void CPU::Recompiler::X64Recompiler::Compile_jalr(CompileFlags cf) EndBlock(std::nullopt, true); } -void CPU::Recompiler::X64Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond) +void CPU::X64Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond) { const u32 taken_pc = GetConditionalBranchTarget(cf); @@ -1045,7 +1073,7 @@ void CPU::Recompiler::X64Recompiler::Compile_bxx(CompileFlags cf, BranchConditio EndBlock(taken_pc, true); } -void CPU::Recompiler::X64Recompiler::Compile_addi(CompileFlags cf) +void CPU::X64Recompiler::Compile_addi(CompileFlags cf) { const Reg32 rt = MoveSToT(cf); if (const u32 imm = inst->i.imm_sext32(); imm != 0) @@ -1059,24 +1087,24 @@ void CPU::Recompiler::X64Recompiler::Compile_addi(CompileFlags cf) } } -void CPU::Recompiler::X64Recompiler::Compile_addiu(CompileFlags cf) +void CPU::X64Recompiler::Compile_addiu(CompileFlags cf) { const Reg32 rt = MoveSToT(cf); if (const u32 imm = inst->i.imm_sext32(); imm != 0) cg->add(rt, imm); } -void CPU::Recompiler::X64Recompiler::Compile_slti(CompileFlags cf) +void CPU::X64Recompiler::Compile_slti(CompileFlags cf) { Compile_slti(cf, true); } -void CPU::Recompiler::X64Recompiler::Compile_sltiu(CompileFlags cf) +void CPU::X64Recompiler::Compile_sltiu(CompileFlags cf) { Compile_slti(cf, false); } -void CPU::Recompiler::X64Recompiler::Compile_slti(CompileFlags cf, bool sign) +void CPU::X64Recompiler::Compile_slti(CompileFlags cf, bool sign) { const Reg32 rt = cf.valid_host_t ? 
CFGetRegT(cf) : RWARG1; @@ -1098,7 +1126,7 @@ void CPU::Recompiler::X64Recompiler::Compile_slti(CompileFlags cf, bool sign) cg->mov(MipsPtr(cf.MipsT()), rt); } -void CPU::Recompiler::X64Recompiler::Compile_andi(CompileFlags cf) +void CPU::X64Recompiler::Compile_andi(CompileFlags cf) { if (const u32 imm = inst->i.imm_zext32(); imm != 0) { @@ -1112,44 +1140,45 @@ void CPU::Recompiler::X64Recompiler::Compile_andi(CompileFlags cf) } } -void CPU::Recompiler::X64Recompiler::Compile_ori(CompileFlags cf) +void CPU::X64Recompiler::Compile_ori(CompileFlags cf) { const Reg32 rt = MoveSToT(cf); if (const u32 imm = inst->i.imm_zext32(); imm != 0) cg->or_(rt, imm); } -void CPU::Recompiler::X64Recompiler::Compile_xori(CompileFlags cf) +void CPU::X64Recompiler::Compile_xori(CompileFlags cf) { const Reg32 rt = MoveSToT(cf); if (const u32 imm = inst->i.imm_zext32(); imm != 0) cg->xor_(rt, imm); } -void CPU::Recompiler::X64Recompiler::Compile_sll(CompileFlags cf) +void CPU::X64Recompiler::Compile_sll(CompileFlags cf) { const Reg32 rd = MoveTToD(cf); if (inst->r.shamt > 0) cg->shl(rd, inst->r.shamt); } -void CPU::Recompiler::X64Recompiler::Compile_srl(CompileFlags cf) +void CPU::X64Recompiler::Compile_srl(CompileFlags cf) { const Reg32 rd = MoveTToD(cf); if (inst->r.shamt > 0) cg->shr(rd, inst->r.shamt); } -void CPU::Recompiler::X64Recompiler::Compile_sra(CompileFlags cf) +void CPU::X64Recompiler::Compile_sra(CompileFlags cf) { const Reg32 rd = MoveTToD(cf); if (inst->r.shamt > 0) cg->sar(rd, inst->r.shamt); } -void CPU::Recompiler::X64Recompiler::Compile_variable_shift( - CompileFlags cf, void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Reg8&), - void (Xbyak::CodeGenerator::*op_const)(const Xbyak::Operand&, int)) +void CPU::X64Recompiler::Compile_variable_shift(CompileFlags cf, + void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, + const Xbyak::Reg8&), + void (Xbyak::CodeGenerator::*op_const)(const Xbyak::Operand&, int)) { const Reg32 rd = CFGetRegD(cf); if (!cf.const_s) @@ -1165,22 +1194,22 @@ void CPU::Recompiler::X64Recompiler::Compile_variable_shift( } } -void CPU::Recompiler::X64Recompiler::Compile_sllv(CompileFlags cf) +void CPU::X64Recompiler::Compile_sllv(CompileFlags cf) { Compile_variable_shift(cf, &CodeGenerator::shl, &CodeGenerator::shl); } -void CPU::Recompiler::X64Recompiler::Compile_srlv(CompileFlags cf) +void CPU::X64Recompiler::Compile_srlv(CompileFlags cf) { Compile_variable_shift(cf, &CodeGenerator::shr, &CodeGenerator::shr); } -void CPU::Recompiler::X64Recompiler::Compile_srav(CompileFlags cf) +void CPU::X64Recompiler::Compile_srav(CompileFlags cf) { Compile_variable_shift(cf, &CodeGenerator::sar, &CodeGenerator::sar); } -void CPU::Recompiler::X64Recompiler::Compile_mult(CompileFlags cf, bool sign) +void CPU::X64Recompiler::Compile_mult(CompileFlags cf, bool sign) { // RAX/RDX shouldn't be allocatable.. 
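// ---------------------------------------------------------------------------------------------
// Editor's aside (not part of the diff): RAX/RDX are reserved above because one-operand x86
// mul/imul always writes the double-width product into EDX:EAX, which matches how MIPS mult/multu
// split their result into HI/LO. Self-contained sketch of the semantics being emitted
// (hypothetical function name):
#include <cstdint>

void MultSketch(uint32_t rs, uint32_t rt, bool sign, uint32_t* lo, uint32_t* hi)
{
  const uint64_t product =
    sign ? static_cast<uint64_t>(static_cast<int64_t>(static_cast<int32_t>(rs)) *
                                 static_cast<int64_t>(static_cast<int32_t>(rt))) :
           static_cast<uint64_t>(rs) * static_cast<uint64_t>(rt);
  *lo = static_cast<uint32_t>(product);       // LO <- low 32 bits (EAX on the host)
  *hi = static_cast<uint32_t>(product >> 32); // HI <- high 32 bits (EDX on the host)
}
// ---------------------------------------------------------------------------------------------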
@@ -1212,17 +1241,17 @@ void CPU::Recompiler::X64Recompiler::Compile_mult(CompileFlags cf, bool sign) cg->mov(MipsPtr(Reg::hi), cg->edx); } -void CPU::Recompiler::X64Recompiler::Compile_mult(CompileFlags cf) +void CPU::X64Recompiler::Compile_mult(CompileFlags cf) { Compile_mult(cf, true); } -void CPU::Recompiler::X64Recompiler::Compile_multu(CompileFlags cf) +void CPU::X64Recompiler::Compile_multu(CompileFlags cf) { Compile_mult(cf, false); } -void CPU::Recompiler::X64Recompiler::Compile_div(CompileFlags cf) +void CPU::X64Recompiler::Compile_div(CompileFlags cf) { // not supported without registers for now.. DebugAssert(cf.valid_host_lo && cf.valid_host_hi); @@ -1268,7 +1297,7 @@ void CPU::Recompiler::X64Recompiler::Compile_div(CompileFlags cf) cg->L(done); } -void CPU::Recompiler::X64Recompiler::Compile_divu(CompileFlags cf) +void CPU::X64Recompiler::Compile_divu(CompileFlags cf) { // not supported without registers for now.. DebugAssert(cf.valid_host_lo && cf.valid_host_hi); @@ -1299,7 +1328,7 @@ void CPU::Recompiler::X64Recompiler::Compile_divu(CompileFlags cf) cg->L(done); } -void CPU::Recompiler::X64Recompiler::TestOverflow(const Xbyak::Reg32& result) +void CPU::X64Recompiler::TestOverflow(const Xbyak::Reg32& result) { SwitchToFarCode(true, &Xbyak::CodeGenerator::jo); @@ -1315,9 +1344,10 @@ void CPU::Recompiler::X64Recompiler::TestOverflow(const Xbyak::Reg32& result) SwitchToNearCode(false); } -void CPU::Recompiler::X64Recompiler::Compile_dst_op( - CompileFlags cf, void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Operand&), - void (Xbyak::CodeGenerator::*op_const)(const Xbyak::Operand&, u32), bool commutative, bool overflow) +void CPU::X64Recompiler::Compile_dst_op(CompileFlags cf, + void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Operand&), + void (Xbyak::CodeGenerator::*op_const)(const Xbyak::Operand&, u32), + bool commutative, bool overflow) { if (cf.valid_host_s && cf.valid_host_t) { @@ -1401,27 +1431,27 @@ void CPU::Recompiler::X64Recompiler::Compile_dst_op( } } -void CPU::Recompiler::X64Recompiler::Compile_add(CompileFlags cf) +void CPU::X64Recompiler::Compile_add(CompileFlags cf) { Compile_dst_op(cf, &CodeGenerator::add, &CodeGenerator::add, true, g_settings.cpu_recompiler_memory_exceptions); } -void CPU::Recompiler::X64Recompiler::Compile_addu(CompileFlags cf) +void CPU::X64Recompiler::Compile_addu(CompileFlags cf) { Compile_dst_op(cf, &CodeGenerator::add, &CodeGenerator::add, true, false); } -void CPU::Recompiler::X64Recompiler::Compile_sub(CompileFlags cf) +void CPU::X64Recompiler::Compile_sub(CompileFlags cf) { Compile_dst_op(cf, &CodeGenerator::sub, &CodeGenerator::sub, false, g_settings.cpu_recompiler_memory_exceptions); } -void CPU::Recompiler::X64Recompiler::Compile_subu(CompileFlags cf) +void CPU::X64Recompiler::Compile_subu(CompileFlags cf) { Compile_dst_op(cf, &CodeGenerator::sub, &CodeGenerator::sub, false, false); } -void CPU::Recompiler::X64Recompiler::Compile_and(CompileFlags cf) +void CPU::X64Recompiler::Compile_and(CompileFlags cf) { // special cases - and with self -> self, and with 0 -> 0 const Reg32 regd = CFGetRegD(cf); @@ -1439,7 +1469,7 @@ void CPU::Recompiler::X64Recompiler::Compile_and(CompileFlags cf) Compile_dst_op(cf, &CodeGenerator::and_, &CodeGenerator::and_, true, false); } -void CPU::Recompiler::X64Recompiler::Compile_or(CompileFlags cf) +void CPU::X64Recompiler::Compile_or(CompileFlags cf) { // or/nor with 0 -> no effect const Reg32 regd = CFGetRegD(cf);
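
Compile_div and Compile_divu branch around the host idiv/div (the cg->L(done) labels above) because the R3000A defines results for the two cases where x86 would raise #DE. A hedged reference of those edge cases, assuming the usual PSX semantics for division by zero and INT_MIN / -1:

#include <cstdint>
#include <cstdio>

// Reference for the guarded signed-division edge cases; illustrative names.
static void div_ref(int32_t num, int32_t den, uint32_t* lo, uint32_t* hi)
{
  if (den == 0)
  {
    *lo = (num >= 0) ? 0xFFFFFFFFu : 1u; // quotient fixed by dividend sign
    *hi = static_cast<uint32_t>(num);    // remainder = dividend
  }
  else if (static_cast<uint32_t>(num) == 0x80000000u && den == -1)
  {
    *lo = 0x80000000u; // would overflow; result wraps
    *hi = 0;
  }
  else
  {
    *lo = static_cast<uint32_t>(num / den);
    *hi = static_cast<uint32_t>(num % den);
  }
}

int main()
{
  uint32_t lo, hi;
  div_ref(-5, 0, &lo, &hi);
  std::printf("lo=%08x hi=%08x\n", lo, hi); // lo=00000001 hi=fffffffb
  return 0;
}
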
@@ -1452,7 +1482,7 @@ void CPU::Recompiler::X64Recompiler::Compile_or(CompileFlags cf) Compile_dst_op(cf, &CodeGenerator::or_, &CodeGenerator::or_, true, false); } -void CPU::Recompiler::X64Recompiler::Compile_xor(CompileFlags cf) +void CPU::X64Recompiler::Compile_xor(CompileFlags cf) { const Reg32 regd = CFGetRegD(cf); if (cf.MipsS() == cf.MipsT()) @@ -1471,23 +1501,23 @@ void CPU::Recompiler::X64Recompiler::Compile_xor(CompileFlags cf) Compile_dst_op(cf, &CodeGenerator::xor_, &CodeGenerator::xor_, true, false); } -void CPU::Recompiler::X64Recompiler::Compile_nor(CompileFlags cf) +void CPU::X64Recompiler::Compile_nor(CompileFlags cf) { Compile_or(cf); cg->not_(CFGetRegD(cf)); } -void CPU::Recompiler::X64Recompiler::Compile_slt(CompileFlags cf) +void CPU::X64Recompiler::Compile_slt(CompileFlags cf) { Compile_slt(cf, true); } -void CPU::Recompiler::X64Recompiler::Compile_sltu(CompileFlags cf) +void CPU::X64Recompiler::Compile_sltu(CompileFlags cf) { Compile_slt(cf, false); } -void CPU::Recompiler::X64Recompiler::Compile_slt(CompileFlags cf, bool sign) +void CPU::X64Recompiler::Compile_slt(CompileFlags cf, bool sign) { const Reg32 rd = CFGetRegD(cf); const Reg32 rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1; @@ -1513,9 +1543,9 @@ void CPU::Recompiler::X64Recompiler::Compile_slt(CompileFlags cf, bool sign) sign ? cg->setl(rd.cvt8()) : cg->setb(rd.cvt8()); } -Xbyak::Reg32 CPU::Recompiler::X64Recompiler::ComputeLoadStoreAddressArg( - CompileFlags cf, const std::optional<VirtualMemoryAddress>& address, - const std::optional<const Xbyak::Reg32>& reg /* = std::nullopt */) +Xbyak::Reg32 +CPU::X64Recompiler::ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address, + const std::optional<const Xbyak::Reg32>& reg /* = std::nullopt */) { const u32 imm = inst->i.imm_sext32(); if (cf.valid_host_s && imm == 0 && !reg.has_value()) @@ -1546,8 +1576,8 @@ Xbyak::Reg32 CPU::Recompiler::X64Recompiler::ComputeLoadStoreAddressArg( } template<typename RegAllocFn> -Xbyak::Reg32 CPU::Recompiler::X64Recompiler::GenerateLoad(const Xbyak::Reg32& addr_reg, MemoryAccessSize size, - bool sign, bool use_fastmem, const RegAllocFn& dst_reg_alloc) +Xbyak::Reg32 CPU::X64Recompiler::GenerateLoad(const Xbyak::Reg32& addr_reg, MemoryAccessSize size, bool sign, + bool use_fastmem, const RegAllocFn& dst_reg_alloc) { if (use_fastmem) { @@ -1608,20 +1638,20 @@ Xbyak::Reg32 CPU::Recompiler::X64Recompiler::GenerateLoad(const Xbyak::Reg32& ad { case MemoryAccessSize::Byte: { - cg->call(checked ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::ReadMemoryByte) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedReadMemoryByte)); + cg->call(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryByte) : + reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryByte)); } break; case MemoryAccessSize::HalfWord: { - cg->call(checked ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::ReadMemoryHalfWord) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedReadMemoryHalfWord)); + cg->call(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryHalfWord) : + reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryHalfWord)); } break; case MemoryAccessSize::Word: { - cg->call(checked ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::ReadMemoryWord) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedReadMemoryWord)); + cg->call(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryWord) : + reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryWord)); } break; }
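
The checked read thunks return u64 even though the guest value is 32-bit: per the thunk definitions, a successful read is zero-extended, which leaves the upper half free to signal out of band that an exception was raised. The exact failure encoding is not visible in this diff, so the sketch below only illustrates the shape of that convention; checked_read and its failure marker are hypothetical stand-ins:

#include <cstdint>
#include <cstdio>

// Hypothetical sketch: success zero-extends the 32-bit value, failure sets
// the high half so a single sign test on the emitted side can branch to the
// exception path. Not the real thunk encoding.
static uint64_t checked_read(uint32_t address, bool fault)
{
  if (fault)
    return ~0ull;                              // assumed failure marker
  return static_cast<uint64_t>(address * 2u);  // ZeroExtend64(value)
}

int main()
{
  const uint64_t r = checked_read(0x1000u, false);
  if (static_cast<int64_t>(r) < 0)
    std::puts("exception path");
  else
    std::printf("value=%08x\n", static_cast<uint32_t>(r));
  return 0;
}
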
@@ -1677,8 +1707,8 @@ Xbyak::Reg32 CPU::Recompiler::X64Recompiler::GenerateLoad(const Xbyak::Reg32& ad return dst_reg; } -void CPU::Recompiler::X64Recompiler::GenerateStore(const Xbyak::Reg32& addr_reg, const Xbyak::Reg32& value_reg, - MemoryAccessSize size, bool use_fastmem) +void CPU::X64Recompiler::GenerateStore(const Xbyak::Reg32& addr_reg, const Xbyak::Reg32& value_reg, + MemoryAccessSize size, bool use_fastmem) { if (use_fastmem) { @@ -1729,20 +1759,20 @@ void CPU::Recompiler::X64Recompiler::GenerateStore(const Xbyak::Reg32& addr_reg, { case MemoryAccessSize::Byte: { - cg->call(checked ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::WriteMemoryByte) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedWriteMemoryByte)); + cg->call(checked ? reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryByte) : + reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryByte)); } break; case MemoryAccessSize::HalfWord: { - cg->call(checked ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::WriteMemoryHalfWord) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedWriteMemoryHalfWord)); + cg->call(checked ? reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryHalfWord) : + reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryHalfWord)); } break; case MemoryAccessSize::Word: { - cg->call(checked ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::WriteMemoryWord) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedWriteMemoryWord)); + cg->call(checked ? reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryWord) : + reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryWord)); } break; } @@ -1774,8 +1804,8 @@ void CPU::Recompiler::X64Recompiler::GenerateStore(const Xbyak::Reg32& addr_reg, } } -void CPU::Recompiler::X64Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional<VirtualMemoryAddress>& address) +void CPU::X64Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional<VirtualMemoryAddress>& address) { const std::optional<Reg32> addr_reg = g_settings.gpu_pgxp_enable ? std::optional<Reg32>(Reg32(AllocateTempHostReg(HR_CALLEE_SAVED))) :
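
GenerateStore mirrors the load path: one ternary per access size picks the exception-aware thunk when cpu_recompiler_memory_exceptions is on ("checked"), and the unchecked fast path otherwise. A hedged sketch of that selection pattern; the thunk names and signature below are stand-ins, not the real thunk interface:

#include <cstdint>
#include <cstdio>

using StoreThunk = uint32_t (*)(uint32_t address, uint32_t value);

static uint32_t checked_store(uint32_t, uint32_t)   { std::puts("checked");   return 0; }
static uint32_t unchecked_store(uint32_t, uint32_t) { std::puts("unchecked"); return 0; }

// Mirrors the emitted cg->call(checked ? ... : ...) dispatch, done once at
// compile time per block rather than per executed instruction.
static StoreThunk select_store_thunk(bool checked)
{
  return checked ? checked_store : unchecked_store;
}

int main()
{
  select_store_thunk(true)(0x80001000u, 42u);
  select_store_thunk(false)(0x80001000u, 42u);
  return 0;
}
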
@@ -1803,8 +1833,8 @@ void CPU::Recompiler::X64Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSi } } -void CPU::Recompiler::X64Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional<VirtualMemoryAddress>& address) +void CPU::X64Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional<VirtualMemoryAddress>& address) { DebugAssert(size == MemoryAccessSize::Word && !sign); @@ -1907,8 +1937,8 @@ void CPU::Recompiler::X64Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSi } } -void CPU::Recompiler::X64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional<VirtualMemoryAddress>& address) +void CPU::X64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional<VirtualMemoryAddress>& address) { const u32 index = static_cast<u32>(inst->r.rt.GetValue()); const auto [ptr, action] = GetGTERegisterPointer(index, true); @@ -1993,8 +2023,8 @@ void CPU::Recompiler::X64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessS } } -void CPU::Recompiler::X64Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional<VirtualMemoryAddress>& address) +void CPU::X64Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional<VirtualMemoryAddress>& address) { const std::optional<Reg32> addr_reg = g_settings.gpu_pgxp_enable ? std::optional<Reg32>(Reg32(AllocateTempHostReg(HR_CALLEE_SAVED))) : @@ -2018,8 +2048,8 @@ void CPU::Recompiler::X64Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSi } } -void CPU::Recompiler::X64Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional<VirtualMemoryAddress>& address) +void CPU::X64Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional<VirtualMemoryAddress>& address) { DebugAssert(size == MemoryAccessSize::Word && !sign); @@ -2098,8 +2128,8 @@ void CPU::Recompiler::X64Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSi } } -void CPU::Recompiler::X64Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional<VirtualMemoryAddress>& address) +void CPU::X64Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional<VirtualMemoryAddress>& address) { const u32 index = static_cast<u32>(inst->r.rt.GetValue()); const auto [ptr, action] = GetGTERegisterPointer(index, false); @@ -2154,7 +2184,7 @@ void CPU::Recompiler::X64Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessS FreeHostReg(data_backup.getIdx()); } -void CPU::Recompiler::X64Recompiler::Compile_mtc0(CompileFlags cf) +void CPU::X64Recompiler::Compile_mtc0(CompileFlags cf) { const Cop0Reg reg = static_cast<Cop0Reg>(MipsD()); const u32* ptr = GetCop0RegPtr(reg); @@ -2238,7 +2268,7 @@ void CPU::Recompiler::X64Recompiler::Compile_mtc0(CompileFlags cf) } } -void CPU::Recompiler::X64Recompiler::Compile_rfe(CompileFlags cf) +void CPU::X64Recompiler::Compile_rfe(CompileFlags cf) { // shift mode bits right two, preserving upper bits static constexpr u32 mode_bits_mask = UINT32_C(0b1111); @@ -2253,7 +2283,7 @@ void CPU::Recompiler::X64Recompiler::Compile_rfe(CompileFlags cf) TestInterrupts(RWARG1); } -void CPU::Recompiler::X64Recompiler::TestInterrupts(const Xbyak::Reg32& sr) +void CPU::X64Recompiler::TestInterrupts(const Xbyak::Reg32& sr) { // if Iec == 0 then goto no_interrupt Label no_interrupt;
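
The Compile_rfe hunk above does exactly what its comment says: the low mode/interrupt-enable bit pairs of SR shift right by two, popping the exception stack, while every other status bit is preserved. As a scalar reference using the same mask the hunk declares:

#include <cstdint>
#include <cstdio>

static uint32_t rfe_ref(uint32_t sr)
{
  constexpr uint32_t mode_bits_mask = UINT32_C(0b1111);
  // shift mode bits right two, preserving upper bits
  return (sr & ~mode_bits_mask) | ((sr >> 2) & mode_bits_mask);
}

int main()
{
  std::printf("%08x\n", rfe_ref(0x0000000Cu)); // KUp/IEp pop into KUc/IEc -> 00000003
  return 0;
}
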
@@ -2301,7 +2331,7 @@ void CPU::Recompiler::X64Recompiler::TestInterrupts(const Xbyak::Reg32& sr) cg->L(no_interrupt); } -void CPU::Recompiler::X64Recompiler::Compile_mfc2(CompileFlags cf) +void CPU::X64Recompiler::Compile_mfc2(CompileFlags cf) { const u32 index = inst->cop.Cop2Index(); const Reg rt = inst->r.rt; @@ -2342,7 +2372,7 @@ void CPU::Recompiler::X64Recompiler::Compile_mfc2(CompileFlags cf) } } -void CPU::Recompiler::X64Recompiler::Compile_mtc2(CompileFlags cf) +void CPU::X64Recompiler::Compile_mtc2(CompileFlags cf) { const u32 index = inst->cop.Cop2Index(); const auto [ptr, action] = GetGTERegisterPointer(index, true); @@ -2416,7 +2446,7 @@ void CPU::Recompiler::X64Recompiler::Compile_mtc2(CompileFlags cf) } } -void CPU::Recompiler::X64Recompiler::Compile_cop2(CompileFlags cf) +void CPU::X64Recompiler::Compile_cop2(CompileFlags cf) { TickCount func_ticks; GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks); @@ -2480,20 +2510,20 @@ u32 CPU::Recompiler::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, vo { case MemoryAccessSize::Byte: { - cg->call(is_load ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedReadMemoryByte) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedWriteMemoryByte)); + cg->call(is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryByte) : + reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryByte)); } break; case MemoryAccessSize::HalfWord: { - cg->call(is_load ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedReadMemoryHalfWord) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedWriteMemoryHalfWord)); + cg->call(is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryHalfWord) : + reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryHalfWord)); } break; case MemoryAccessSize::Word: { - cg->call(is_load ? reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedReadMemoryWord) : - reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::UncheckedWriteMemoryWord)); + cg->call(is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryWord) : + reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryWord)); } break; } diff --git a/src/core/cpu_recompiler_x64.h b/src/core/cpu_recompiler_x64.h index aa97b9b2c..c2f1285ed 100644 --- a/src/core/cpu_recompiler_x64.h +++ b/src/core/cpu_recompiler_x64.h @@ -9,7 +9,15 @@ #ifdef CPU_ARCH_X64 -namespace CPU::Recompiler { +// We need to include windows.h before xbyak does.. +#ifdef _WIN32 +#include "common/windows_headers.h" +#endif + +#define XBYAK_NO_OP_NAMES 1 +#include "xbyak.h" + +namespace CPU { class X64Recompiler final : public Recompiler { @@ -141,6 +149,6 @@ private: Xbyak::CodeGenerator* cg; }; -} // namespace CPU::Recompiler +} // namespace CPU #endif // CPU_ARCH_X64
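
The header change above defines XBYAK_NO_OP_NAMES before including xbyak.h. That matters because and, or, xor, and not are ISO C++ alternative tokens (for &&, ||, ^, !), so a conformant compiler rejects them as member names; with the macro set, xbyak exposes and_/or_/xor_/not_ instead, which is exactly the spelling the emitter uses throughout this file (cg->and_, cg->or_, cg->not_, cg->xor_). A minimal illustration:

#include <cstdio>

struct EmitterLike
{
  // void and(int);                      // ill-formed: 'and' is the alternative token for '&&'
  void and_(int) { std::puts("and_"); }  // the XBYAK_NO_OP_NAMES spelling
};

int main()
{
  EmitterLike cg;
  cg.and_(1);
  return 0;
}
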