diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8f9cad94..efb5edf1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -48,6 +48,13 @@ endif()
 # Add the module directory to the list of paths
 list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")

+# Arch detection
+include(DetectArchitecture)
+if (NOT DEFINED ARCHITECTURE)
+    message(FATAL_ERROR "Unsupported architecture encountered. Ending CMake generation.")
+endif()
+message(STATUS "Target architecture: ${ARCHITECTURE}")
+
 # Compiler flags
 if (MSVC)
     set(DYNARMIC_CXX_FLAGS
@@ -91,7 +98,12 @@ else()
         -Wextra
         -Wcast-qual
         -pedantic
-        -Wno-missing-braces)
+        -Wno-missing-braces
+        -Wstack-usage=4096)
+
+    if (ARCHITECTURE STREQUAL "x86_64")
+        list(APPEND DYNARMIC_CXX_FLAGS -mtune=core2)
+    endif()

     if (DYNARMIC_WARNINGS_AS_ERRORS)
         list(APPEND DYNARMIC_CXX_FLAGS
@@ -120,13 +132,6 @@ else()
     endif()
 endif()

-# Arch detection
-include(DetectArchitecture)
-if (NOT DEFINED ARCHITECTURE)
-    message(FATAL_ERROR "Unsupported architecture encountered. Ending CMake generation.")
-endif()
-message(STATUS "Target architecture: ${ARCHITECTURE}")
-
 # Forced use of individual bundled libraries for non-REQUIRED library is possible with e.g. cmake -DCMAKE_DISABLE_FIND_PACKAGE_fmt=ON ...
 if (DYNARMIC_USE_BUNDLED_EXTERNALS)
diff --git a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp
index 04395148..974bb99a 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp
@@ -60,16 +60,15 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext
 template<>
 void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
-    if (args[0].IsInGpr() && args[1].IsInGpr()) {
+    bool const args_in_gpr[] = { args[0].IsInGpr(), args[1].IsInGpr() };
+    if (args_in_gpr[0] && args_in_gpr[1]) {
         auto Xlo = ctx.reg_alloc.ReadX(args[0]);
         auto Xhi = ctx.reg_alloc.ReadX(args[1]);
         auto Qresult = ctx.reg_alloc.WriteQ(inst);
         RegAlloc::Realize(Xlo, Xhi, Qresult);
-
         code.FMOV(Qresult->toD(), Xlo);
         code.MOV(oaknut::VRegSelector{Qresult->index()}.D()[1], Xhi);
-    } else if (args[0].IsInGpr()) {
+    } else if (args_in_gpr[0]) {
         auto Xlo = ctx.reg_alloc.ReadX(args[0]);
         auto Dhi = ctx.reg_alloc.ReadD(args[1]);
         auto Qresult = ctx.reg_alloc.WriteQ(inst);
@@ -77,7 +76,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContex
         code.FMOV(Qresult->toD(), Xlo);
         code.MOV(oaknut::VRegSelector{Qresult->index()}.D()[1], oaknut::VRegSelector{Dhi->index()}.D()[0]);
-    } else if (args[1].IsInGpr()) {
+    } else if (args_in_gpr[1]) {
         auto Dlo = ctx.reg_alloc.ReadD(args[0]);
         auto Xhi = ctx.reg_alloc.ReadX(args[1]);
         auto Qresult = ctx.reg_alloc.WriteQ(inst);
diff --git a/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/backend/x64/a64_emit_x64.cpp
index 073823f0..ad84e0ec 100644
--- a/src/dynarmic/backend/x64/a64_emit_x64.cpp
+++ b/src/dynarmic/backend/x64/a64_emit_x64.cpp
@@ -65,8 +65,8 @@ A64EmitX64::A64EmitX64(BlockOfCode& code, A64::UserConfig conf, A64::Jit* jit_in
 A64EmitX64::~A64EmitX64() = default;

-A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
-    if (conf.very_verbose_debugging_output) {
+A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
+    if (conf.very_verbose_debugging_output) [[unlikely]] {
         std::puts(IR::DumpBlock(block).c_str());
     }

@@ -91,47 +91,54 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
     // Start emitting.
     code.align();
-    const u8* const entrypoint = code.getCurr();
+    const auto* const entrypoint = code.getCurr();

-    ASSERT(block.GetCondition() == IR::Cond::AL);
-
-    static void (EmitX64::*opcode_handlers[])(EmitContext& context, IR::Inst* inst) = {
+    DEBUG_ASSERT(block.GetCondition() == IR::Cond::AL);
+    typedef void (EmitX64::*EmitHandlerFn)(EmitContext& context, IR::Inst* inst);
+    constexpr EmitHandlerFn opcode_handlers[] = {
 #define OPCODE(name, type, ...) &EmitX64::Emit##name,
+#define A32OPC(name, type, ...)
+#define A64OPC(name, type, ...)
+#include "dynarmic/ir/opcodes.inc"
+#undef OPCODE
+#undef A32OPC
+#undef A64OPC
+    };
+    typedef void (A64EmitX64::*A64EmitHandlerFn)(A64EmitContext& context, IR::Inst* inst);
+    constexpr A64EmitHandlerFn a64_handlers[] = {
+#define OPCODE(...)
 #define A32OPC(...)
-#define A64OPC(...)
+#define A64OPC(name, type, ...) &A64EmitX64::EmitA64##name,
 #include "dynarmic/ir/opcodes.inc"
 #undef OPCODE
 #undef A32OPC
 #undef A64OPC
     };

-    for (auto iter = block.begin(); iter != block.end(); ++iter) {
-        IR::Inst* inst = &*iter;
-
+    for (auto& inst : block) {
+        auto const opcode = inst.GetOpcode();
         // Call the relevant Emit* member function.
-        switch (inst->GetOpcode()) {
-#define OPCODE(name, type, ...) \
-    case IR::Opcode::name: goto true_opcode_branch;
-#define A32OPC(...)
-#define A64OPC(name, type, ...) \
-    case IR::Opcode::A64##name: \
-        A64EmitX64::EmitA64##name(ctx, inst); \
-        break;
+        switch (opcode) {
+#define OPCODE(name, type, ...) [[likely]] case IR::Opcode::name: goto opcode_branch;
+#define A32OPC(name, type, ...)
+#define A64OPC(name, type, ...) [[likely]] case IR::Opcode::A64##name: goto a64_branch;
 #include "dynarmic/ir/opcodes.inc"
 #undef OPCODE
 #undef A32OPC
 #undef A64OPC
-        default:
-            ASSERT_MSG(false, "Invalid opcode: {}", inst->GetOpcode());
-            break;
+        default: [[unlikely]] {
+            ASSERT_MSG(false, "Invalid opcode: {}", opcode);
+            goto finish_this_inst;
         }
-        goto false_opcode_branch;
-true_opcode_branch:
-        (this->*opcode_handlers[size_t(inst->GetOpcode())])(ctx, inst);
-false_opcode_branch:
+        }
+opcode_branch:
+        (this->*opcode_handlers[size_t(opcode)])(ctx, &inst);
+        goto finish_this_inst;
+a64_branch:
+        (this->*a64_handlers[size_t(opcode) - std::size(opcode_handlers)])(ctx, &inst);
+finish_this_inst:
         ctx.reg_alloc.EndOfAllocScope();
-
-        if (conf.very_verbose_debugging_output) {
+        if (conf.very_verbose_debugging_output) [[unlikely]] {
             EmitVerboseDebuggingOutput(reg_alloc);
         }
     }
diff --git a/src/dynarmic/backend/x64/a64_emit_x64.h b/src/dynarmic/backend/x64/a64_emit_x64.h
index 7f843df6..53c3570e 100644
--- a/src/dynarmic/backend/x64/a64_emit_x64.h
+++ b/src/dynarmic/backend/x64/a64_emit_x64.h
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include

 #include "dynarmic/backend/block_range_information.h"
 #include "dynarmic/backend/x64/a64_jitstate.h"
@@ -41,11 +42,9 @@ public:
     A64EmitX64(BlockOfCode& code, A64::UserConfig conf, A64::Jit* jit_interface);
     ~A64EmitX64() override;

-    /**
-     * Emit host machine code for a basic block with intermediate representation `block`.
-     * @note block is modified.
-     */
-    BlockDescriptor Emit(IR::Block& block);
+    /// Emit host machine code for a basic block with intermediate representation `block`.
+    /// @note block is modified.
+    BlockDescriptor Emit(IR::Block& block) noexcept;

     void ClearCache() override;

@@ -64,7 +63,7 @@ protected:
     void GenMemory128Accessors();
     void GenFastmemFallbacks();
     void GenTerminalHandlers();
-
+    // Microinstruction emitters
     void EmitPushRSB(EmitContext& ctx, IR::Inst* inst);
 #define OPCODE(...)
@@ -132,13 +131,13 @@ protected:
     std::map, void (*)()> write_fallbacks;
     std::map, void (*)()> exclusive_write_fallbacks;
     std::set do_not_fastmem;
-    const void* terminal_handler_pop_rsb_hint;
+    const void* terminal_handler_pop_rsb_hint = nullptr;
     const void* terminal_handler_fast_dispatch_hint = nullptr;
     FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
-    A64::Jit* jit_interface;
-    void (*memory_read_128)();
-    void (*memory_write_128)();
-    void (*memory_exclusive_write_128)();
+    A64::Jit* jit_interface = nullptr;
+    void (*memory_read_128)() = nullptr;
+    void (*memory_write_128)() = nullptr;
+    void (*memory_exclusive_write_128)() = nullptr;
 };

 }  // namespace Dynarmic::Backend::X64
diff --git a/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/backend/x64/emit_x64.cpp
index ea19fdf2..8bd9102d 100644
--- a/src/dynarmic/backend/x64/emit_x64.cpp
+++ b/src/dynarmic/backend/x64/emit_x64.cpp
@@ -55,6 +55,10 @@ std::optional EmitX64::GetBasicBlock(IR::LocationDescr
     return iter->second;
 }

+void EmitX64::EmitInvalid(EmitContext&, IR::Inst* inst) {
+    ASSERT_MSG(false, "Invalid opcode: {}", inst->GetOpcode());
+}
+
 void EmitX64::EmitVoid(EmitContext&, IR::Inst*) {
 }

diff --git a/src/dynarmic/backend/x64/emit_x64.h b/src/dynarmic/backend/x64/emit_x64.h
index 008d3ebe..fbe749b2 100644
--- a/src/dynarmic/backend/x64/emit_x64.h
+++ b/src/dynarmic/backend/x64/emit_x64.h
@@ -101,6 +101,7 @@ protected:
 #undef OPCODE
 #undef A32OPC
 #undef A64OPC
+    void EmitInvalid(EmitContext& ctx, IR::Inst* inst);

     // Helpers
     virtual std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const = 0;
diff --git a/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/backend/x64/reg_alloc.cpp
index c03c0c7b..a0ee8ae9 100644
--- a/src/dynarmic/backend/x64/reg_alloc.cpp
+++ b/src/dynarmic/backend/x64/reg_alloc.cpp
@@ -29,12 +29,12 @@ namespace Dynarmic::Backend::X64 {
         } \
     }()

-static bool CanExchange(HostLoc a, HostLoc b) {
+static inline bool CanExchange(const HostLoc a, const HostLoc b) noexcept {
     return HostLocIsGPR(a) && HostLocIsGPR(b);
 }

 // Minimum number of bits required to represent a type
-static size_t GetBitWidth(IR::Type type) {
+static inline size_t GetBitWidth(const IR::Type type) noexcept {
     switch (type) {
     case IR::Type::A32Reg:
     case IR::Type::A32ExtReg:
@@ -66,7 +66,7 @@ static size_t GetBitWidth(IR::Type type) {
     UNREACHABLE();
 }

-static bool IsValuelessType(IR::Type type) {
+static inline bool IsValuelessType(const IR::Type type) noexcept {
     switch (type) {
     case IR::Type::Table:
         return true;
@@ -75,46 +75,14 @@ static bool IsValuelessType(IR::Type type) {
     }
 }

-bool HostLocInfo::IsLocked() const {
-    return is_being_used_count > 0;
-}
-
-bool HostLocInfo::IsEmpty() const {
-    return is_being_used_count == 0 && values.empty();
-}
-
-bool HostLocInfo::IsLastUse() const {
-    return is_being_used_count == 0 && current_references == 1 && accumulated_uses + 1 == total_uses;
-}
-
-void HostLocInfo::SetLastUse() {
-    ASSERT(IsLastUse());
-    is_set_last_use = true;
-}
-
-void HostLocInfo::ReadLock() {
-    ASSERT(!is_scratch);
-    is_being_used_count++;
-}
-
-void HostLocInfo::WriteLock() {
-    ASSERT(is_being_used_count == 0);
-    is_being_used_count++;
-    is_scratch = true;
-}
-
-void HostLocInfo::AddArgReference() {
-    current_references++;
-    ASSERT(accumulated_uses + current_references <= total_uses);
-}
-
-void HostLocInfo::ReleaseOne() {
+void HostLocInfo::ReleaseOne() noexcept {
     is_being_used_count--;
     is_scratch = false;

     if (current_references == 0)
         return;

+    ASSERT(size_t(accumulated_uses) + 1 < std::numeric_limits::max());
     accumulated_uses++;
     current_references--;

@@ -122,7 +90,7 @@ void HostLocInfo::ReleaseOne() {
         ReleaseAll();
 }

-void HostLocInfo::ReleaseAll() {
+void HostLocInfo::ReleaseAll() noexcept {
     accumulated_uses += current_references;
     current_references = 0;

@@ -139,27 +107,20 @@ void HostLocInfo::ReleaseAll() {
     is_scratch = false;
 }

-bool HostLocInfo::ContainsValue(const IR::Inst* inst) const {
-    return std::find(values.begin(), values.end(), inst) != values.end();
-}
-
-size_t HostLocInfo::GetMaxBitWidth() const {
-    return max_bit_width;
-}
-
-void HostLocInfo::AddValue(IR::Inst* inst) {
+void HostLocInfo::AddValue(IR::Inst* inst) noexcept {
     if (is_set_last_use) {
         is_set_last_use = false;
         values.clear();
     }
     values.push_back(inst);

+    ASSERT(size_t(total_uses) + inst->UseCount() < std::numeric_limits::max());
     total_uses += inst->UseCount();
-    max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType()));
+    max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType()));
 }

-void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const {
+void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept {
     using namespace Xbyak::util;

-    for (IR::Inst* value : values) {
+    for (auto const value : values) {
         code->mov(code->ABI_PARAM1, rsp);
         code->mov(code->ABI_PARAM2, host_loc_index);
         code->mov(code->ABI_PARAM3, value->GetName());
@@ -168,106 +129,96 @@ void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_
     }
 }

-IR::Type Argument::GetType() const {
-    return value.GetType();
-}
-
-bool Argument::IsImmediate() const {
-    return value.IsImmediate();
-}
-
-bool Argument::IsVoid() const {
-    return GetType() == IR::Type::Void;
-}
-
-bool Argument::FitsInImmediateU32() const {
+bool Argument::FitsInImmediateU32() const noexcept {
     if (!IsImmediate())
         return false;

     const u64 imm = value.GetImmediateAsU64();
     return imm < 0x100000000;
 }

-bool Argument::FitsInImmediateS32() const {
+bool Argument::FitsInImmediateS32() const noexcept {
     if (!IsImmediate())
         return false;

     const s64 imm = static_cast(value.GetImmediateAsU64());
     return -s64(0x80000000) <= imm && imm <= s64(0x7FFFFFFF);
 }

-bool Argument::GetImmediateU1() const {
+bool Argument::GetImmediateU1() const noexcept {
     return value.GetU1();
 }

-u8 Argument::GetImmediateU8() const {
+u8 Argument::GetImmediateU8() const noexcept {
     const u64 imm = value.GetImmediateAsU64();
     ASSERT(imm < 0x100);
     return u8(imm);
 }

-u16 Argument::GetImmediateU16() const {
+u16 Argument::GetImmediateU16() const noexcept {
     const u64 imm = value.GetImmediateAsU64();
     ASSERT(imm < 0x10000);
     return u16(imm);
 }

-u32 Argument::GetImmediateU32() const {
+u32 Argument::GetImmediateU32() const noexcept {
     const u64 imm = value.GetImmediateAsU64();
     ASSERT(imm < 0x100000000);
     return u32(imm);
 }

-u64 Argument::GetImmediateS32() const {
+u64 Argument::GetImmediateS32() const noexcept {
     ASSERT(FitsInImmediateS32());
     return value.GetImmediateAsU64();
 }

-u64 Argument::GetImmediateU64() const {
+u64 Argument::GetImmediateU64() const noexcept {
     return value.GetImmediateAsU64();
 }

-IR::Cond Argument::GetImmediateCond() const {
+IR::Cond Argument::GetImmediateCond() const noexcept {
     ASSERT(IsImmediate() && GetType() == IR::Type::Cond);
     return value.GetCond();
 }

-IR::AccType Argument::GetImmediateAccType() const {
+IR::AccType Argument::GetImmediateAccType() const noexcept {
     ASSERT(IsImmediate() && GetType() == IR::Type::AccType);
     return value.GetAccType();
 }

 /// Is this value currently in a GPR?
-bool Argument::IsInGpr() const {
+bool Argument::IsInGpr() const noexcept {
     if (IsImmediate())
         return false;
     return HostLocIsGPR(*reg_alloc.ValueLocation(value.GetInst()));
 }

 /// Is this value currently in a XMM?
-bool Argument::IsInXmm() const {
+bool Argument::IsInXmm() const noexcept {
     if (IsImmediate())
         return false;
     return HostLocIsXMM(*reg_alloc.ValueLocation(value.GetInst()));
 }

 /// Is this value currently in memory?
-bool Argument::IsInMemory() const {
+bool Argument::IsInMemory() const noexcept {
     if (IsImmediate())
         return false;
     return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst()));
 }

-RegAlloc::RegAlloc(BlockOfCode* code, boost::container::static_vector gpr_order, boost::container::static_vector xmm_order)
-    : gpr_order(gpr_order)
-    , xmm_order(xmm_order)
-    , hostloc_info(NonSpillHostLocCount + SpillCount)
-    , code(code) {}
+RegAlloc::RegAlloc(BlockOfCode* code, boost::container::static_vector gpr_order, boost::container::static_vector xmm_order) noexcept
+    : gpr_order(gpr_order),
+      xmm_order(xmm_order),
+      code(code)
+{
+
+}

 //static std::uint64_t Zfncwjkrt_blockOfCodeShim = 0;

-RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
-    ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
+RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(const IR::Inst* inst) noexcept {
+    ArgumentInfo ret{Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
     for (size_t i = 0; i < inst->NumArgs(); i++) {
-        const IR::Value arg = inst->GetArg(i);
+        const auto arg = inst->GetArg(i);
         ret[i].value = arg;
         if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) {
             ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already been defined");
@@ -277,11 +228,10 @@ RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
     return ret;
 }

-void RegAlloc::RegisterPseudoOperation(IR::Inst* inst) {
+void RegAlloc::RegisterPseudoOperation(const IR::Inst* inst) noexcept {
     ASSERT(IsValueLive(inst) || !inst->HasUses());
-
     for (size_t i = 0; i < inst->NumArgs(); i++) {
-        const IR::Value arg = inst->GetArg(i);
+        const auto arg = inst->GetArg(i);
         if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) {
             if (const auto loc = ValueLocation(arg.GetInst())) {
                 // May not necessarily have a value (e.g. CMP variant of Sub32).
@@ -291,90 +241,48 @@ void RegAlloc::RegisterPseudoOperation(IR::Inst* inst) {
     }
 }

-bool RegAlloc::IsValueLive(const IR::Inst* inst) const {
-    return !!ValueLocation(inst);
-}
-
-Xbyak::Reg64 RegAlloc::UseGpr(Argument& arg) {
-    ASSERT(!arg.allocated);
-    arg.allocated = true;
-    return HostLocToReg64(UseImpl(arg.value, gpr_order));
-}
-
-Xbyak::Xmm RegAlloc::UseXmm(Argument& arg) {
-    ASSERT(!arg.allocated);
-    arg.allocated = true;
-    return HostLocToXmm(UseImpl(arg.value, xmm_order));
-}
-
-OpArg RegAlloc::UseOpArg(Argument& arg) {
-    return UseGpr(arg);
-}
-
-void RegAlloc::Use(Argument& arg, HostLoc host_loc) {
-    ASSERT(!arg.allocated);
-    arg.allocated = true;
-    UseImpl(arg.value, {host_loc});
-}
-
-Xbyak::Reg64 RegAlloc::UseScratchGpr(Argument& arg) {
+Xbyak::Reg64 RegAlloc::UseScratchGpr(Argument& arg) noexcept {
     ASSERT(!arg.allocated);
     arg.allocated = true;
     return HostLocToReg64(UseScratchImpl(arg.value, gpr_order));
 }

-Xbyak::Xmm RegAlloc::UseScratchXmm(Argument& arg) {
+Xbyak::Xmm RegAlloc::UseScratchXmm(Argument& arg) noexcept {
     ASSERT(!arg.allocated);
     arg.allocated = true;
     return HostLocToXmm(UseScratchImpl(arg.value, xmm_order));
 }

-void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) {
+void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) noexcept {
     ASSERT(!arg.allocated);
     arg.allocated = true;
     UseScratchImpl(arg.value, {host_loc});
 }

-void RegAlloc::DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) {
+void RegAlloc::DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) noexcept {
     ASSERT(reg.getKind() == Xbyak::Operand::XMM || reg.getKind() == Xbyak::Operand::REG);
     const auto hostloc = static_cast(reg.getIdx() + static_cast(reg.getKind() == Xbyak::Operand::XMM ? HostLoc::XMM0 : HostLoc::RAX));
     DefineValueImpl(inst, hostloc);
 }

-void RegAlloc::DefineValue(IR::Inst* inst, Argument& arg) {
+void RegAlloc::DefineValue(IR::Inst* inst, Argument& arg) noexcept {
     ASSERT(!arg.allocated);
     arg.allocated = true;
     DefineValueImpl(inst, arg.value);
 }

-void RegAlloc::Release(const Xbyak::Reg& reg) {
+void RegAlloc::Release(const Xbyak::Reg& reg) noexcept {
     ASSERT(reg.getKind() == Xbyak::Operand::XMM || reg.getKind() == Xbyak::Operand::REG);
     const auto hostloc = static_cast(reg.getIdx() + static_cast(reg.getKind() == Xbyak::Operand::XMM ? HostLoc::XMM0 : HostLoc::RAX));
     LocInfo(hostloc).ReleaseOne();
 }

-Xbyak::Reg64 RegAlloc::ScratchGpr() {
-    return HostLocToReg64(ScratchImpl(gpr_order));
-}
-
-Xbyak::Reg64 RegAlloc::ScratchGpr(HostLoc desired_location) {
-    return HostLocToReg64(ScratchImpl({desired_location}));
-}
-
-Xbyak::Xmm RegAlloc::ScratchXmm() {
-    return HostLocToXmm(ScratchImpl(xmm_order));
-}
-
-Xbyak::Xmm RegAlloc::ScratchXmm(HostLoc desired_location) {
-    return HostLocToXmm(ScratchImpl({desired_location}));
-}
-
-HostLoc RegAlloc::UseImpl(IR::Value use_value, const boost::container::static_vector& desired_locations) {
+HostLoc RegAlloc::UseImpl(IR::Value use_value, const boost::container::static_vector& desired_locations) noexcept {
     if (use_value.IsImmediate()) {
         return LoadImmediate(use_value, ScratchImpl(desired_locations));
     }

-    const IR::Inst* use_inst = use_value.GetInst();
+    const auto* use_inst = use_value.GetInst();
     const HostLoc current_location = *ValueLocation(use_inst);
     const size_t max_bit_width = LocInfo(current_location).GetMaxBitWidth();

@@ -401,12 +309,12 @@ HostLoc RegAlloc::UseImpl(IR::Value use_value, const boost::container::static_ve
     return destination_location;
 }

-HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::static_vector& desired_locations) {
+HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::static_vector& desired_locations) noexcept {
     if (use_value.IsImmediate()) {
         return LoadImmediate(use_value, ScratchImpl(desired_locations));
     }

-    const IR::Inst* use_inst = use_value.GetInst();
+    const auto* use_inst = use_value.GetInst();
     const HostLoc current_location = *ValueLocation(use_inst);
     const size_t bit_width = GetBitWidth(use_inst->GetType());

@@ -428,7 +336,7 @@ HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::st
     return destination_location;
 }

-HostLoc RegAlloc::ScratchImpl(const boost::container::static_vector& desired_locations) {
+HostLoc RegAlloc::ScratchImpl(const boost::container::static_vector& desired_locations) noexcept {
     const HostLoc location = SelectARegister(desired_locations);
     MoveOutOfTheWay(location);
     LocInfo(location).WriteLock();
@@ -440,19 +348,17 @@ void RegAlloc::HostCall(IR::Inst* result_def,
                         const std::optional arg1,
                         const std::optional arg2,
                         const std::optional arg3
-) {
+) noexcept {
     constexpr size_t args_count = 4;
     constexpr std::array args_hostloc = {ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4};
     const std::array, args_count> args = {arg0, arg1, arg2, arg3};

-    static const boost::container::static_vector other_caller_save = [args_hostloc]() {
+    static const boost::container::static_vector other_caller_save = [args_hostloc]() noexcept {
         boost::container::static_vector ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end());
-        ret.erase(std::find(ret.begin(), ret.end(), ABI_RETURN));
-        for (auto hostloc : args_hostloc) {
+        for (auto const hostloc : args_hostloc) {
             ret.erase(std::find(ret.begin(), ret.end(), hostloc));
         }
-
         return ret;
     }();

@@ -494,41 +400,25 @@ void RegAlloc::HostCall(IR::Inst* result_def,
     }
 }

-void RegAlloc::AllocStackSpace(const size_t stack_space) {
+void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept {
     ASSERT(stack_space < static_cast(std::numeric_limits::max()));
     ASSERT(reserved_stack_space == 0);
     reserved_stack_space = stack_space;
     code->sub(code->rsp, static_cast(stack_space));
 }

-void RegAlloc::ReleaseStackSpace(const size_t stack_space) {
+void RegAlloc::ReleaseStackSpace(const size_t stack_space) noexcept {
     ASSERT(stack_space < static_cast(std::numeric_limits::max()));
     ASSERT(reserved_stack_space == stack_space);
     reserved_stack_space = 0;
     code->add(code->rsp, static_cast(stack_space));
 }

-void RegAlloc::EndOfAllocScope() {
-    for (auto& iter : hostloc_info) {
-        iter.ReleaseAll();
-    }
-}
-
-void RegAlloc::AssertNoMoreUses() {
-    ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) { return i.IsEmpty(); }));
-}
-
-void RegAlloc::EmitVerboseDebuggingOutput() {
-    for (size_t i = 0; i < hostloc_info.size(); i++) {
-        hostloc_info[i].EmitVerboseDebuggingOutput(code, i);
-    }
-}
-
-HostLoc RegAlloc::SelectARegister(const boost::container::static_vector& desired_locations) const {
+HostLoc RegAlloc::SelectARegister(const boost::container::static_vector& desired_locations) const noexcept {
     boost::container::static_vector candidates = desired_locations; //Who let someone copy an ENTIRE VECTOR here?

     // Find all locations that have not been allocated..
-    const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc) {
+    const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc) noexcept {
         return !this->LocInfo(loc).IsLocked();
     });
     candidates.erase(allocated_locs, candidates.end());
@@ -536,30 +426,18 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector
         LocInfo(loc).IsEmpty();
     });
-
-    return candidates.front();
+    return it != candidates.end() ? *it : candidates.front();
 }

-std::optional RegAlloc::ValueLocation(const IR::Inst* value) const {
-    for (size_t i = 0; i < hostloc_info.size(); i++) {
-        if (hostloc_info[i].ContainsValue(value)) {
-            return static_cast(i);
-        }
-    }
-
-    return std::nullopt;
-}
-
-void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) {
+void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) noexcept {
     ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
     LocInfo(host_loc).AddValue(def_inst);
 }

-void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
+void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) noexcept {
     ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");

     if (use_inst.IsImmediate()) {
@@ -574,7 +452,7 @@ void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
     DefineValueImpl(def_inst, location);
 }

-HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
+HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) noexcept {
     ASSERT_MSG(imm.IsImmediate(), "imm is not an immediate");

     if (HostLocIsGPR(host_loc)) {
@@ -602,55 +480,46 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
     UNREACHABLE();
 }

-void RegAlloc::Move(HostLoc to, HostLoc from) {
+void RegAlloc::Move(HostLoc to, HostLoc from) noexcept {
     const size_t bit_width = LocInfo(from).GetMaxBitWidth();

     ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
     ASSERT(bit_width <= HostLocBitWidth(to));

-    if (LocInfo(from).IsEmpty()) {
-        return;
+    if (!LocInfo(from).IsEmpty()) {
+        EmitMove(bit_width, to, from);
+        LocInfo(to) = std::exchange(LocInfo(from), {});
     }
-
-    EmitMove(bit_width, to, from);
-
-    LocInfo(to) = std::exchange(LocInfo(from), {});
 }

-void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) {
+void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) noexcept {
     ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsEmpty());
-
     EmitMove(bit_width, to, from);
 }

-void RegAlloc::Exchange(HostLoc a, HostLoc b) {
+void RegAlloc::Exchange(HostLoc a, HostLoc b) noexcept {
     ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
     ASSERT(LocInfo(a).GetMaxBitWidth() <= HostLocBitWidth(b));
     ASSERT(LocInfo(b).GetMaxBitWidth() <= HostLocBitWidth(a));

     if (LocInfo(a).IsEmpty()) {
         Move(a, b);
-        return;
-    }
-
-    if (LocInfo(b).IsEmpty()) {
+    } else if (LocInfo(b).IsEmpty()) {
         Move(b, a);
-        return;
+    } else {
+        EmitExchange(a, b);
+        std::swap(LocInfo(a), LocInfo(b));
     }
-
-    EmitExchange(a, b);
-
-    std::swap(LocInfo(a), LocInfo(b));
 }

-void RegAlloc::MoveOutOfTheWay(HostLoc reg) {
+void RegAlloc::MoveOutOfTheWay(HostLoc reg) noexcept {
     ASSERT(!LocInfo(reg).IsLocked());
     if (!LocInfo(reg).IsEmpty()) {
         SpillRegister(reg);
     }
 }

-void RegAlloc::SpillRegister(HostLoc loc) {
+void RegAlloc::SpillRegister(HostLoc loc) noexcept {
     ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
     ASSERT_MSG(!LocInfo(loc).IsEmpty(), "There is no need to spill unoccupied registers");
     ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt");
@@ -659,7 +528,7 @@ void RegAlloc::SpillRegister(HostLoc loc) {
     Move(new_loc, loc);
 }

-HostLoc RegAlloc::FindFreeSpill() const {
+HostLoc RegAlloc::FindFreeSpill() const noexcept {
     for (size_t i = static_cast(HostLoc::FirstSpill); i < hostloc_info.size(); i++) {
         const auto loc = static_cast(i);
         if (LocInfo(loc).IsEmpty()) {
@@ -670,24 +539,14 @@ HostLoc RegAlloc::FindFreeSpill() const {
     ASSERT_FALSE("All spill locations are full");
 }

-HostLocInfo& RegAlloc::LocInfo(HostLoc loc) {
-    ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
-    return hostloc_info[static_cast(loc)];
-}
-
-const HostLocInfo& RegAlloc::LocInfo(HostLoc loc) const {
-    ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
-    return hostloc_info[static_cast(loc)];
-}
-
-inline static Xbyak::RegExp SpillToOpArg_Helper1(HostLoc loc, size_t reserved_stack_space) {
+inline static Xbyak::RegExp SpillToOpArg_Helper1(HostLoc loc, size_t reserved_stack_space) noexcept {
     ASSERT(HostLocIsSpill(loc));
     size_t i = static_cast(loc) - static_cast(HostLoc::FirstSpill);
     ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations");
     return Xbyak::util::rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(StackLayout::spill[0]);
 }

-void RegAlloc::EmitMove(size_t bit_width, HostLoc to, HostLoc from) {
+void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept {
     if (HostLocIsXMM(to) && HostLocIsXMM(from)) {
         MAYBE_AVX(movaps, HostLocToXmm(to), HostLocToXmm(from));
     } else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
@@ -766,7 +625,7 @@ void RegAlloc::EmitMove(size_t bit_width, HostLoc to, HostLoc from) {
     }
 }

-void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
+void RegAlloc::EmitExchange(const HostLoc a, const HostLoc b) noexcept {
     if (HostLocIsGPR(a) && HostLocIsGPR(b)) {
         code->xchg(HostLocToReg64(a), HostLocToReg64(b));
     } else if (HostLocIsXMM(a) && HostLocIsXMM(b)) {
@@ -776,7 +635,7 @@ void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
     }
 }

-Xbyak::Address RegAlloc::SpillToOpArg(HostLoc loc) {
+Xbyak::Address RegAlloc::SpillToOpArg(const HostLoc loc) noexcept {
     return Xbyak::util::xword[SpillToOpArg_Helper1(loc, reserved_stack_space)];
 }

diff --git a/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/backend/x64/reg_alloc.h
index ba9e2243..599aab12 100644
--- a/src/dynarmic/backend/x64/reg_alloc.h
+++ b/src/dynarmic/backend/x64/reg_alloc.h
@@ -8,12 +8,12 @@
 #include
 #include
 #include
-#include
-#include
 #include
 #include
 #include
+#include
+#include

 #include "dynarmic/backend/x64/block_of_code.h"
 #include "dynarmic/backend/x64/hostloc.h"
@@ -33,63 +33,100 @@ class RegAlloc;

 struct HostLocInfo {
 public:
-    bool IsLocked() const;
-    bool IsEmpty() const;
-    bool IsLastUse() const;
+    HostLocInfo() {}
+    inline bool IsLocked() const {
+        return is_being_used_count > 0;
+    }
+    inline bool IsEmpty() const {
+        return is_being_used_count == 0 && values.empty();
+    }
+    inline bool IsLastUse() const {
+        return is_being_used_count == 0 && current_references == 1 && accumulated_uses + 1 == total_uses;
+    }
+    inline void SetLastUse() noexcept {
+        ASSERT(IsLastUse());
+        is_set_last_use = true;
+    }
+    inline void ReadLock() noexcept {
+        ASSERT(size_t(is_being_used_count) + 1 < std::numeric_limits::max());
+        ASSERT(!is_scratch);
+        is_being_used_count++;
+    }
+    inline void WriteLock() noexcept {
+        ASSERT(size_t(is_being_used_count) + 1 < std::numeric_limits::max());
+        ASSERT(is_being_used_count == 0);
+        is_being_used_count++;
+        is_scratch = true;
+    }
+    inline void AddArgReference() noexcept {
+        ASSERT(size_t(current_references) + 1 < std::numeric_limits::max());
+        current_references++;
+        ASSERT(accumulated_uses + current_references <= total_uses);
+    }
+    void ReleaseOne() noexcept;
+    void ReleaseAll() noexcept;

-    void SetLastUse();
-
-    void ReadLock();
-    void WriteLock();
-    void AddArgReference();
-    void ReleaseOne();
-    void ReleaseAll();
-
-    bool ContainsValue(const IR::Inst* inst) const;
-    size_t GetMaxBitWidth() const;
-    void AddValue(IR::Inst* inst);
-    void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const;
+    /// Checks if the given instruction is in our values set
+    /// SAFETY: Const is casted away, irrelevant since this is only used for checking
+    inline bool ContainsValue(const IR::Inst* inst) const noexcept {
+        //return values.contains(const_cast(inst));
+        return std::find(values.begin(), values.end(), inst) != values.end();
+    }
+    inline size_t GetMaxBitWidth() const noexcept {
+        return max_bit_width;
+    }
+    void AddValue(IR::Inst* inst) noexcept;
+    void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept;
 private:
     //non trivial
     std::vector values;  //24
-//sometimes zeroed
-    size_t accumulated_uses = 0;  //8
     // Block state
-    size_t total_uses = 0;  //8
-    // Value state
-    size_t max_bit_width = 0;  //8
+    uint16_t total_uses = 0;  //8
+    //sometimes zeroed
+    uint16_t accumulated_uses = 0;  //8
     //always zeroed
     // Current instruction state
-    size_t is_being_used_count = 0;  //8
-    size_t current_references = 0;  //8
-    bool is_scratch = false;  //1
-    bool is_set_last_use = false;  //1
+    uint16_t is_being_used_count = 0;  //8
+    uint16_t current_references = 0;  //8
+    // Value state
+    uint8_t max_bit_width = 0;  //Valid values: 1,2,4,8,16,32,128
+    bool is_scratch : 1 = false;  //1
+    bool is_set_last_use : 1 = false;  //1
+
+    alignas(16) char padding;
 };
-static_assert(sizeof(HostLocInfo) == 72);
+static_assert(sizeof(HostLocInfo) == 64);

 struct Argument {
 public:
     using copyable_reference = std::reference_wrapper;

-    IR::Type GetType() const;
-    bool IsImmediate() const;
-    bool IsVoid() const;
+    inline IR::Type GetType() const noexcept {
+        return value.GetType();
+    }
+    inline bool IsImmediate() const noexcept {
+        return value.IsImmediate();
+    }
+    inline bool IsVoid() const noexcept {
+        return GetType() == IR::Type::Void;
+    }

-    bool FitsInImmediateU32() const;
-    bool FitsInImmediateS32() const;
+    bool FitsInImmediateU32() const noexcept;
+    bool FitsInImmediateS32() const noexcept;

-    bool GetImmediateU1() const;
-    u8 GetImmediateU8() const;
-    u16 GetImmediateU16() const;
-    u32 GetImmediateU32() const;
-    u64 GetImmediateS32() const;
-    u64 GetImmediateU64() const;
-    IR::Cond GetImmediateCond() const;
-    IR::AccType GetImmediateAccType() const;
+    bool GetImmediateU1() const noexcept;
+    u8 GetImmediateU8() const noexcept;
+    u16 GetImmediateU16() const noexcept;
+    u32 GetImmediateU32() const noexcept;
+    u64 GetImmediateS32() const noexcept;
+    u64 GetImmediateU64() const noexcept;
+    IR::Cond GetImmediateCond() const noexcept;
+    IR::AccType GetImmediateAccType() const noexcept;

-    bool IsInGpr() const;
-    bool IsInXmm() const;
-    bool IsInMemory() const;
+    /// Is this value currently in a GPR?
+    bool IsInGpr() const noexcept;
+    bool IsInXmm() const noexcept;
+    bool IsInMemory() const noexcept;
 private:
     friend class RegAlloc;
     explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
@@ -103,76 +140,124 @@ private:

 class RegAlloc final {
 public:
     using ArgumentInfo = std::array;

-    RegAlloc() = default;
-    RegAlloc(BlockOfCode* code, boost::container::static_vector gpr_order, boost::container::static_vector xmm_order);
+    RegAlloc() noexcept = default;
+    RegAlloc(BlockOfCode* code, boost::container::static_vector gpr_order, boost::container::static_vector xmm_order) noexcept;

-    ArgumentInfo GetArgumentInfo(IR::Inst* inst);
-    void RegisterPseudoOperation(IR::Inst* inst);
-    bool IsValueLive(const IR::Inst* inst) const;
+    ArgumentInfo GetArgumentInfo(const IR::Inst* inst) noexcept;
+    void RegisterPseudoOperation(const IR::Inst* inst) noexcept;
+    inline bool IsValueLive(const IR::Inst* inst) const noexcept {
+        return !!ValueLocation(inst);
+    }
+    inline Xbyak::Reg64 UseGpr(Argument& arg) noexcept {
+        ASSERT(!arg.allocated);
+        arg.allocated = true;
+        return HostLocToReg64(UseImpl(arg.value, gpr_order));
+    }
+    inline Xbyak::Xmm UseXmm(Argument& arg) noexcept {
+        ASSERT(!arg.allocated);
+        arg.allocated = true;
+        return HostLocToXmm(UseImpl(arg.value, xmm_order));
+    }
+    inline OpArg UseOpArg(Argument& arg) noexcept {
+        return UseGpr(arg);
+    }
+    inline void Use(Argument& arg, const HostLoc host_loc) noexcept {
+        ASSERT(!arg.allocated);
+        arg.allocated = true;
+        UseImpl(arg.value, {host_loc});
+    }

-    Xbyak::Reg64 UseGpr(Argument& arg);
-    Xbyak::Xmm UseXmm(Argument& arg);
-    OpArg UseOpArg(Argument& arg);
-    void Use(Argument& arg, HostLoc host_loc);
+    Xbyak::Reg64 UseScratchGpr(Argument& arg) noexcept;
+    Xbyak::Xmm UseScratchXmm(Argument& arg) noexcept;
+    void UseScratch(Argument& arg, HostLoc host_loc) noexcept;

-    Xbyak::Reg64 UseScratchGpr(Argument& arg);
-    Xbyak::Xmm UseScratchXmm(Argument& arg);
-    void UseScratch(Argument& arg, HostLoc host_loc);
+    void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) noexcept;
+    void DefineValue(IR::Inst* inst, Argument& arg) noexcept;

-    void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
-    void DefineValue(IR::Inst* inst, Argument& arg);
+    void Release(const Xbyak::Reg& reg) noexcept;

-    void Release(const Xbyak::Reg& reg);
-
-    Xbyak::Reg64 ScratchGpr();
-    Xbyak::Reg64 ScratchGpr(HostLoc desired_location);
-    Xbyak::Xmm ScratchXmm();
-    Xbyak::Xmm ScratchXmm(HostLoc desired_location);
+    inline Xbyak::Reg64 ScratchGpr() noexcept {
+        return HostLocToReg64(ScratchImpl(gpr_order));
+    }
+    inline Xbyak::Reg64 ScratchGpr(const HostLoc desired_location) noexcept {
+        return HostLocToReg64(ScratchImpl({desired_location}));
+    }
+    inline Xbyak::Xmm ScratchXmm() noexcept {
+        return HostLocToXmm(ScratchImpl(xmm_order));
+    }
+    inline Xbyak::Xmm ScratchXmm(HostLoc desired_location) noexcept {
+        return HostLocToXmm(ScratchImpl({desired_location}));
+    }

     void HostCall(IR::Inst* result_def = nullptr,
                   const std::optional arg0 = {},
                   const std::optional arg1 = {},
                   const std::optional arg2 = {},
                   const std::optional arg3 = {}
-    );
+    ) noexcept;

     // TODO: Values in host flags

-    void AllocStackSpace(const size_t stack_space);
-    void ReleaseStackSpace(const size_t stack_space);
-    void EndOfAllocScope();
-    void AssertNoMoreUses();
-    void EmitVerboseDebuggingOutput();
+    void AllocStackSpace(const size_t stack_space) noexcept;
+    void ReleaseStackSpace(const size_t stack_space) noexcept;
+
+    inline void EndOfAllocScope() noexcept {
+        for (auto& iter : hostloc_info) {
+            iter.ReleaseAll();
+        }
+    }
+    inline void AssertNoMoreUses() noexcept {
+        ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) noexcept { return i.IsEmpty(); }));
+    }
+    inline void EmitVerboseDebuggingOutput() noexcept {
+        for (size_t i = 0; i < hostloc_info.size(); i++) {
+            hostloc_info[i].EmitVerboseDebuggingOutput(code, i);
+        }
+    }
 private:
     friend struct Argument;

-    HostLoc SelectARegister(const boost::container::static_vector& desired_locations) const;
-    std::optional ValueLocation(const IR::Inst* value) const;
+    HostLoc SelectARegister(const boost::container::static_vector& desired_locations) const noexcept;
+    inline std::optional ValueLocation(const IR::Inst* value) const noexcept {
+        for (size_t i = 0; i < hostloc_info.size(); i++) {
+            if (hostloc_info[i].ContainsValue(value)) {
+                return HostLoc(i);
+            }
+        }
+        return std::nullopt;
+    }

-    HostLoc UseImpl(IR::Value use_value, const boost::container::static_vector& desired_locations);
-    HostLoc UseScratchImpl(IR::Value use_value, const boost::container::static_vector& desired_locations);
-    HostLoc ScratchImpl(const boost::container::static_vector& desired_locations);
-    void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc);
-    void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst);
+    HostLoc UseImpl(IR::Value use_value, const boost::container::static_vector& desired_locations) noexcept;
+    HostLoc UseScratchImpl(IR::Value use_value, const boost::container::static_vector& desired_locations) noexcept;
+    HostLoc ScratchImpl(const boost::container::static_vector& desired_locations) noexcept;
+    void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) noexcept;
+    void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) noexcept;

-    HostLoc LoadImmediate(IR::Value imm, HostLoc host_loc);
-    void Move(HostLoc to, HostLoc from);
-    void CopyToScratch(size_t bit_width, HostLoc to, HostLoc from);
-    void Exchange(HostLoc a, HostLoc b);
-    void MoveOutOfTheWay(HostLoc reg);
+    HostLoc LoadImmediate(IR::Value imm, HostLoc host_loc) noexcept;
+    void Move(HostLoc to, HostLoc from) noexcept;
+    void CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) noexcept;
+    void Exchange(HostLoc a, HostLoc b) noexcept;
+    void MoveOutOfTheWay(HostLoc reg) noexcept;

-    void SpillRegister(HostLoc loc);
-    HostLoc FindFreeSpill() const;
-    HostLocInfo& LocInfo(HostLoc loc);
-    const HostLocInfo& LocInfo(HostLoc loc) const;
+    void SpillRegister(HostLoc loc) noexcept;
+    HostLoc FindFreeSpill() const noexcept;
+
+    inline HostLocInfo& LocInfo(const HostLoc loc) noexcept {
+        ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
+        return hostloc_info[static_cast(loc)];
+    }
+    inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept {
+        ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
+        return hostloc_info[static_cast(loc)];
+    }

-    void EmitMove(size_t bit_width, HostLoc to, HostLoc from);
-    void EmitExchange(HostLoc a, HostLoc b);
-    Xbyak::Address SpillToOpArg(HostLoc loc);
+    void EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept;
+    void EmitExchange(const HostLoc a, const HostLoc b) noexcept;
+    Xbyak::Address SpillToOpArg(const HostLoc loc) noexcept;

     //data
     alignas(64) boost::container::static_vector gpr_order;
     alignas(64) boost::container::static_vector xmm_order;
-    alignas(64) boost::container::static_vector hostloc_info;
+    alignas(64) std::array hostloc_info;
     BlockOfCode* code = nullptr;
     size_t reserved_stack_space = 0;
 };
diff --git a/src/dynarmic/ir/opcodes.inc b/src/dynarmic/ir/opcodes.inc
index 0a29db6b..b0522083 100644
--- a/src/dynarmic/ir/opcodes.inc
+++ b/src/dynarmic/ir/opcodes.inc
@@ -1,3 +1,7 @@
+// First we list the common shared opcodes.
+// Since we give priority to A64 performance, the A64 opcodes come next, so that all
+// A32 opcodes can be discarded at the end instead of leaving a "hole" in our checks.
+
 // clang-format off

 // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ...
@@ -644,6 +648,68 @@ OPCODE(FPVectorToUnsignedFixed16, U128, U128
 OPCODE(FPVectorToUnsignedFixed32, U128, U128, U8, U8, U1 )
 OPCODE(FPVectorToUnsignedFixed64, U128, U128, U8, U8, U1 )

+// A64 Context getters/setters
+A64OPC(SetCheckBit, Void, U1 )
+A64OPC(GetCFlag, U1, )
+A64OPC(GetNZCVRaw, U32, )
+A64OPC(SetNZCVRaw, Void, U32 )
+A64OPC(SetNZCV, Void, NZCV )
+A64OPC(GetW, U32, A64Reg )
+A64OPC(GetX, U64, A64Reg )
+A64OPC(GetS, U128, A64Vec )
+A64OPC(GetD, U128, A64Vec )
+A64OPC(GetQ, U128, A64Vec )
+A64OPC(GetSP, U64, )
+A64OPC(GetFPCR, U32, )
+A64OPC(GetFPSR, U32, )
+A64OPC(SetW, Void, A64Reg, U32 )
+A64OPC(SetX, Void, A64Reg, U64 )
+A64OPC(SetS, Void, A64Vec, U128 )
+A64OPC(SetD, Void, A64Vec, U128 )
+A64OPC(SetQ, Void, A64Vec, U128 )
+A64OPC(SetSP, Void, U64 )
+A64OPC(SetFPCR, Void, U32 )
+A64OPC(SetFPSR, Void, U32 )
+A64OPC(SetPC, Void, U64 )
+A64OPC(CallSupervisor, Void, U32 )
+A64OPC(ExceptionRaised, Void, U64, U64 )
+A64OPC(DataCacheOperationRaised, Void, U64, U64, U64 )
+A64OPC(InstructionCacheOperationRaised, Void, U64, U64 )
+A64OPC(DataSynchronizationBarrier, Void, )
+A64OPC(DataMemoryBarrier, Void, )
+A64OPC(InstructionSynchronizationBarrier, Void, )
+A64OPC(GetCNTFRQ, U32, )
+A64OPC(GetCNTPCT, U64, )
+A64OPC(GetCTR, U32, )
+A64OPC(GetDCZID, U32, )
+A64OPC(GetTPIDR, U64, )
+A64OPC(GetTPIDRRO, U64, )
+A64OPC(SetTPIDR, Void, U64 )
+
+// A64 Memory access
+A64OPC(ClearExclusive, Void, )
+A64OPC(ReadMemory8, U8, U64, U64, AccType )
+A64OPC(ReadMemory16, U16, U64, U64, AccType )
+A64OPC(ReadMemory32, U32, U64, U64, AccType )
+A64OPC(ReadMemory64, U64, U64, U64, AccType )
+A64OPC(ReadMemory128, U128, U64, U64, AccType )
+A64OPC(ExclusiveReadMemory8, U8, U64, U64, AccType )
+A64OPC(ExclusiveReadMemory16, U16, U64, U64, AccType )
+A64OPC(ExclusiveReadMemory32, U32, U64, U64, AccType )
+A64OPC(ExclusiveReadMemory64, U64, U64, U64, AccType )
+A64OPC(ExclusiveReadMemory128, U128, U64, U64, AccType )
+A64OPC(WriteMemory8, Void, U64, U64, U8, AccType )
+A64OPC(WriteMemory16, Void, U64, U64, U16, AccType )
+A64OPC(WriteMemory32, Void, U64, U64, U32, AccType )
+A64OPC(WriteMemory64, Void, U64, U64, U64, AccType )
+A64OPC(WriteMemory128, Void, U64, U64, U128, AccType )
+A64OPC(ExclusiveWriteMemory8, U32, U64, U64, U8, AccType )
+A64OPC(ExclusiveWriteMemory16, U32, U64, U64, U16, AccType )
+A64OPC(ExclusiveWriteMemory32, U32, U64, U64, U32, AccType )
+A64OPC(ExclusiveWriteMemory64, U32, U64, U64, U64, AccType )
+A64OPC(ExclusiveWriteMemory128, U32, U64, U64, U128, AccType )
+
+
 // A32 Context getters/setters
 A32OPC(SetCheckBit, Void, U1 )
 A32OPC(GetRegister, U32, A32Reg )
@@ -706,65 +772,4 @@ A32OPC(CoprocGetTwoWords, U64, Copr
 A32OPC(CoprocLoadWords, Void, CoprocInfo, U32 )
 A32OPC(CoprocStoreWords, Void, CoprocInfo, U32 )

-// A64 Context getters/setters
-A64OPC(SetCheckBit, Void, U1 )
-A64OPC(GetCFlag, U1, )
-A64OPC(GetNZCVRaw, U32, )
-A64OPC(SetNZCVRaw, Void, U32 )
-A64OPC(SetNZCV, Void, NZCV )
-A64OPC(GetW, U32, A64Reg )
-A64OPC(GetX, U64, A64Reg )
-A64OPC(GetS, U128, A64Vec )
-A64OPC(GetD, U128, A64Vec )
-A64OPC(GetQ, U128, A64Vec )
-A64OPC(GetSP, U64, )
-A64OPC(GetFPCR, U32, )
-A64OPC(GetFPSR, U32, )
-A64OPC(SetW, Void, A64Reg, U32 )
-A64OPC(SetX, Void, A64Reg, U64 )
-A64OPC(SetS, Void, A64Vec, U128 )
-A64OPC(SetD, Void, A64Vec, U128 )
-A64OPC(SetQ, Void, A64Vec, U128 )
-A64OPC(SetSP, Void, U64 )
-A64OPC(SetFPCR, Void, U32 )
-A64OPC(SetFPSR, Void, U32 )
-A64OPC(SetPC, Void, U64 )
-A64OPC(CallSupervisor, Void, U32 )
-A64OPC(ExceptionRaised, Void, U64, U64 )
-A64OPC(DataCacheOperationRaised, Void, U64, U64, U64 )
-A64OPC(InstructionCacheOperationRaised, Void, U64, U64 )
-A64OPC(DataSynchronizationBarrier, Void, )
-A64OPC(DataMemoryBarrier, Void, )
-A64OPC(InstructionSynchronizationBarrier, Void, )
-A64OPC(GetCNTFRQ, U32, )
-A64OPC(GetCNTPCT, U64, )
-A64OPC(GetCTR, U32, )
-A64OPC(GetDCZID, U32, )
-A64OPC(GetTPIDR, U64, )
-A64OPC(GetTPIDRRO, U64, )
-A64OPC(SetTPIDR, Void, U64 )
-
-// A64 Memory access
-A64OPC(ClearExclusive, Void, )
-A64OPC(ReadMemory8, U8, U64, U64, AccType )
-A64OPC(ReadMemory16, U16, U64, U64, AccType )
-A64OPC(ReadMemory32, U32, U64, U64, AccType )
-A64OPC(ReadMemory64, U64, U64, U64, AccType )
-A64OPC(ReadMemory128, U128, U64, U64, AccType )
-A64OPC(ExclusiveReadMemory8, U8, U64, U64, AccType )
-A64OPC(ExclusiveReadMemory16, U16, U64, U64, AccType )
-A64OPC(ExclusiveReadMemory32, U32, U64, U64, AccType )
-A64OPC(ExclusiveReadMemory64, U64, U64, U64, AccType )
-A64OPC(ExclusiveReadMemory128, U128, U64, U64, AccType )
-A64OPC(WriteMemory8, Void, U64, U64, U8, AccType )
-A64OPC(WriteMemory16, Void, U64, U64, U16, AccType )
-A64OPC(WriteMemory32, Void, U64, U64, U32, AccType )
-A64OPC(WriteMemory64, Void, U64, U64, U64, AccType )
-A64OPC(WriteMemory128, Void, U64, U64, U128, AccType )
-A64OPC(ExclusiveWriteMemory8, U32, U64, U64, U8, AccType )
-A64OPC(ExclusiveWriteMemory16, U32, U64, U64, U16, AccType )
-A64OPC(ExclusiveWriteMemory32, U32, U64, U64, U32, AccType )
-A64OPC(ExclusiveWriteMemory64, U32, U64, U64, U64, AccType )
-A64OPC(ExclusiveWriteMemory128, U32, U64, U64, U128, AccType )
-
 // clang-format on
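Note (not part of the patch): the opcodes.inc reordering and the two handler tables in A64EmitX64::Emit() only line up because the A64 opcodes now directly follow the shared opcodes in the generated IR::Opcode enum, which makes the "opcode index minus size of the shared table" lookup valid. Below is a minimal, self-contained sketch of that X-macro-style table dispatch idea; all names here (Opcode, Emitter, Dispatch, the handler functions) are illustrative placeholders, not dynarmic's actual types.

#include <cstddef>
#include <cstdio>
#include <iterator>

// Hypothetical enum: shared opcodes first, then the "A64" ones, with no hole in between.
enum class Opcode { Shared0, Shared1, A64_0, A64_1 };

struct Emitter {
    void EmitShared0() { std::puts("shared 0"); }
    void EmitShared1() { std::puts("shared 1"); }
    void EmitA64_0() { std::puts("a64 0"); }
    void EmitA64_1() { std::puts("a64 1"); }

    void Dispatch(Opcode op) {
        using Fn = void (Emitter::*)();
        // One entry per shared opcode, in enum order (in dynarmic this list is
        // generated by #including opcodes.inc with the OPCODE macro defined).
        static constexpr Fn shared[] = {&Emitter::EmitShared0, &Emitter::EmitShared1};
        // One entry per A64 opcode, in enum order (generated via the A64OPC macro).
        static constexpr Fn a64[] = {&Emitter::EmitA64_0, &Emitter::EmitA64_1};
        const auto idx = static_cast<std::size_t>(op);
        if (idx < std::size(shared)) {
            (this->*shared[idx])();
        } else {
            // Valid only because the A64 opcodes are declared immediately after the
            // shared ones; an A32 block in between would break this subtraction.
            (this->*a64[idx - std::size(shared)])();
        }
    }
};

int main() {
    Emitter e;
    e.Dispatch(Opcode::Shared1);  // prints "shared 1"
    e.Dispatch(Opcode::A64_1);    // prints "a64 1"
}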