More aggressive inlining; enforce Core 2 Duo as minimum CPU

Esther1024
2025-05-19 19:02:08 +01:00
committed by crueter
parent f03f933b31
commit 7b5355fdd1
9 changed files with 382 additions and 418 deletions

View File

@@ -48,6 +48,13 @@ endif()
# Add the module directory to the list of paths
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
# Arch detection
include(DetectArchitecture)
if (NOT DEFINED ARCHITECTURE)
message(FATAL_ERROR "Unsupported architecture encountered. Ending CMake generation.")
endif()
message(STATUS "Target architecture: ${ARCHITECTURE}")
# Compiler flags
if (MSVC)
set(DYNARMIC_CXX_FLAGS
@@ -91,7 +98,12 @@ else()
-Wextra
-Wcast-qual
-pedantic
-Wno-missing-braces)
-Wno-missing-braces
-Wstack-usage=4096)
if (ARCHITECTURE STREQUAL "x86_64")
list(APPEND DYNARMIC_CXX_FLAGS -mtune=core2)
endif()
if (DYNARMIC_WARNINGS_AS_ERRORS)
list(APPEND DYNARMIC_CXX_FLAGS
@@ -120,13 +132,6 @@ else()
endif()
endif()
# Arch detection
include(DetectArchitecture)
if (NOT DEFINED ARCHITECTURE)
message(FATAL_ERROR "Unsupported architecture encountered. Ending CMake generation.")
endif()
message(STATUS "Target architecture: ${ARCHITECTURE}")
# Forced use of individual bundled libraries for non-REQUIRED library is possible with e.g. cmake -DCMAKE_DISABLE_FIND_PACKAGE_fmt=ON ...
if (DYNARMIC_USE_BUNDLED_EXTERNALS)

View File

@@ -60,16 +60,15 @@ void EmitIR<IR::Opcode::Pack2x32To1x64>(oaknut::CodeGenerator& code, EmitContext
template<>
void EmitIR<IR::Opcode::Pack2x64To1x128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsInGpr() && args[1].IsInGpr()) {
bool const args_in_gpr[] = { args[0].IsInGpr(), args[1].IsInGpr() };
if (args_in_gpr[0] && args_in_gpr[1]) {
auto Xlo = ctx.reg_alloc.ReadX(args[0]);
auto Xhi = ctx.reg_alloc.ReadX(args[1]);
auto Qresult = ctx.reg_alloc.WriteQ(inst);
RegAlloc::Realize(Xlo, Xhi, Qresult);
code.FMOV(Qresult->toD(), Xlo);
code.MOV(oaknut::VRegSelector{Qresult->index()}.D()[1], Xhi);
} else if (args[0].IsInGpr()) {
} else if (args_in_gpr[0]) {
auto Xlo = ctx.reg_alloc.ReadX(args[0]);
auto Dhi = ctx.reg_alloc.ReadD(args[1]);
auto Qresult = ctx.reg_alloc.WriteQ(inst);
@@ -77,7 +76,7 @@ void EmitIR<IR::Opcode::Pack2x64To1x128>(oaknut::CodeGenerator& code, EmitContex
code.FMOV(Qresult->toD(), Xlo);
code.MOV(oaknut::VRegSelector{Qresult->index()}.D()[1], oaknut::VRegSelector{Dhi->index()}.D()[0]);
} else if (args[1].IsInGpr()) {
} else if (args_in_gpr[1]) {
auto Dlo = ctx.reg_alloc.ReadD(args[0]);
auto Xhi = ctx.reg_alloc.ReadX(args[1]);
auto Qresult = ctx.reg_alloc.WriteQ(inst);

View File

@@ -65,8 +65,8 @@ A64EmitX64::A64EmitX64(BlockOfCode& code, A64::UserConfig conf, A64::Jit* jit_in
A64EmitX64::~A64EmitX64() = default;
A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
if (conf.very_verbose_debugging_output) {
A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
if (conf.very_verbose_debugging_output) [[unlikely]] {
std::puts(IR::DumpBlock(block).c_str());
}
@@ -91,47 +91,54 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
// Start emitting.
code.align();
const u8* const entrypoint = code.getCurr();
const auto* const entrypoint = code.getCurr();
ASSERT(block.GetCondition() == IR::Cond::AL);
static void (EmitX64::*opcode_handlers[])(EmitContext& context, IR::Inst* inst) = {
DEBUG_ASSERT(block.GetCondition() == IR::Cond::AL);
typedef void (EmitX64::*EmitHandlerFn)(EmitContext& context, IR::Inst* inst);
constexpr EmitHandlerFn opcode_handlers[] = {
#define OPCODE(name, type, ...) &EmitX64::Emit##name,
#define A32OPC(name, type, ...)
#define A64OPC(name, type, ...)
#include "dynarmic/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
};
typedef void (A64EmitX64::*A64EmitHandlerFn)(A64EmitContext& context, IR::Inst* inst);
constexpr A64EmitHandlerFn a64_handlers[] = {
#define OPCODE(...)
#define A32OPC(...)
#define A64OPC(...)
#define A64OPC(name, type, ...) &A64EmitX64::EmitA64##name,
#include "dynarmic/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
};
for (auto iter = block.begin(); iter != block.end(); ++iter) {
IR::Inst* inst = &*iter;
for (auto& inst : block) {
auto const opcode = inst.GetOpcode();
// Call the relevant Emit* member function.
switch (inst->GetOpcode()) {
#define OPCODE(name, type, ...) \
case IR::Opcode::name: goto true_opcode_branch;
#define A32OPC(...)
#define A64OPC(name, type, ...) \
case IR::Opcode::A64##name: \
A64EmitX64::EmitA64##name(ctx, inst); \
break;
switch (opcode) {
#define OPCODE(name, type, ...) [[likely]] case IR::Opcode::name: goto opcode_branch;
#define A32OPC(name, type, ...)
#define A64OPC(name, type, ...) [[likely]] case IR::Opcode::A64##name: goto a64_branch;
#include "dynarmic/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
default:
ASSERT_MSG(false, "Invalid opcode: {}", inst->GetOpcode());
break;
default: [[unlikely]] {
ASSERT_MSG(false, "Invalid opcode: {}", opcode);
goto finish_this_inst;
}
goto false_opcode_branch;
true_opcode_branch:
(this->*opcode_handlers[size_t(inst->GetOpcode())])(ctx, inst);
false_opcode_branch:
}
opcode_branch:
(this->*opcode_handlers[size_t(opcode)])(ctx, &inst);
goto finish_this_inst;
a64_branch:
(this->*a64_handlers[size_t(opcode) - std::size(opcode_handlers)])(ctx, &inst);
finish_this_inst:
ctx.reg_alloc.EndOfAllocScope();
if (conf.very_verbose_debugging_output) {
if (conf.very_verbose_debugging_output) [[unlikely]] {
EmitVerboseDebuggingOutput(reg_alloc);
}
}
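
To make the new dispatch shape easier to follow, here is a minimal, self-contained sketch of the same pattern: the opcode value indexes straight into arrays of pointer-to-member handlers, with shared opcodes occupying the low indices and backend-specific ones following them. All names here (Opcode, Emitter, Dispatch) are illustrative placeholders rather than the dynarmic API; in the real code the two tables are generated from opcodes.inc via the OPCODE/A64OPC macros.

#include <cstddef>
#include <cstdio>
#include <iterator>

enum class Opcode : std::size_t { Add, Sub, A64GetX, Count };

struct Emitter {
    void EmitAdd()     { std::puts("emit add"); }
    void EmitSub()     { std::puts("emit sub"); }
    void EmitA64GetX() { std::puts("emit A64 GetX"); }

    void Dispatch(Opcode op) {
        using Handler = void (Emitter::*)();
        // Shared opcodes come first, so backend-specific handlers can be
        // indexed as (opcode - number of common opcodes).
        static constexpr Handler common_handlers[] = { &Emitter::EmitAdd, &Emitter::EmitSub };
        static constexpr Handler a64_handlers[]    = { &Emitter::EmitA64GetX };

        const auto idx = static_cast<std::size_t>(op);
        if (idx < std::size(common_handlers)) {
            (this->*common_handlers[idx])();
        } else {
            (this->*a64_handlers[idx - std::size(common_handlers)])();
        }
    }
};

int main() {
    Emitter e;
    e.Dispatch(Opcode::Add);
    e.Dispatch(Opcode::A64GetX);
}

The goto labels in the diff play the same role as the if/else above: every matched case funnels into a single indexed call, which keeps the switch body small and easy for the compiler to lower to a jump table.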

View File

@@ -10,6 +10,7 @@
#include <optional>
#include <tuple>
#include <ankerl/unordered_dense.h>
#include <boost/container/static_vector.hpp>
#include "dynarmic/backend/block_range_information.h"
#include "dynarmic/backend/x64/a64_jitstate.h"
@@ -41,11 +42,9 @@ public:
A64EmitX64(BlockOfCode& code, A64::UserConfig conf, A64::Jit* jit_interface);
~A64EmitX64() override;
/**
* Emit host machine code for a basic block with intermediate representation `block`.
* @note block is modified.
*/
BlockDescriptor Emit(IR::Block& block);
/// Emit host machine code for a basic block with intermediate representation `block`.
/// @note block is modified.
BlockDescriptor Emit(IR::Block& block) noexcept;
void ClearCache() override;
@@ -64,7 +63,7 @@ protected:
void GenMemory128Accessors();
void GenFastmemFallbacks();
void GenTerminalHandlers();
// Microinstruction emitters
void EmitPushRSB(EmitContext& ctx, IR::Inst* inst);
#define OPCODE(...)
@@ -132,13 +131,13 @@ protected:
std::map<std::tuple<bool, size_t, int, int>, void (*)()> write_fallbacks;
std::map<std::tuple<bool, size_t, int, int>, void (*)()> exclusive_write_fallbacks;
std::set<DoNotFastmemMarker> do_not_fastmem;
const void* terminal_handler_pop_rsb_hint;
const void* terminal_handler_pop_rsb_hint = nullptr;
const void* terminal_handler_fast_dispatch_hint = nullptr;
FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
A64::Jit* jit_interface;
void (*memory_read_128)();
void (*memory_write_128)();
void (*memory_exclusive_write_128)();
A64::Jit* jit_interface = nullptr;
void (*memory_read_128)() = nullptr;
void (*memory_write_128)() = nullptr;
void (*memory_exclusive_write_128)() = nullptr;
};
} // namespace Dynarmic::Backend::X64

View File

@@ -55,6 +55,10 @@ std::optional<EmitX64::BlockDescriptor> EmitX64::GetBasicBlock(IR::LocationDescr
return iter->second;
}
void EmitX64::EmitInvalid(EmitContext&, IR::Inst* inst) {
ASSERT_MSG(false, "Invalid opcode: {}", inst->GetOpcode());
}
void EmitX64::EmitVoid(EmitContext&, IR::Inst*) {
}

View File

@@ -101,6 +101,7 @@ protected:
#undef OPCODE
#undef A32OPC
#undef A64OPC
void EmitInvalid(EmitContext& ctx, IR::Inst* inst);
// Helpers
virtual std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const = 0;

View File

@@ -29,12 +29,12 @@ namespace Dynarmic::Backend::X64 {
} \
}()
static bool CanExchange(HostLoc a, HostLoc b) {
static inline bool CanExchange(const HostLoc a, const HostLoc b) noexcept {
return HostLocIsGPR(a) && HostLocIsGPR(b);
}
// Minimum number of bits required to represent a type
static size_t GetBitWidth(IR::Type type) {
static inline size_t GetBitWidth(const IR::Type type) noexcept {
switch (type) {
case IR::Type::A32Reg:
case IR::Type::A32ExtReg:
@@ -66,7 +66,7 @@ static size_t GetBitWidth(IR::Type type) {
UNREACHABLE();
}
static bool IsValuelessType(IR::Type type) {
static inline bool IsValuelessType(const IR::Type type) noexcept {
switch (type) {
case IR::Type::Table:
return true;
@@ -75,46 +75,14 @@ static bool IsValuelessType(IR::Type type) {
}
}
bool HostLocInfo::IsLocked() const {
return is_being_used_count > 0;
}
bool HostLocInfo::IsEmpty() const {
return is_being_used_count == 0 && values.empty();
}
bool HostLocInfo::IsLastUse() const {
return is_being_used_count == 0 && current_references == 1 && accumulated_uses + 1 == total_uses;
}
void HostLocInfo::SetLastUse() {
ASSERT(IsLastUse());
is_set_last_use = true;
}
void HostLocInfo::ReadLock() {
ASSERT(!is_scratch);
is_being_used_count++;
}
void HostLocInfo::WriteLock() {
ASSERT(is_being_used_count == 0);
is_being_used_count++;
is_scratch = true;
}
void HostLocInfo::AddArgReference() {
current_references++;
ASSERT(accumulated_uses + current_references <= total_uses);
}
void HostLocInfo::ReleaseOne() {
void HostLocInfo::ReleaseOne() noexcept {
is_being_used_count--;
is_scratch = false;
if (current_references == 0)
return;
ASSERT(size_t(accumulated_uses) + 1 < std::numeric_limits<uint16_t>::max());
accumulated_uses++;
current_references--;
@@ -122,7 +90,7 @@ void HostLocInfo::ReleaseOne() {
ReleaseAll();
}
void HostLocInfo::ReleaseAll() {
void HostLocInfo::ReleaseAll() noexcept {
accumulated_uses += current_references;
current_references = 0;
@@ -139,27 +107,20 @@ void HostLocInfo::ReleaseAll() {
is_scratch = false;
}
bool HostLocInfo::ContainsValue(const IR::Inst* inst) const {
return std::find(values.begin(), values.end(), inst) != values.end();
}
size_t HostLocInfo::GetMaxBitWidth() const {
return max_bit_width;
}
void HostLocInfo::AddValue(IR::Inst* inst) {
void HostLocInfo::AddValue(IR::Inst* inst) noexcept {
if (is_set_last_use) {
is_set_last_use = false;
values.clear();
}
values.push_back(inst);
ASSERT(size_t(total_uses) + inst->UseCount() < std::numeric_limits<uint16_t>::max());
total_uses += inst->UseCount();
max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType()));
max_bit_width = std::max<uint8_t>(max_bit_width, GetBitWidth(inst->GetType()));
}
void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const {
void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept {
using namespace Xbyak::util;
for (IR::Inst* value : values) {
for (auto const value : values) {
code->mov(code->ABI_PARAM1, rsp);
code->mov(code->ABI_PARAM2, host_loc_index);
code->mov(code->ABI_PARAM3, value->GetName());
@@ -168,106 +129,96 @@ void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_
}
}
IR::Type Argument::GetType() const {
return value.GetType();
}
bool Argument::IsImmediate() const {
return value.IsImmediate();
}
bool Argument::IsVoid() const {
return GetType() == IR::Type::Void;
}
bool Argument::FitsInImmediateU32() const {
bool Argument::FitsInImmediateU32() const noexcept {
if (!IsImmediate())
return false;
const u64 imm = value.GetImmediateAsU64();
return imm < 0x100000000;
}
bool Argument::FitsInImmediateS32() const {
bool Argument::FitsInImmediateS32() const noexcept {
if (!IsImmediate())
return false;
const s64 imm = static_cast<s64>(value.GetImmediateAsU64());
return -s64(0x80000000) <= imm && imm <= s64(0x7FFFFFFF);
}
bool Argument::GetImmediateU1() const {
bool Argument::GetImmediateU1() const noexcept {
return value.GetU1();
}
u8 Argument::GetImmediateU8() const {
u8 Argument::GetImmediateU8() const noexcept {
const u64 imm = value.GetImmediateAsU64();
ASSERT(imm < 0x100);
return u8(imm);
}
u16 Argument::GetImmediateU16() const {
u16 Argument::GetImmediateU16() const noexcept {
const u64 imm = value.GetImmediateAsU64();
ASSERT(imm < 0x10000);
return u16(imm);
}
u32 Argument::GetImmediateU32() const {
u32 Argument::GetImmediateU32() const noexcept {
const u64 imm = value.GetImmediateAsU64();
ASSERT(imm < 0x100000000);
return u32(imm);
}
u64 Argument::GetImmediateS32() const {
u64 Argument::GetImmediateS32() const noexcept {
ASSERT(FitsInImmediateS32());
return value.GetImmediateAsU64();
}
u64 Argument::GetImmediateU64() const {
u64 Argument::GetImmediateU64() const noexcept {
return value.GetImmediateAsU64();
}
IR::Cond Argument::GetImmediateCond() const {
IR::Cond Argument::GetImmediateCond() const noexcept {
ASSERT(IsImmediate() && GetType() == IR::Type::Cond);
return value.GetCond();
}
IR::AccType Argument::GetImmediateAccType() const {
IR::AccType Argument::GetImmediateAccType() const noexcept {
ASSERT(IsImmediate() && GetType() == IR::Type::AccType);
return value.GetAccType();
}
/// Is this value currently in a GPR?
bool Argument::IsInGpr() const {
bool Argument::IsInGpr() const noexcept {
if (IsImmediate())
return false;
return HostLocIsGPR(*reg_alloc.ValueLocation(value.GetInst()));
}
/// Is this value currently in a XMM?
bool Argument::IsInXmm() const {
bool Argument::IsInXmm() const noexcept {
if (IsImmediate())
return false;
return HostLocIsXMM(*reg_alloc.ValueLocation(value.GetInst()));
}
/// Is this value currently in memory?
bool Argument::IsInMemory() const {
bool Argument::IsInMemory() const noexcept {
if (IsImmediate())
return false;
return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst()));
}
RegAlloc::RegAlloc(BlockOfCode* code, boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order)
: gpr_order(gpr_order)
, xmm_order(xmm_order)
, hostloc_info(NonSpillHostLocCount + SpillCount)
, code(code) {}
RegAlloc::RegAlloc(BlockOfCode* code, boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order) noexcept
: gpr_order(gpr_order),
xmm_order(xmm_order),
code(code)
{
}
//static std::uint64_t Zfncwjkrt_blockOfCodeShim = 0;
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(const IR::Inst* inst) noexcept {
ArgumentInfo ret{Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
for (size_t i = 0; i < inst->NumArgs(); i++) {
const IR::Value arg = inst->GetArg(i);
const auto arg = inst->GetArg(i);
ret[i].value = arg;
if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) {
ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already been defined");
@@ -277,11 +228,10 @@ RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
return ret;
}
void RegAlloc::RegisterPseudoOperation(IR::Inst* inst) {
void RegAlloc::RegisterPseudoOperation(const IR::Inst* inst) noexcept {
ASSERT(IsValueLive(inst) || !inst->HasUses());
for (size_t i = 0; i < inst->NumArgs(); i++) {
const IR::Value arg = inst->GetArg(i);
const auto arg = inst->GetArg(i);
if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) {
if (const auto loc = ValueLocation(arg.GetInst())) {
// May not necessarily have a value (e.g. CMP variant of Sub32).
@@ -291,90 +241,48 @@ void RegAlloc::RegisterPseudoOperation(IR::Inst* inst) {
}
}
bool RegAlloc::IsValueLive(const IR::Inst* inst) const {
return !!ValueLocation(inst);
}
Xbyak::Reg64 RegAlloc::UseGpr(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToReg64(UseImpl(arg.value, gpr_order));
}
Xbyak::Xmm RegAlloc::UseXmm(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToXmm(UseImpl(arg.value, xmm_order));
}
OpArg RegAlloc::UseOpArg(Argument& arg) {
return UseGpr(arg);
}
void RegAlloc::Use(Argument& arg, HostLoc host_loc) {
ASSERT(!arg.allocated);
arg.allocated = true;
UseImpl(arg.value, {host_loc});
}
Xbyak::Reg64 RegAlloc::UseScratchGpr(Argument& arg) {
Xbyak::Reg64 RegAlloc::UseScratchGpr(Argument& arg) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToReg64(UseScratchImpl(arg.value, gpr_order));
}
Xbyak::Xmm RegAlloc::UseScratchXmm(Argument& arg) {
Xbyak::Xmm RegAlloc::UseScratchXmm(Argument& arg) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToXmm(UseScratchImpl(arg.value, xmm_order));
}
void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) {
void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
UseScratchImpl(arg.value, {host_loc});
}
void RegAlloc::DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) {
void RegAlloc::DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) noexcept {
ASSERT(reg.getKind() == Xbyak::Operand::XMM || reg.getKind() == Xbyak::Operand::REG);
const auto hostloc = static_cast<HostLoc>(reg.getIdx() + static_cast<size_t>(reg.getKind() == Xbyak::Operand::XMM ? HostLoc::XMM0 : HostLoc::RAX));
DefineValueImpl(inst, hostloc);
}
void RegAlloc::DefineValue(IR::Inst* inst, Argument& arg) {
void RegAlloc::DefineValue(IR::Inst* inst, Argument& arg) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
DefineValueImpl(inst, arg.value);
}
void RegAlloc::Release(const Xbyak::Reg& reg) {
void RegAlloc::Release(const Xbyak::Reg& reg) noexcept {
ASSERT(reg.getKind() == Xbyak::Operand::XMM || reg.getKind() == Xbyak::Operand::REG);
const auto hostloc = static_cast<HostLoc>(reg.getIdx() + static_cast<size_t>(reg.getKind() == Xbyak::Operand::XMM ? HostLoc::XMM0 : HostLoc::RAX));
LocInfo(hostloc).ReleaseOne();
}
Xbyak::Reg64 RegAlloc::ScratchGpr() {
return HostLocToReg64(ScratchImpl(gpr_order));
}
Xbyak::Reg64 RegAlloc::ScratchGpr(HostLoc desired_location) {
return HostLocToReg64(ScratchImpl({desired_location}));
}
Xbyak::Xmm RegAlloc::ScratchXmm() {
return HostLocToXmm(ScratchImpl(xmm_order));
}
Xbyak::Xmm RegAlloc::ScratchXmm(HostLoc desired_location) {
return HostLocToXmm(ScratchImpl({desired_location}));
}
HostLoc RegAlloc::UseImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) {
HostLoc RegAlloc::UseImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
if (use_value.IsImmediate()) {
return LoadImmediate(use_value, ScratchImpl(desired_locations));
}
const IR::Inst* use_inst = use_value.GetInst();
const auto* use_inst = use_value.GetInst();
const HostLoc current_location = *ValueLocation(use_inst);
const size_t max_bit_width = LocInfo(current_location).GetMaxBitWidth();
@@ -401,12 +309,12 @@ HostLoc RegAlloc::UseImpl(IR::Value use_value, const boost::container::static_ve
return destination_location;
}
HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) {
HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
if (use_value.IsImmediate()) {
return LoadImmediate(use_value, ScratchImpl(desired_locations));
}
const IR::Inst* use_inst = use_value.GetInst();
const auto* use_inst = use_value.GetInst();
const HostLoc current_location = *ValueLocation(use_inst);
const size_t bit_width = GetBitWidth(use_inst->GetType());
@@ -428,7 +336,7 @@ HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::st
return destination_location;
}
HostLoc RegAlloc::ScratchImpl(const boost::container::static_vector<HostLoc, 28>& desired_locations) {
HostLoc RegAlloc::ScratchImpl(const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
const HostLoc location = SelectARegister(desired_locations);
MoveOutOfTheWay(location);
LocInfo(location).WriteLock();
@@ -440,19 +348,17 @@ void RegAlloc::HostCall(IR::Inst* result_def,
const std::optional<Argument::copyable_reference> arg1,
const std::optional<Argument::copyable_reference> arg2,
const std::optional<Argument::copyable_reference> arg3
) {
) noexcept {
constexpr size_t args_count = 4;
constexpr std::array<HostLoc, args_count> args_hostloc = {ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4};
const std::array<std::optional<Argument::copyable_reference>, args_count> args = {arg0, arg1, arg2, arg3};
static const boost::container::static_vector<HostLoc, 28> other_caller_save = [args_hostloc]() {
static const boost::container::static_vector<HostLoc, 28> other_caller_save = [args_hostloc]() noexcept {
boost::container::static_vector<HostLoc, 28> ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end());
ret.erase(std::find(ret.begin(), ret.end(), ABI_RETURN));
for (auto hostloc : args_hostloc) {
for (auto const hostloc : args_hostloc) {
ret.erase(std::find(ret.begin(), ret.end(), hostloc));
}
return ret;
}();
@@ -494,41 +400,25 @@ void RegAlloc::HostCall(IR::Inst* result_def,
}
}
void RegAlloc::AllocStackSpace(const size_t stack_space) {
void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept {
ASSERT(stack_space < static_cast<size_t>(std::numeric_limits<s32>::max()));
ASSERT(reserved_stack_space == 0);
reserved_stack_space = stack_space;
code->sub(code->rsp, static_cast<u32>(stack_space));
}
void RegAlloc::ReleaseStackSpace(const size_t stack_space) {
void RegAlloc::ReleaseStackSpace(const size_t stack_space) noexcept {
ASSERT(stack_space < static_cast<size_t>(std::numeric_limits<s32>::max()));
ASSERT(reserved_stack_space == stack_space);
reserved_stack_space = 0;
code->add(code->rsp, static_cast<u32>(stack_space));
}
void RegAlloc::EndOfAllocScope() {
for (auto& iter : hostloc_info) {
iter.ReleaseAll();
}
}
void RegAlloc::AssertNoMoreUses() {
ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) { return i.IsEmpty(); }));
}
void RegAlloc::EmitVerboseDebuggingOutput() {
for (size_t i = 0; i < hostloc_info.size(); i++) {
hostloc_info[i].EmitVerboseDebuggingOutput(code, i);
}
}
HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc, 28>& desired_locations) const {
HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc, 28>& desired_locations) const noexcept {
boost::container::static_vector<HostLoc, 28> candidates = desired_locations; //Who let someone copy an ENTIRE VECTOR here?
// Find all locations that have not been allocated..
const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc) {
const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc) noexcept {
return !this->LocInfo(loc).IsLocked();
});
candidates.erase(allocated_locs, candidates.end());
@@ -536,30 +426,18 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc,
// Selects the best location out of the available locations.
// TODO: Actually do LRU or something. Currently we just try to pick something without a value if possible.
std::partition(candidates.begin(), candidates.end(), [this](auto loc) {
auto const it = std::find_if(candidates.begin(), candidates.end(), [this](auto const loc) noexcept {
return this->LocInfo(loc).IsEmpty();
});
return candidates.front();
return it != candidates.end() ? *it : candidates.front();
}
std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
for (size_t i = 0; i < hostloc_info.size(); i++) {
if (hostloc_info[i].ContainsValue(value)) {
return static_cast<HostLoc>(i);
}
}
return std::nullopt;
}
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) {
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) noexcept {
ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
LocInfo(host_loc).AddValue(def_inst);
}
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) noexcept {
ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
if (use_inst.IsImmediate()) {
@@ -574,7 +452,7 @@ void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
DefineValueImpl(def_inst, location);
}
HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) noexcept {
ASSERT_MSG(imm.IsImmediate(), "imm is not an immediate");
if (HostLocIsGPR(host_loc)) {
@@ -602,55 +480,46 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
UNREACHABLE();
}
void RegAlloc::Move(HostLoc to, HostLoc from) {
void RegAlloc::Move(HostLoc to, HostLoc from) noexcept {
const size_t bit_width = LocInfo(from).GetMaxBitWidth();
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
ASSERT(bit_width <= HostLocBitWidth(to));
if (LocInfo(from).IsEmpty()) {
return;
if (!LocInfo(from).IsEmpty()) {
EmitMove(bit_width, to, from);
LocInfo(to) = std::exchange(LocInfo(from), {});
}
EmitMove(bit_width, to, from);
LocInfo(to) = std::exchange(LocInfo(from), {});
}
void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) {
void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) noexcept {
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsEmpty());
EmitMove(bit_width, to, from);
}
void RegAlloc::Exchange(HostLoc a, HostLoc b) {
void RegAlloc::Exchange(HostLoc a, HostLoc b) noexcept {
ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
ASSERT(LocInfo(a).GetMaxBitWidth() <= HostLocBitWidth(b));
ASSERT(LocInfo(b).GetMaxBitWidth() <= HostLocBitWidth(a));
if (LocInfo(a).IsEmpty()) {
Move(a, b);
return;
}
if (LocInfo(b).IsEmpty()) {
} else if (LocInfo(b).IsEmpty()) {
Move(b, a);
return;
} else {
EmitExchange(a, b);
std::swap(LocInfo(a), LocInfo(b));
}
EmitExchange(a, b);
std::swap(LocInfo(a), LocInfo(b));
}
void RegAlloc::MoveOutOfTheWay(HostLoc reg) {
void RegAlloc::MoveOutOfTheWay(HostLoc reg) noexcept {
ASSERT(!LocInfo(reg).IsLocked());
if (!LocInfo(reg).IsEmpty()) {
SpillRegister(reg);
}
}
void RegAlloc::SpillRegister(HostLoc loc) {
void RegAlloc::SpillRegister(HostLoc loc) noexcept {
ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
ASSERT_MSG(!LocInfo(loc).IsEmpty(), "There is no need to spill unoccupied registers");
ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt");
@@ -659,7 +528,7 @@ void RegAlloc::SpillRegister(HostLoc loc) {
Move(new_loc, loc);
}
HostLoc RegAlloc::FindFreeSpill() const {
HostLoc RegAlloc::FindFreeSpill() const noexcept {
for (size_t i = static_cast<size_t>(HostLoc::FirstSpill); i < hostloc_info.size(); i++) {
const auto loc = static_cast<HostLoc>(i);
if (LocInfo(loc).IsEmpty()) {
@@ -670,24 +539,14 @@ HostLoc RegAlloc::FindFreeSpill() const {
ASSERT_FALSE("All spill locations are full");
}
HostLocInfo& RegAlloc::LocInfo(HostLoc loc) {
ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
return hostloc_info[static_cast<size_t>(loc)];
}
const HostLocInfo& RegAlloc::LocInfo(HostLoc loc) const {
ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
return hostloc_info[static_cast<size_t>(loc)];
}
inline static Xbyak::RegExp SpillToOpArg_Helper1(HostLoc loc, size_t reserved_stack_space) {
inline static Xbyak::RegExp SpillToOpArg_Helper1(HostLoc loc, size_t reserved_stack_space) noexcept {
ASSERT(HostLocIsSpill(loc));
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations");
return Xbyak::util::rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(StackLayout::spill[0]);
}
void RegAlloc::EmitMove(size_t bit_width, HostLoc to, HostLoc from) {
void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept {
if (HostLocIsXMM(to) && HostLocIsXMM(from)) {
MAYBE_AVX(movaps, HostLocToXmm(to), HostLocToXmm(from));
} else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
@@ -766,7 +625,7 @@ void RegAlloc::EmitMove(size_t bit_width, HostLoc to, HostLoc from) {
}
}
void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
void RegAlloc::EmitExchange(const HostLoc a, const HostLoc b) noexcept {
if (HostLocIsGPR(a) && HostLocIsGPR(b)) {
code->xchg(HostLocToReg64(a), HostLocToReg64(b));
} else if (HostLocIsXMM(a) && HostLocIsXMM(b)) {
@@ -776,7 +635,7 @@ void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
}
}
Xbyak::Address RegAlloc::SpillToOpArg(HostLoc loc) {
Xbyak::Address RegAlloc::SpillToOpArg(const HostLoc loc) noexcept {
return Xbyak::util::xword[SpillToOpArg_Helper1(loc, reserved_stack_space)];
}
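
The SelectARegister change above replaces a full std::partition pass with a single std::find_if, keeping the same preference for an empty location while avoiding the reshuffle. A simplified sketch of that selection logic, with placeholder types standing in for HostLoc and HostLocInfo and the assumption that at least one unlocked candidate survives (as the real allocator guarantees):

#include <algorithm>
#include <vector>

struct LocInfo {
    bool locked = false;  // currently allocated to an instruction
    bool empty = true;    // holds no value at all
};

// Pick an unlocked location, preferring one that is completely empty.
static int SelectARegister(std::vector<int> candidates, const std::vector<LocInfo>& info) {
    // Keep only locations that are not locked.
    candidates.erase(std::remove_if(candidates.begin(), candidates.end(),
                                    [&](int loc) { return info[loc].locked; }),
                     candidates.end());
    // Prefer an empty location; otherwise fall back to the first unlocked one.
    const auto it = std::find_if(candidates.begin(), candidates.end(),
                                 [&](int loc) { return info[loc].empty; });
    return it != candidates.end() ? *it : candidates.front();
}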

View File

@@ -8,12 +8,12 @@
#include <array>
#include <functional>
#include <optional>
#include <utility>
#include <vector>
#include <mcl/stdint.hpp>
#include <xbyak/xbyak.h>
#include <boost/container/static_vector.hpp>
#include <boost/container/flat_set.hpp>
#include <boost/pool/pool_alloc.hpp>
#include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/backend/x64/hostloc.h"
@@ -33,63 +33,100 @@ class RegAlloc;
struct HostLocInfo {
public:
bool IsLocked() const;
bool IsEmpty() const;
bool IsLastUse() const;
HostLocInfo() {}
inline bool IsLocked() const {
return is_being_used_count > 0;
}
inline bool IsEmpty() const {
return is_being_used_count == 0 && values.empty();
}
inline bool IsLastUse() const {
return is_being_used_count == 0 && current_references == 1 && accumulated_uses + 1 == total_uses;
}
inline void SetLastUse() noexcept {
ASSERT(IsLastUse());
is_set_last_use = true;
}
inline void ReadLock() noexcept {
ASSERT(size_t(is_being_used_count) + 1 < std::numeric_limits<uint16_t>::max());
ASSERT(!is_scratch);
is_being_used_count++;
}
inline void WriteLock() noexcept {
ASSERT(size_t(is_being_used_count) + 1 < std::numeric_limits<uint16_t>::max());
ASSERT(is_being_used_count == 0);
is_being_used_count++;
is_scratch = true;
}
inline void AddArgReference() noexcept {
ASSERT(size_t(current_references) + 1 < std::numeric_limits<uint16_t>::max());
current_references++;
ASSERT(accumulated_uses + current_references <= total_uses);
}
void ReleaseOne() noexcept;
void ReleaseAll() noexcept;
void SetLastUse();
void ReadLock();
void WriteLock();
void AddArgReference();
void ReleaseOne();
void ReleaseAll();
bool ContainsValue(const IR::Inst* inst) const;
size_t GetMaxBitWidth() const;
void AddValue(IR::Inst* inst);
void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const;
/// Checks if the given instruction is in our values set
/// SAFETY: Const is casted away, irrelevant since this is only used for checking
inline bool ContainsValue(const IR::Inst* inst) const noexcept {
//return values.contains(const_cast<IR::Inst*>(inst));
return std::find(values.begin(), values.end(), inst) != values.end();
}
inline size_t GetMaxBitWidth() const noexcept {
return max_bit_width;
}
void AddValue(IR::Inst* inst) noexcept;
void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept;
private:
//non trivial
std::vector<IR::Inst*> values; //24
//sometimes zeroed
size_t accumulated_uses = 0; //8
// Block state
size_t total_uses = 0; //8
// Value state
size_t max_bit_width = 0; //8
uint16_t total_uses = 0; //8
//sometimes zeroed
uint16_t accumulated_uses = 0; //8
//always zeroed
// Current instruction state
size_t is_being_used_count = 0; //8
size_t current_references = 0; //8
bool is_scratch = false; //1
bool is_set_last_use = false; //1
uint16_t is_being_used_count = 0; //8
uint16_t current_references = 0; //8
// Value state
uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128
bool is_scratch : 1 = false; //1
bool is_set_last_use : 1 = false; //1
alignas(16) char padding;
};
static_assert(sizeof(HostLocInfo) == 72);
static_assert(sizeof(HostLocInfo) == 64);
struct Argument {
public:
using copyable_reference = std::reference_wrapper<Argument>;
IR::Type GetType() const;
bool IsImmediate() const;
bool IsVoid() const;
inline IR::Type GetType() const noexcept {
return value.GetType();
}
inline bool IsImmediate() const noexcept {
return value.IsImmediate();
}
inline bool IsVoid() const noexcept {
return GetType() == IR::Type::Void;
}
bool FitsInImmediateU32() const;
bool FitsInImmediateS32() const;
bool FitsInImmediateU32() const noexcept;
bool FitsInImmediateS32() const noexcept;
bool GetImmediateU1() const;
u8 GetImmediateU8() const;
u16 GetImmediateU16() const;
u32 GetImmediateU32() const;
u64 GetImmediateS32() const;
u64 GetImmediateU64() const;
IR::Cond GetImmediateCond() const;
IR::AccType GetImmediateAccType() const;
bool GetImmediateU1() const noexcept;
u8 GetImmediateU8() const noexcept;
u16 GetImmediateU16() const noexcept;
u32 GetImmediateU32() const noexcept;
u64 GetImmediateS32() const noexcept;
u64 GetImmediateU64() const noexcept;
IR::Cond GetImmediateCond() const noexcept;
IR::AccType GetImmediateAccType() const noexcept;
bool IsInGpr() const;
bool IsInXmm() const;
bool IsInMemory() const;
/// Is this value currently in a GPR?
bool IsInGpr() const noexcept;
bool IsInXmm() const noexcept;
bool IsInMemory() const noexcept;
private:
friend class RegAlloc;
explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
@@ -103,76 +140,124 @@ private:
class RegAlloc final {
public:
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
RegAlloc() = default;
RegAlloc(BlockOfCode* code, boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order);
RegAlloc() noexcept = default;
RegAlloc(BlockOfCode* code, boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order) noexcept;
ArgumentInfo GetArgumentInfo(IR::Inst* inst);
void RegisterPseudoOperation(IR::Inst* inst);
bool IsValueLive(const IR::Inst* inst) const;
ArgumentInfo GetArgumentInfo(const IR::Inst* inst) noexcept;
void RegisterPseudoOperation(const IR::Inst* inst) noexcept;
inline bool IsValueLive(const IR::Inst* inst) const noexcept {
return !!ValueLocation(inst);
}
inline Xbyak::Reg64 UseGpr(Argument& arg) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToReg64(UseImpl(arg.value, gpr_order));
}
inline Xbyak::Xmm UseXmm(Argument& arg) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToXmm(UseImpl(arg.value, xmm_order));
}
inline OpArg UseOpArg(Argument& arg) noexcept {
return UseGpr(arg);
}
inline void Use(Argument& arg, const HostLoc host_loc) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
UseImpl(arg.value, {host_loc});
}
Xbyak::Reg64 UseGpr(Argument& arg);
Xbyak::Xmm UseXmm(Argument& arg);
OpArg UseOpArg(Argument& arg);
void Use(Argument& arg, HostLoc host_loc);
Xbyak::Reg64 UseScratchGpr(Argument& arg) noexcept;
Xbyak::Xmm UseScratchXmm(Argument& arg) noexcept;
void UseScratch(Argument& arg, HostLoc host_loc) noexcept;
Xbyak::Reg64 UseScratchGpr(Argument& arg);
Xbyak::Xmm UseScratchXmm(Argument& arg);
void UseScratch(Argument& arg, HostLoc host_loc);
void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) noexcept;
void DefineValue(IR::Inst* inst, Argument& arg) noexcept;
void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
void DefineValue(IR::Inst* inst, Argument& arg);
void Release(const Xbyak::Reg& reg) noexcept;
void Release(const Xbyak::Reg& reg);
Xbyak::Reg64 ScratchGpr();
Xbyak::Reg64 ScratchGpr(HostLoc desired_location);
Xbyak::Xmm ScratchXmm();
Xbyak::Xmm ScratchXmm(HostLoc desired_location);
inline Xbyak::Reg64 ScratchGpr() noexcept {
return HostLocToReg64(ScratchImpl(gpr_order));
}
inline Xbyak::Reg64 ScratchGpr(const HostLoc desired_location) noexcept {
return HostLocToReg64(ScratchImpl({desired_location}));
}
inline Xbyak::Xmm ScratchXmm() noexcept {
return HostLocToXmm(ScratchImpl(xmm_order));
}
inline Xbyak::Xmm ScratchXmm(HostLoc desired_location) noexcept {
return HostLocToXmm(ScratchImpl({desired_location}));
}
void HostCall(IR::Inst* result_def = nullptr,
const std::optional<Argument::copyable_reference> arg0 = {},
const std::optional<Argument::copyable_reference> arg1 = {},
const std::optional<Argument::copyable_reference> arg2 = {},
const std::optional<Argument::copyable_reference> arg3 = {}
);
) noexcept;
// TODO: Values in host flags
void AllocStackSpace(const size_t stack_space);
void ReleaseStackSpace(const size_t stack_space);
void EndOfAllocScope();
void AssertNoMoreUses();
void EmitVerboseDebuggingOutput();
void AllocStackSpace(const size_t stack_space) noexcept;
void ReleaseStackSpace(const size_t stack_space) noexcept;
inline void EndOfAllocScope() noexcept {
for (auto& iter : hostloc_info) {
iter.ReleaseAll();
}
}
inline void AssertNoMoreUses() noexcept {
ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) noexcept { return i.IsEmpty(); }));
}
inline void EmitVerboseDebuggingOutput() noexcept {
for (size_t i = 0; i < hostloc_info.size(); i++) {
hostloc_info[i].EmitVerboseDebuggingOutput(code, i);
}
}
private:
friend struct Argument;
HostLoc SelectARegister(const boost::container::static_vector<HostLoc, 28>& desired_locations) const;
std::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
HostLoc SelectARegister(const boost::container::static_vector<HostLoc, 28>& desired_locations) const noexcept;
inline std::optional<HostLoc> ValueLocation(const IR::Inst* value) const noexcept {
for (size_t i = 0; i < hostloc_info.size(); i++) {
if (hostloc_info[i].ContainsValue(value)) {
return HostLoc(i);
}
}
return std::nullopt;
}
HostLoc UseImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations);
HostLoc UseScratchImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations);
HostLoc ScratchImpl(const boost::container::static_vector<HostLoc, 28>& desired_locations);
void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc);
void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst);
HostLoc UseImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
HostLoc UseScratchImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
HostLoc ScratchImpl(const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) noexcept;
void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) noexcept;
HostLoc LoadImmediate(IR::Value imm, HostLoc host_loc);
void Move(HostLoc to, HostLoc from);
void CopyToScratch(size_t bit_width, HostLoc to, HostLoc from);
void Exchange(HostLoc a, HostLoc b);
void MoveOutOfTheWay(HostLoc reg);
HostLoc LoadImmediate(IR::Value imm, HostLoc host_loc) noexcept;
void Move(HostLoc to, HostLoc from) noexcept;
void CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) noexcept;
void Exchange(HostLoc a, HostLoc b) noexcept;
void MoveOutOfTheWay(HostLoc reg) noexcept;
void SpillRegister(HostLoc loc);
HostLoc FindFreeSpill() const;
HostLocInfo& LocInfo(HostLoc loc);
const HostLocInfo& LocInfo(HostLoc loc) const;
void SpillRegister(HostLoc loc) noexcept;
HostLoc FindFreeSpill() const noexcept;
inline HostLocInfo& LocInfo(const HostLoc loc) noexcept {
ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
return hostloc_info[static_cast<size_t>(loc)];
}
inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept {
ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
return hostloc_info[static_cast<size_t>(loc)];
}
void EmitMove(size_t bit_width, HostLoc to, HostLoc from);
void EmitExchange(HostLoc a, HostLoc b);
Xbyak::Address SpillToOpArg(HostLoc loc);
void EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept;
void EmitExchange(const HostLoc a, const HostLoc b) noexcept;
Xbyak::Address SpillToOpArg(const HostLoc loc) noexcept;
//data
alignas(64) boost::container::static_vector<HostLoc, 28> gpr_order;
alignas(64) boost::container::static_vector<HostLoc, 28> xmm_order;
alignas(64) boost::container::static_vector<HostLocInfo, NonSpillHostLocCount + SpillCount> hostloc_info;
alignas(64) std::array<HostLocInfo, NonSpillHostLocCount + SpillCount> hostloc_info;
BlockOfCode* code = nullptr;
size_t reserved_stack_space = 0;
};
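
The HostLocInfo shrink from 72 to 64 bytes above comes from narrowing the use counters to uint16_t, the bit width to uint8_t, and folding the two flags into bit-fields. A rough before/after sketch of that idea follows; field names mirror the diff, but the exact sizes depend on the ABI and on sizeof(std::vector), so treat the byte counts as typical for 64-bit targets rather than guaranteed.

#include <cstddef>
#include <cstdint>
#include <vector>

struct IRInstStub;  // stand-in for IR::Inst

struct WideInfo {                        // roughly the old layout
    std::vector<IRInstStub*> values;     // 24 bytes on common 64-bit ABIs
    std::size_t accumulated_uses = 0;
    std::size_t total_uses = 0;
    std::size_t max_bit_width = 0;
    std::size_t is_being_used_count = 0;
    std::size_t current_references = 0;
    bool is_scratch = false;
    bool is_set_last_use = false;
};                                       // typically 72 bytes

struct PackedInfo {                      // roughly the new layout
    std::vector<IRInstStub*> values;
    std::uint16_t total_uses = 0;
    std::uint16_t accumulated_uses = 0;
    std::uint16_t is_being_used_count = 0;
    std::uint16_t current_references = 0;
    std::uint8_t max_bit_width = 0;      // valid values: 1, 2, 4, 8, 16, 32, 128
    bool is_scratch : 1 = false;         // C++20 bit-field initializers, as in the diff
    bool is_set_last_use : 1 = false;
};

static_assert(sizeof(PackedInfo) <= 64, "fits in one cache line on typical 64-bit targets");

The real struct additionally carries an alignas(16) char padding member and asserts sizeof(HostLocInfo) == 64 exactly; the sketch leaves that out and only checks the upper bound.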

View File

@@ -1,3 +1,7 @@
// First we list common shared opcodes
// Since we give priority to A64 performance, we include them first so we
// can discard all A32 opcodes instead of having a "hole" in our checks
// clang-format off
// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ...
@@ -644,6 +648,68 @@ OPCODE(FPVectorToUnsignedFixed16, U128, U128
OPCODE(FPVectorToUnsignedFixed32, U128, U128, U8, U8, U1 )
OPCODE(FPVectorToUnsignedFixed64, U128, U128, U8, U8, U1 )
// A64 Context getters/setters
A64OPC(SetCheckBit, Void, U1 )
A64OPC(GetCFlag, U1, )
A64OPC(GetNZCVRaw, U32, )
A64OPC(SetNZCVRaw, Void, U32 )
A64OPC(SetNZCV, Void, NZCV )
A64OPC(GetW, U32, A64Reg )
A64OPC(GetX, U64, A64Reg )
A64OPC(GetS, U128, A64Vec )
A64OPC(GetD, U128, A64Vec )
A64OPC(GetQ, U128, A64Vec )
A64OPC(GetSP, U64, )
A64OPC(GetFPCR, U32, )
A64OPC(GetFPSR, U32, )
A64OPC(SetW, Void, A64Reg, U32 )
A64OPC(SetX, Void, A64Reg, U64 )
A64OPC(SetS, Void, A64Vec, U128 )
A64OPC(SetD, Void, A64Vec, U128 )
A64OPC(SetQ, Void, A64Vec, U128 )
A64OPC(SetSP, Void, U64 )
A64OPC(SetFPCR, Void, U32 )
A64OPC(SetFPSR, Void, U32 )
A64OPC(SetPC, Void, U64 )
A64OPC(CallSupervisor, Void, U32 )
A64OPC(ExceptionRaised, Void, U64, U64 )
A64OPC(DataCacheOperationRaised, Void, U64, U64, U64 )
A64OPC(InstructionCacheOperationRaised, Void, U64, U64 )
A64OPC(DataSynchronizationBarrier, Void, )
A64OPC(DataMemoryBarrier, Void, )
A64OPC(InstructionSynchronizationBarrier, Void, )
A64OPC(GetCNTFRQ, U32, )
A64OPC(GetCNTPCT, U64, )
A64OPC(GetCTR, U32, )
A64OPC(GetDCZID, U32, )
A64OPC(GetTPIDR, U64, )
A64OPC(GetTPIDRRO, U64, )
A64OPC(SetTPIDR, Void, U64 )
// A64 Memory access
A64OPC(ClearExclusive, Void, )
A64OPC(ReadMemory8, U8, U64, U64, AccType )
A64OPC(ReadMemory16, U16, U64, U64, AccType )
A64OPC(ReadMemory32, U32, U64, U64, AccType )
A64OPC(ReadMemory64, U64, U64, U64, AccType )
A64OPC(ReadMemory128, U128, U64, U64, AccType )
A64OPC(ExclusiveReadMemory8, U8, U64, U64, AccType )
A64OPC(ExclusiveReadMemory16, U16, U64, U64, AccType )
A64OPC(ExclusiveReadMemory32, U32, U64, U64, AccType )
A64OPC(ExclusiveReadMemory64, U64, U64, U64, AccType )
A64OPC(ExclusiveReadMemory128, U128, U64, U64, AccType )
A64OPC(WriteMemory8, Void, U64, U64, U8, AccType )
A64OPC(WriteMemory16, Void, U64, U64, U16, AccType )
A64OPC(WriteMemory32, Void, U64, U64, U32, AccType )
A64OPC(WriteMemory64, Void, U64, U64, U64, AccType )
A64OPC(WriteMemory128, Void, U64, U64, U128, AccType )
A64OPC(ExclusiveWriteMemory8, U32, U64, U64, U8, AccType )
A64OPC(ExclusiveWriteMemory16, U32, U64, U64, U16, AccType )
A64OPC(ExclusiveWriteMemory32, U32, U64, U64, U32, AccType )
A64OPC(ExclusiveWriteMemory64, U32, U64, U64, U64, AccType )
A64OPC(ExclusiveWriteMemory128, U32, U64, U64, U128, AccType )
// A32 Context getters/setters
A32OPC(SetCheckBit, Void, U1 )
A32OPC(GetRegister, U32, A32Reg )
@@ -706,65 +772,4 @@ A32OPC(CoprocGetTwoWords, U64, Copr
A32OPC(CoprocLoadWords, Void, CoprocInfo, U32 )
A32OPC(CoprocStoreWords, Void, CoprocInfo, U32 )
// A64 Context getters/setters
A64OPC(SetCheckBit, Void, U1 )
A64OPC(GetCFlag, U1, )
A64OPC(GetNZCVRaw, U32, )
A64OPC(SetNZCVRaw, Void, U32 )
A64OPC(SetNZCV, Void, NZCV )
A64OPC(GetW, U32, A64Reg )
A64OPC(GetX, U64, A64Reg )
A64OPC(GetS, U128, A64Vec )
A64OPC(GetD, U128, A64Vec )
A64OPC(GetQ, U128, A64Vec )
A64OPC(GetSP, U64, )
A64OPC(GetFPCR, U32, )
A64OPC(GetFPSR, U32, )
A64OPC(SetW, Void, A64Reg, U32 )
A64OPC(SetX, Void, A64Reg, U64 )
A64OPC(SetS, Void, A64Vec, U128 )
A64OPC(SetD, Void, A64Vec, U128 )
A64OPC(SetQ, Void, A64Vec, U128 )
A64OPC(SetSP, Void, U64 )
A64OPC(SetFPCR, Void, U32 )
A64OPC(SetFPSR, Void, U32 )
A64OPC(SetPC, Void, U64 )
A64OPC(CallSupervisor, Void, U32 )
A64OPC(ExceptionRaised, Void, U64, U64 )
A64OPC(DataCacheOperationRaised, Void, U64, U64, U64 )
A64OPC(InstructionCacheOperationRaised, Void, U64, U64 )
A64OPC(DataSynchronizationBarrier, Void, )
A64OPC(DataMemoryBarrier, Void, )
A64OPC(InstructionSynchronizationBarrier, Void, )
A64OPC(GetCNTFRQ, U32, )
A64OPC(GetCNTPCT, U64, )
A64OPC(GetCTR, U32, )
A64OPC(GetDCZID, U32, )
A64OPC(GetTPIDR, U64, )
A64OPC(GetTPIDRRO, U64, )
A64OPC(SetTPIDR, Void, U64 )
// A64 Memory access
A64OPC(ClearExclusive, Void, )
A64OPC(ReadMemory8, U8, U64, U64, AccType )
A64OPC(ReadMemory16, U16, U64, U64, AccType )
A64OPC(ReadMemory32, U32, U64, U64, AccType )
A64OPC(ReadMemory64, U64, U64, U64, AccType )
A64OPC(ReadMemory128, U128, U64, U64, AccType )
A64OPC(ExclusiveReadMemory8, U8, U64, U64, AccType )
A64OPC(ExclusiveReadMemory16, U16, U64, U64, AccType )
A64OPC(ExclusiveReadMemory32, U32, U64, U64, AccType )
A64OPC(ExclusiveReadMemory64, U64, U64, U64, AccType )
A64OPC(ExclusiveReadMemory128, U128, U64, U64, AccType )
A64OPC(WriteMemory8, Void, U64, U64, U8, AccType )
A64OPC(WriteMemory16, Void, U64, U64, U16, AccType )
A64OPC(WriteMemory32, Void, U64, U64, U32, AccType )
A64OPC(WriteMemory64, Void, U64, U64, U64, AccType )
A64OPC(WriteMemory128, Void, U64, U64, U128, AccType )
A64OPC(ExclusiveWriteMemory8, U32, U64, U64, U8, AccType )
A64OPC(ExclusiveWriteMemory16, U32, U64, U64, U16, AccType )
A64OPC(ExclusiveWriteMemory32, U32, U64, U64, U32, AccType )
A64OPC(ExclusiveWriteMemory64, U32, U64, U64, U64, AccType )
A64OPC(ExclusiveWriteMemory128, U32, U64, U64, U128, AccType )
// clang-format on