FEXCore: Moves CodeEmitter to FHU

Now that the vixl dependency is gone, this gets moved to FHU since the
frontend is going to need it for a microjit.
This commit is contained in:
Ryan Houdek 2024-05-08 12:00:08 -07:00
parent 1f40590f9a
commit 9e1840e974
No known key found for this signature in database
36 changed files with 3308 additions and 3328 deletions

View File

@ -357,6 +357,7 @@ if (BUILD_TESTS)
endif()
add_subdirectory(FEXHeaderUtils/)
add_subdirectory(CodeEmitter/)
add_subdirectory(FEXCore/)
# Binfmt_misc files must be installed prior to Source/ installs

View File

@ -0,0 +1,2 @@
add_library(CodeEmitter INTERFACE)
target_include_directories(CodeEmitter INTERFACE .)

File diff suppressed because it is too large Load Diff

View File

@ -8,11 +8,11 @@ public:
public:
// Conditional branch immediate
///< Branch conditional
// Emits a conditional branch from a raw immediate.
// The base opcode (0b0101'010 << 25) is handed to Branch_Conditional with Op1 = 0 and Op0 = 0;
// Cond and Imm are forwarded untouched.
// NOTE(review): how Imm is scaled/encoded is decided inside Branch_Conditional, which is only
// partly visible here — confirm before passing byte offsets.
void b(FEXCore::ARMEmitter::Condition Cond, uint32_t Imm) {
void b(ARMEmitter::Condition Cond, uint32_t Imm) {
constexpr uint32_t Op = 0b0101'010 << 25;
Branch_Conditional(Op, 0, 0, Cond, Imm);
}
void b(FEXCore::ARMEmitter::Condition Cond, BackwardLabel const* Label) {
void b(ARMEmitter::Condition Cond, BackwardLabel const* Label) {
int32_t Imm = static_cast<int32_t>(Label->Location - GetCursorAddress<uint8_t*>());
LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
constexpr uint32_t Op = 0b0101'010 << 25;
@ -20,13 +20,13 @@ public:
}
// Conditional branch to a forward label.
// The requires-clause restricts LabelType to ForwardLabel or SingleUseForwardLabel.
// The current cursor address is registered on the label (tagged InstType::BC) before the
// instruction is written, and the branch itself is emitted with an immediate of 0.
// NOTE(review): presumably the zero immediate is patched once the label is bound — confirm in
// the label-binding code, which is not visible here.
template<typename LabelType>
requires (std::is_same_v<LabelType, ForwardLabel> || std::is_same_v<LabelType, SingleUseForwardLabel>)
void b(FEXCore::ARMEmitter::Condition Cond, LabelType *Label) {
void b(ARMEmitter::Condition Cond, LabelType *Label) {
AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress<uint8_t*>(), .Type = SingleUseForwardLabel::InstType::BC });
constexpr uint32_t Op = 0b0101'010 << 25;
Branch_Conditional(Op, 0, 0, Cond, 0);
}
void b(FEXCore::ARMEmitter::Condition Cond, BiDirectionalLabel *Label) {
void b(ARMEmitter::Condition Cond, BiDirectionalLabel *Label) {
if (Label->Backward.Location) {
b(Cond, &Label->Backward);
}
@ -36,11 +36,11 @@ public:
}
///< Branch consistent conditional
// Emits the "consistent" conditional branch from a raw immediate.
// Uses the same base opcode as b() (0b0101'010 << 25) but passes Op0 = 1 to Branch_Conditional;
// Op1 stays 0, and Cond and Imm are forwarded untouched.
void bc(FEXCore::ARMEmitter::Condition Cond, uint32_t Imm) {
void bc(ARMEmitter::Condition Cond, uint32_t Imm) {
constexpr uint32_t Op = 0b0101'010 << 25;
Branch_Conditional(Op, 0, 1, Cond, Imm);
}
void bc(FEXCore::ARMEmitter::Condition Cond, BackwardLabel const* Label) {
void bc(ARMEmitter::Condition Cond, BackwardLabel const* Label) {
int32_t Imm = static_cast<int32_t>(Label->Location - GetCursorAddress<uint8_t*>());
LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
constexpr uint32_t Op = 0b0101'010 << 25;
@ -49,13 +49,13 @@ public:
// Consistent conditional branch to a forward label.
// The requires-clause restricts LabelType to ForwardLabel or SingleUseForwardLabel.
// Registers the current cursor address on the label (tagged InstType::BC), then emits the
// instruction with Op0 = 1 and an immediate of 0.
// NOTE(review): presumably the zero immediate is fixed up when the label is bound — confirm.
template<typename LabelType>
requires (std::is_same_v<LabelType, ForwardLabel> || std::is_same_v<LabelType, SingleUseForwardLabel>)
void bc(FEXCore::ARMEmitter::Condition Cond, LabelType *Label) {
void bc(ARMEmitter::Condition Cond, LabelType *Label) {
AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress<uint8_t*>(), .Type = SingleUseForwardLabel::InstType::BC });
constexpr uint32_t Op = 0b0101'010 << 25;
Branch_Conditional(Op, 0, 1, Cond, 0);
}
void bc(FEXCore::ARMEmitter::Condition Cond, BiDirectionalLabel *Label) {
void bc(ARMEmitter::Condition Cond, BiDirectionalLabel *Label) {
if (Label->Backward.Location) {
bc(Cond, &Label->Backward);
}
@ -65,7 +65,7 @@ public:
}
// Unconditional branch register
void br(FEXCore::ARMEmitter::Register rn) {
void br(ARMEmitter::Register rn) {
constexpr uint32_t Op = 0b1101011 << 25 |
0b0'000 << 21 | // opc
0b1'1111 << 16 | // op2
@ -74,7 +74,7 @@ public:
UnconditionalBranch(Op, rn);
}
void blr(FEXCore::ARMEmitter::Register rn) {
void blr(ARMEmitter::Register rn) {
constexpr uint32_t Op = 0b1101011 << 25 |
0b0'001 << 21 | // opc
0b1'1111 << 16 | // op2
@ -83,7 +83,7 @@ public:
UnconditionalBranch(Op, rn);
}
void ret(FEXCore::ARMEmitter::Register rn = FEXCore::ARMEmitter::Reg::r30) {
void ret(ARMEmitter::Register rn = ARMEmitter::Reg::r30) {
constexpr uint32_t Op = 0b1101011 << 25 |
0b0'010 << 21 | // opc
0b1'1111 << 16 | // op2
@ -156,13 +156,13 @@ public:
}
// Compare and branch
// Compare-and-branch (cbz) from a raw immediate.
// Hands the base opcode 0b0011'0100 << 24 to CompareAndBranch together with the operand
// size s, the register rt and Imm, all forwarded unchanged.
void cbz(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, uint32_t Imm) {
void cbz(ARMEmitter::Size s, ARMEmitter::Register rt, uint32_t Imm) {
constexpr uint32_t Op = 0b0011'0100 << 24;
CompareAndBranch(Op, s, rt, Imm);
}
void cbz(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, BackwardLabel const* Label) {
void cbz(ARMEmitter::Size s, ARMEmitter::Register rt, BackwardLabel const* Label) {
int32_t Imm = static_cast<int32_t>(Label->Location - GetCursorAddress<uint8_t*>());
LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
@ -173,7 +173,7 @@ public:
template<typename LabelType>
requires (std::is_same_v<LabelType, ForwardLabel> || std::is_same_v<LabelType, SingleUseForwardLabel>)
void cbz(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, LabelType *Label) {
void cbz(ARMEmitter::Size s, ARMEmitter::Register rt, LabelType *Label) {
AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress<uint8_t*>(), .Type = SingleUseForwardLabel::InstType::BC });
constexpr uint32_t Op = 0b0011'0100 << 24;
@ -181,7 +181,7 @@ public:
CompareAndBranch(Op, s, rt, 0);
}
void cbz(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, BiDirectionalLabel *Label) {
void cbz(ARMEmitter::Size s, ARMEmitter::Register rt, BiDirectionalLabel *Label) {
if (Label->Backward.Location) {
cbz(s, rt, &Label->Backward);
}
@ -190,13 +190,13 @@ public:
}
}
// Compare-and-branch (cbnz) from a raw immediate.
// Identical in shape to cbz(), but the base opcode is 0b0011'0101 << 24 (low opcode bit set).
// s, rt and Imm are forwarded unchanged to CompareAndBranch.
void cbnz(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, uint32_t Imm) {
void cbnz(ARMEmitter::Size s, ARMEmitter::Register rt, uint32_t Imm) {
constexpr uint32_t Op = 0b0011'0101 << 24;
CompareAndBranch(Op, s, rt, Imm);
}
void cbnz(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, BackwardLabel const* Label) {
void cbnz(ARMEmitter::Size s, ARMEmitter::Register rt, BackwardLabel const* Label) {
int32_t Imm = static_cast<int32_t>(Label->Location - GetCursorAddress<uint8_t*>());
LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
@ -207,7 +207,7 @@ public:
template<typename LabelType>
requires (std::is_same_v<LabelType, ForwardLabel> || std::is_same_v<LabelType, SingleUseForwardLabel>)
void cbnz(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, LabelType *Label) {
void cbnz(ARMEmitter::Size s, ARMEmitter::Register rt, LabelType *Label) {
AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress<uint8_t*>(), .Type = SingleUseForwardLabel::InstType::BC });
constexpr uint32_t Op = 0b0011'0101 << 24;
@ -215,7 +215,7 @@ public:
CompareAndBranch(Op, s, rt, 0);
}
void cbnz(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, BiDirectionalLabel *Label) {
void cbnz(ARMEmitter::Size s, ARMEmitter::Register rt, BiDirectionalLabel *Label) {
if (Label->Backward.Location) {
cbnz(s, rt, &Label->Backward);
}
@ -225,12 +225,12 @@ public:
}
// Test and branch immediate
// Test-and-branch (tbz) from a raw immediate.
// Hands the base opcode 0b0011'0110 << 24 to TestAndBranch along with the register rt,
// the bit index Bit and Imm, all forwarded unchanged.
void tbz(FEXCore::ARMEmitter::Register rt, uint32_t Bit, uint32_t Imm) {
void tbz(ARMEmitter::Register rt, uint32_t Bit, uint32_t Imm) {
constexpr uint32_t Op = 0b0011'0110 << 24;
TestAndBranch(Op, rt, Bit, Imm);
}
void tbz(FEXCore::ARMEmitter::Register rt, uint32_t Bit, BackwardLabel const* Label) {
void tbz(ARMEmitter::Register rt, uint32_t Bit, BackwardLabel const* Label) {
int32_t Imm = static_cast<int32_t>(Label->Location - GetCursorAddress<uint8_t*>());
LOGMAN_THROW_A_FMT(Imm >= -32768 && Imm <= 32764 && ((Imm & 0b11) == 0), "Unscaled offset too large");
@ -241,7 +241,7 @@ public:
template<typename LabelType>
requires (std::is_same_v<LabelType, ForwardLabel> || std::is_same_v<LabelType, SingleUseForwardLabel>)
void tbz(FEXCore::ARMEmitter::Register rt, uint32_t Bit, LabelType *Label) {
void tbz(ARMEmitter::Register rt, uint32_t Bit, LabelType *Label) {
AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress<uint8_t*>(), .Type = SingleUseForwardLabel::InstType::TEST_BRANCH });
constexpr uint32_t Op = 0b0011'0110 << 24;
@ -249,7 +249,7 @@ public:
TestAndBranch(Op, rt, Bit, 0);
}
void tbz(FEXCore::ARMEmitter::Register rt, uint32_t Bit, BiDirectionalLabel *Label) {
void tbz(ARMEmitter::Register rt, uint32_t Bit, BiDirectionalLabel *Label) {
if (Label->Backward.Location) {
tbz(rt, Bit, &Label->Backward);
}
@ -258,12 +258,12 @@ public:
}
}
// Test-and-branch (tbnz) from a raw immediate.
// Identical in shape to tbz(), but the base opcode is 0b0011'0111 << 24 (low opcode bit set).
// rt, Bit and Imm are forwarded unchanged to TestAndBranch.
void tbnz(FEXCore::ARMEmitter::Register rt, uint32_t Bit, uint32_t Imm) {
void tbnz(ARMEmitter::Register rt, uint32_t Bit, uint32_t Imm) {
constexpr uint32_t Op = 0b0011'0111 << 24;
TestAndBranch(Op, rt, Bit, Imm);
}
void tbnz(FEXCore::ARMEmitter::Register rt, uint32_t Bit, BackwardLabel const* Label) {
void tbnz(ARMEmitter::Register rt, uint32_t Bit, BackwardLabel const* Label) {
int32_t Imm = static_cast<int32_t>(Label->Location - GetCursorAddress<uint8_t*>());
LOGMAN_THROW_A_FMT(Imm >= -32768 && Imm <= 32764 && ((Imm & 0b11) == 0), "Unscaled offset too large");
@ -274,14 +274,14 @@ public:
// tbnz to a forward label.
// The requires-clause restricts LabelType to ForwardLabel or SingleUseForwardLabel.
// Registers the current cursor address on the label, tagged InstType::TEST_BRANCH (not BC as
// the conditional/compare branches use), then emits the instruction with an immediate of 0.
// NOTE(review): presumably the zero immediate is fixed up when the label is bound — confirm.
template<typename LabelType>
requires (std::is_same_v<LabelType, ForwardLabel> || std::is_same_v<LabelType, SingleUseForwardLabel>)
void tbnz(FEXCore::ARMEmitter::Register rt, uint32_t Bit, LabelType *Label) {
void tbnz(ARMEmitter::Register rt, uint32_t Bit, LabelType *Label) {
AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress<uint8_t*>(), .Type = SingleUseForwardLabel::InstType::TEST_BRANCH });
constexpr uint32_t Op = 0b0011'0111 << 24;
TestAndBranch(Op, rt, Bit, 0);
}
void tbnz(FEXCore::ARMEmitter::Register rt, uint32_t Bit, BiDirectionalLabel *Label) {
void tbnz(ARMEmitter::Register rt, uint32_t Bit, BiDirectionalLabel *Label) {
if (Label->Backward.Location) {
tbnz(rt, Bit, &Label->Backward);
}
@ -292,7 +292,7 @@ public:
private:
// Conditional branch immediate
void Branch_Conditional(uint32_t Op, uint32_t Op1, uint32_t Op0, FEXCore::ARMEmitter::Condition Cond, uint32_t Imm) {
void Branch_Conditional(uint32_t Op, uint32_t Op1, uint32_t Op0, ARMEmitter::Condition Cond, uint32_t Imm) {
uint32_t Instr = Op;
Instr |= Op1 << 24;
@ -304,7 +304,7 @@ private:
}
// Unconditional branch register
void UnconditionalBranch(uint32_t Op, FEXCore::ARMEmitter::Register rn) {
void UnconditionalBranch(uint32_t Op, ARMEmitter::Register rn) {
uint32_t Instr = Op;
Instr |= Encode_rn(rn);
dc32(Instr);
@ -318,8 +318,8 @@ private:
}
// Compare and branch
void CompareAndBranch(uint32_t Op, FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, uint32_t Imm) {
const uint32_t SF = s == FEXCore::ARMEmitter::Size::i64Bit ? (1U << 31) : 0;
void CompareAndBranch(uint32_t Op, ARMEmitter::Size s, ARMEmitter::Register rt, uint32_t Imm) {
const uint32_t SF = s == ARMEmitter::Size::i64Bit ? (1U << 31) : 0;
uint32_t Instr = Op;
@ -330,7 +330,7 @@ private:
}
// Test and branch - immediate
void TestAndBranch(uint32_t Op, FEXCore::ARMEmitter::Register rt, uint32_t Bit, uint32_t Imm) {
void TestAndBranch(uint32_t Op, ARMEmitter::Register rt, uint32_t Bit, uint32_t Imm) {
uint32_t Instr = Op;
Instr |= (Bit >> 5) << 31;

View File

@ -4,7 +4,7 @@
#include <cstdint>
#include <cstring>
namespace FEXCore::ARMEmitter {
namespace ARMEmitter {
class Buffer {
public:
Buffer() {
@ -103,4 +103,4 @@ protected:
uint8_t* CurrentOffset;
uint64_t Size;
};
} // namespace FEXCore::ARMEmitter
} // namespace ARMEmitter

View File

@ -1,9 +1,6 @@
// SPDX-License-Identifier: MIT
#pragma once
#include "Interface/Core/ArchHelpers/CodeEmitter/Buffer.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Registers.h"
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/LogManager.h>
@ -11,6 +8,8 @@
#include <FEXCore/fextl/vector.h>
#include <FEXHeaderUtils/BitUtils.h>
#include <CodeEmitter/Buffer.h>
#include <CodeEmitter/Registers.h>
#include <array>
#include <cstdint>
@ -54,7 +53,7 @@
* it easier to select the correct load-store instruction. Mostly because these are a nightmare selecting
* the right instruction.
*/
namespace FEXCore::ARMEmitter {
namespace ARMEmitter {
/*
* This `Size` enum is used for most ALU operations.
* These follow the AArch64 encoding style in most cases.
@ -609,7 +608,7 @@ constexpr bool AreVectorsSequential(T first, const Args&... args) {
// Choices:
// - Size of ops passed as an argument rather than template to let the compiler use csel instead of branching.
// - Registers are unsized so they can be passed in a GPR and not need conversion operations
class Emitter : public FEXCore::ARMEmitter::Buffer {
class Emitter : public ARMEmitter::Buffer {
public:
Emitter() = default;
@ -804,7 +803,7 @@ public:
}
}
uint8_t ToImm8() {
uint8_t ToImm8() const {
// ARM imm8 float encoding
// Bit[7] - Sign
// Bit[6] - Exponent
@ -831,17 +830,17 @@ public:
}
};
#include "Interface/Core/ArchHelpers/CodeEmitter/VixlUtils.inl"
#include <CodeEmitter/VixlUtils.inl>
public:
// TODO: Implement SME when it matters.
#include "Interface/Core/ArchHelpers/CodeEmitter/ALUOps.inl"
#include "Interface/Core/ArchHelpers/CodeEmitter/BranchOps.inl"
#include "Interface/Core/ArchHelpers/CodeEmitter/LoadstoreOps.inl"
#include "Interface/Core/ArchHelpers/CodeEmitter/SystemOps.inl"
#include "Interface/Core/ArchHelpers/CodeEmitter/ScalarOps.inl"
#include "Interface/Core/ArchHelpers/CodeEmitter/ASIMDOps.inl"
#include "Interface/Core/ArchHelpers/CodeEmitter/SVEOps.inl"
#include <CodeEmitter/ALUOps.inl>
#include <CodeEmitter/BranchOps.inl>
#include <CodeEmitter/LoadstoreOps.inl>
#include <CodeEmitter/SystemOps.inl>
#include <CodeEmitter/ScalarOps.inl>
#include <CodeEmitter/ASIMDOps.inl>
#include <CodeEmitter/SVEOps.inl>
private:
template<typename T>
@ -903,4 +902,4 @@ private:
return FEXCore::ToUnderlying(Reg);
}
};
} // namespace FEXCore::ARMEmitter
} // namespace ARMEmitter

View File

@ -6,7 +6,7 @@
#include <compare>
#include <cstdint>
namespace FEXCore::ARMEmitter {
namespace ARMEmitter {
class WRegister;
class XRegister;
@ -1024,4 +1024,4 @@ enum class OpType : uint32_t {
Destructive = 0,
Constructive,
};
} // namespace FEXCore::ARMEmitter
} // namespace ARMEmitter

View File

@ -1506,10 +1506,10 @@ public:
}
// SVE broadcast floating-point immediate (unpredicated)
void fdup(FEXCore::ARMEmitter::SubRegSize size, FEXCore::ARMEmitter::ZRegister zd, float Value) {
LOGMAN_THROW_AA_FMT(size == FEXCore::ARMEmitter::SubRegSize::i16Bit ||
size == FEXCore::ARMEmitter::SubRegSize::i32Bit ||
size == FEXCore::ARMEmitter::SubRegSize::i64Bit, "Unsupported fmov size");
void fdup(ARMEmitter::SubRegSize size, ARMEmitter::ZRegister zd, float Value) {
LOGMAN_THROW_AA_FMT(size == ARMEmitter::SubRegSize::i16Bit ||
size == ARMEmitter::SubRegSize::i32Bit ||
size == ARMEmitter::SubRegSize::i64Bit, "Unsupported fmov size");
uint32_t Imm{};
if (size == SubRegSize::i16Bit) {
Imm = FP16ToImm8(Float16(Value));
@ -1521,7 +1521,7 @@ public:
SVEBroadcastFloatImmUnpredicated(0b00, 0, Imm, size, zd);
}
// Convenience spelling of fdup for Z registers: size, zd and Value are forwarded to fdup
// unchanged, so all validation and encoding happen there.
void fmov(FEXCore::ARMEmitter::SubRegSize size, FEXCore::ARMEmitter::ZRegister zd, float Value) {
void fmov(ARMEmitter::SubRegSize size, ARMEmitter::ZRegister zd, float Value) {
fdup(size, zd, Value);
}
@ -3717,7 +3717,7 @@ private:
// SVE bitwise logical operations (predicated)
void SVEBitwiseLogicalPredicated(uint32_t opc, SubRegSize size, PRegister pg, ZRegister zdn, ZRegister zm, ZRegister zd) {
LOGMAN_THROW_AA_FMT(size != FEXCore::ARMEmitter::SubRegSize::i128Bit, "Can't use 128-bit size");
LOGMAN_THROW_AA_FMT(size != ARMEmitter::SubRegSize::i128Bit, "Can't use 128-bit size");
LOGMAN_THROW_A_FMT(zd == zdn, "zd needs to equal zdn");
LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");
@ -4743,7 +4743,7 @@ private:
dc32(Instr);
}
void SVEPermuteVector(uint32_t op0, FEXCore::ARMEmitter::ZRegister zd, FEXCore::ARMEmitter::ZRegister zm, uint32_t Imm) {
void SVEPermuteVector(uint32_t op0, ARMEmitter::ZRegister zd, ARMEmitter::ZRegister zm, uint32_t Imm) {
constexpr uint32_t Op = 0b0000'0101'0010'0000'000 << 13;
uint32_t Instr = Op;

View File

@ -33,7 +33,7 @@ public:
ASIMDScalarCopy(Op, 1, imm5, 0b0000, rd, rn);
}
// Convenience spelling of the scalar element dup: size, rd, rn and Index are forwarded to
// dup unchanged, so all validation and encoding happen there.
void mov(FEXCore::ARMEmitter::ScalarRegSize size, FEXCore::ARMEmitter::VRegister rd, FEXCore::ARMEmitter::VRegister rn, uint32_t Index) {
void mov(ARMEmitter::ScalarRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, uint32_t Index) {
dup(size, rd, rn, Index);
}
@ -1052,21 +1052,21 @@ public:
}
// Floating-point immediate
void fmov(FEXCore::ARMEmitter::ScalarRegSize size, FEXCore::ARMEmitter::VRegister rd, float Value) {
void fmov(ARMEmitter::ScalarRegSize size, ARMEmitter::VRegister rd, float Value) {
uint32_t M = 0;
uint32_t S = 0;
uint32_t ptype;
uint32_t imm8;
uint32_t imm5 = 0b0'0000;
if (size == FEXCore::ARMEmitter::ScalarRegSize::i16Bit) {
if (size == ARMEmitter::ScalarRegSize::i16Bit) {
ptype = 0b11;
imm8 = FP16ToImm8(Float16(Value));
}
else if (size == FEXCore::ARMEmitter::ScalarRegSize::i32Bit) {
else if (size == ARMEmitter::ScalarRegSize::i32Bit) {
ptype = 0b00;
imm8 = FP32ToImm8(Value);
}
else if (size == FEXCore::ARMEmitter::ScalarRegSize::i64Bit) {
else if (size == ARMEmitter::ScalarRegSize::i64Bit) {
ptype = 0b01;
imm8 = FP64ToImm8(Value);
}
@ -1077,7 +1077,7 @@ public:
FloatScalarImmediate(M, S, ptype, imm8, imm5, rd);
}
void FloatScalarImmediate(uint32_t M, uint32_t S, uint32_t ptype, uint32_t imm8, uint32_t imm5, FEXCore::ARMEmitter::VRegister rd) {
void FloatScalarImmediate(uint32_t M, uint32_t S, uint32_t ptype, uint32_t imm8, uint32_t imm5, ARMEmitter::VRegister rd) {
constexpr uint32_t Op = 0b0001'1110'0010'0000'0001'00 << 10;
uint32_t Instr = Op;
@ -1286,7 +1286,7 @@ public:
private:
// Advanced SIMD scalar copy
void ASIMDScalarCopy(uint32_t Op, uint32_t Q, uint32_t imm5, uint32_t imm4, FEXCore::ARMEmitter::VRegister rd, FEXCore::ARMEmitter::VRegister rn) {
void ASIMDScalarCopy(uint32_t Op, uint32_t Q, uint32_t imm5, uint32_t imm4, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn) {
uint32_t Instr = Op;
Instr |= Q << 30;

View File

@ -11,7 +11,7 @@ public:
// TODO: AT
// TODO: CFP
// TODO: CPP
// Data-cache maintenance instruction.
// Calls SystemInstruction with the base opcode 0b1101'0101'0000'1000'0111 << 12, L = 0,
// the underlying integer value of DCOp as the sub-operation, and rt as the register operand.
void dc(FEXCore::ARMEmitter::DataCacheOperation DCOp, FEXCore::ARMEmitter::Register rt) {
void dc(ARMEmitter::DataCacheOperation DCOp, ARMEmitter::Register rt) {
constexpr uint32_t Op = 0b1101'0101'0000'1000'0111 << 12;
SystemInstruction(Op, 0, FEXCore::ToUnderlying(DCOp), rt);
}
@ -48,67 +48,67 @@ public:
ExceptionGeneration(0b101, 0b000, 0b11, Imm);
}
// System instructions with register argument
// wfet: calls SystemInstructionWithReg with CRm = 0b0000, op2 = 0b000 and rt as the register.
void wfet(FEXCore::ARMEmitter::Register rt) {
void wfet(ARMEmitter::Register rt) {
SystemInstructionWithReg(0b0000, 0b000, rt);
}
// wfit: calls SystemInstructionWithReg with CRm = 0b0000, op2 = 0b001 and rt as the register
// (differs from wfet only in op2).
void wfit(FEXCore::ARMEmitter::Register rt) {
void wfit(ARMEmitter::Register rt) {
SystemInstructionWithReg(0b0000, 0b001, rt);
}
// Hints
// nop: emits the hint selected by HintRegister::NOP through Hint().
void nop() {
Hint(FEXCore::ARMEmitter::HintRegister::NOP);
Hint(ARMEmitter::HintRegister::NOP);
}
// yield: emits the hint selected by HintRegister::YIELD through Hint().
void yield() {
Hint(FEXCore::ARMEmitter::HintRegister::YIELD);
Hint(ARMEmitter::HintRegister::YIELD);
}
// wfe: emits the hint selected by HintRegister::WFE through Hint().
void wfe() {
Hint(FEXCore::ARMEmitter::HintRegister::WFE);
Hint(ARMEmitter::HintRegister::WFE);
}
// wfi: emits the hint selected by HintRegister::WFI through Hint().
void wfi() {
Hint(FEXCore::ARMEmitter::HintRegister::WFI);
Hint(ARMEmitter::HintRegister::WFI);
}
// sev: emits the hint selected by HintRegister::SEV through Hint().
void sev() {
Hint(FEXCore::ARMEmitter::HintRegister::SEV);
Hint(ARMEmitter::HintRegister::SEV);
}
// sevl: emits the hint selected by HintRegister::SEVL through Hint().
void sevl() {
Hint(FEXCore::ARMEmitter::HintRegister::SEVL);
Hint(ARMEmitter::HintRegister::SEVL);
}
// dgh: emits the hint selected by HintRegister::DGH through Hint().
void dgh() {
Hint(FEXCore::ARMEmitter::HintRegister::DGH);
Hint(ARMEmitter::HintRegister::DGH);
}
// csdb: emits the hint selected by HintRegister::CSDB through Hint().
void csdb() {
Hint(FEXCore::ARMEmitter::HintRegister::CSDB);
Hint(ARMEmitter::HintRegister::CSDB);
}
// Barriers
// clrex: imm defaults to 15 and is asserted to be below 16 before being passed to Barrier()
// as the CRm argument together with BarrierRegister::CLREX.
void clrex(uint32_t imm = 15) {
LOGMAN_THROW_AA_FMT(imm < 16, "Immediate out of range");
Barrier(FEXCore::ARMEmitter::BarrierRegister::CLREX, imm);
Barrier(ARMEmitter::BarrierRegister::CLREX, imm);
}
// dsb: calls Barrier() with BarrierRegister::DSB; the underlying integer value of Scope is
// supplied as the CRm argument.
void dsb(FEXCore::ARMEmitter::BarrierScope Scope) {
Barrier(FEXCore::ARMEmitter::BarrierRegister::DSB, FEXCore::ToUnderlying(Scope));
void dsb(ARMEmitter::BarrierScope Scope) {
Barrier(ARMEmitter::BarrierRegister::DSB, FEXCore::ToUnderlying(Scope));
}
// dmb: calls Barrier() with BarrierRegister::DMB; the underlying integer value of Scope is
// supplied as the CRm argument.
void dmb(FEXCore::ARMEmitter::BarrierScope Scope) {
Barrier(FEXCore::ARMEmitter::BarrierRegister::DMB, FEXCore::ToUnderlying(Scope));
void dmb(ARMEmitter::BarrierScope Scope) {
Barrier(ARMEmitter::BarrierRegister::DMB, FEXCore::ToUnderlying(Scope));
}
// isb: calls Barrier() with BarrierRegister::ISB; the scope is not configurable and is
// always the underlying value of BarrierScope::SY.
void isb() {
Barrier(FEXCore::ARMEmitter::BarrierRegister::ISB, FEXCore::ToUnderlying(FEXCore::ARMEmitter::BarrierScope::SY));
Barrier(ARMEmitter::BarrierRegister::ISB, FEXCore::ToUnderlying(ARMEmitter::BarrierScope::SY));
}
// sb: calls Barrier() with BarrierRegister::SB and a CRm argument of 0.
void sb() {
Barrier(FEXCore::ARMEmitter::BarrierRegister::SB, 0);
Barrier(ARMEmitter::BarrierRegister::SB, 0);
}
// tcommit: calls Barrier() with BarrierRegister::TCOMMIT and a CRm argument of 0.
void tcommit() {
Barrier(FEXCore::ARMEmitter::BarrierRegister::TCOMMIT, 0);
Barrier(ARMEmitter::BarrierRegister::TCOMMIT, 0);
}
// System register move
// msr: passes the base opcode 0b1101'0101'0001 << 20 to SystemRegisterMove together with the
// general register rt and the system register reg.
// Note the argument order: the system register comes first here, but SystemRegisterMove
// takes the general register first.
void msr(FEXCore::ARMEmitter::SystemRegister reg, FEXCore::ARMEmitter::Register rt) {
void msr(ARMEmitter::SystemRegister reg, ARMEmitter::Register rt) {
constexpr uint32_t Op = 0b1101'0101'0001 << 20;
SystemRegisterMove(Op, rt, reg);
}
// mrs: passes the base opcode 0b1101'0101'0011 << 20 to SystemRegisterMove together with the
// general register rd and the system register reg. Apart from the opcode constant this is the
// same call msr() makes.
void mrs(FEXCore::ARMEmitter::Register rd, FEXCore::ARMEmitter::SystemRegister reg) {
void mrs(ARMEmitter::Register rd, ARMEmitter::SystemRegister reg) {
constexpr uint32_t Op = 0b1101'0101'0011 << 20;
SystemRegisterMove(Op, rd, reg);
}
@ -130,7 +130,7 @@ private:
}
// System instructions with register argument
void SystemInstructionWithReg(uint32_t CRm, uint32_t op2, FEXCore::ARMEmitter::Register rt) {
void SystemInstructionWithReg(uint32_t CRm, uint32_t op2, ARMEmitter::Register rt) {
uint32_t Instr = 0b1101'0101'0000'0011'0001 << 12;
Instr |= CRm << 8;
@ -140,13 +140,13 @@ private:
}
// Hints
// Shared encoder for the hint helpers (nop, yield, wfe, ...).
// Starts from the fixed word 0b1101'0101'0000'0011'0010'0000'0001'1111, ORs in the underlying
// integer value of Reg, and hands the result to dc32.
// NOTE(review): no shift is applied to Reg, so the HintRegister enumerators are presumably
// already positioned within the instruction word — confirm against the enum definition.
void Hint(FEXCore::ARMEmitter::HintRegister Reg) {
void Hint(ARMEmitter::HintRegister Reg) {
uint32_t Instr = 0b1101'0101'0000'0011'0010'0000'0001'1111U;
Instr |= FEXCore::ToUnderlying(Reg);
dc32(Instr);
}
// Barriers
void Barrier(FEXCore::ARMEmitter::BarrierRegister Reg, uint32_t CRm) {
void Barrier(ARMEmitter::BarrierRegister Reg, uint32_t CRm) {
uint32_t Instr = 0b1101'0101'0000'0011'0011'0000'0001'1111U;
Instr |= CRm << 8;
Instr |= FEXCore::ToUnderlying(Reg);
@ -154,7 +154,7 @@ private:
}
// System Instruction
void SystemInstruction(uint32_t Op, uint32_t L, uint32_t SubOp, FEXCore::ARMEmitter::Register rt) {
void SystemInstruction(uint32_t Op, uint32_t L, uint32_t SubOp, ARMEmitter::Register rt) {
uint32_t Instr = Op;
Instr |= L << 21;
@ -165,7 +165,7 @@ private:
}
// System register move
void SystemRegisterMove(uint32_t Op, FEXCore::ARMEmitter::Register rt, FEXCore::ARMEmitter::SystemRegister reg) {
void SystemRegisterMove(uint32_t Op, ARMEmitter::Register rt, ARMEmitter::SystemRegister reg) {
uint32_t Instr = Op;
Instr |= FEXCore::ToUnderlying(reg);

View File

@ -193,7 +193,7 @@ endif()
# Some defines for the softfloat library
list(APPEND DEFINES "-DSOFTFLOAT_BUILTIN_CLZ")
set (LIBS fmt::fmt vixl xxHash::xxhash FEXHeaderUtils)
set (LIBS fmt::fmt vixl xxHash::xxhash FEXHeaderUtils CodeEmitter)
if (NOT MINGW_BUILD)
list (APPEND LIBS dl)

View File

@ -2,8 +2,6 @@
#include "Interface/Core/ArchHelpers/Arm64Emitter.h"
#include "FEXCore/Core/X86Enums.h"
#include "FEXCore/Utils/AllocatorHooks.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Registers.h"
#include "Interface/Core/Dispatcher/Dispatcher.h"
#include "Interface/Context/Context.h"
#include "Interface/HLE/Thunks/Thunks.h"
@ -13,6 +11,8 @@
#include <FEXCore/Utils/MathUtils.h>
#include <FEXHeaderUtils/BitUtils.h>
#include <CodeEmitter/Emitter.h>
#include <CodeEmitter/Registers.h>
#include <aarch64/cpu-aarch64.h>
#include <aarch64/instructions-aarch64.h>
@ -31,110 +31,108 @@ namespace FEXCore::CPU {
namespace x64 {
#ifndef _M_ARM_64EC
// All but x19 and x29 are caller saved
// SRA general-purpose register table for the non-ARM64EC build (#ifndef _M_ARM_64EC branch).
// 18 entries: r4-r17, r19 and r29, followed by REG_PF and REG_AF.
// NOTE(review): "SRA" presumably stands for static register allocation — confirm.
constexpr std::array<FEXCore::ARMEmitter::Register, 18> SRA = {
FEXCore::ARMEmitter::Reg::r4,
FEXCore::ARMEmitter::Reg::r5,
FEXCore::ARMEmitter::Reg::r6,
FEXCore::ARMEmitter::Reg::r7,
FEXCore::ARMEmitter::Reg::r8,
FEXCore::ARMEmitter::Reg::r9,
FEXCore::ARMEmitter::Reg::r10,
FEXCore::ARMEmitter::Reg::r11,
FEXCore::ARMEmitter::Reg::r12,
FEXCore::ARMEmitter::Reg::r13,
FEXCore::ARMEmitter::Reg::r14,
FEXCore::ARMEmitter::Reg::r15,
FEXCore::ARMEmitter::Reg::r16,
FEXCore::ARMEmitter::Reg::r17,
FEXCore::ARMEmitter::Reg::r19,
FEXCore::ARMEmitter::Reg::r29,
constexpr std::array<ARMEmitter::Register, 18> SRA = {
ARMEmitter::Reg::r4,
ARMEmitter::Reg::r5,
ARMEmitter::Reg::r6,
ARMEmitter::Reg::r7,
ARMEmitter::Reg::r8,
ARMEmitter::Reg::r9,
ARMEmitter::Reg::r10,
ARMEmitter::Reg::r11,
ARMEmitter::Reg::r12,
ARMEmitter::Reg::r13,
ARMEmitter::Reg::r14,
ARMEmitter::Reg::r15,
ARMEmitter::Reg::r16,
ARMEmitter::Reg::r17,
ARMEmitter::Reg::r19,
ARMEmitter::Reg::r29,
// PF/AF must be last.
REG_PF,
REG_AF,
};
// RA general-purpose register table for the non-ARM64EC build.
// 7 entries: r20-r25 and r30.
constexpr std::array<FEXCore::ARMEmitter::Register, 7> RA = {
constexpr std::array<ARMEmitter::Register, 7> RA = {
// All these callee saved
FEXCore::ARMEmitter::Reg::r20, FEXCore::ARMEmitter::Reg::r21, FEXCore::ARMEmitter::Reg::r22, FEXCore::ARMEmitter::Reg::r23,
FEXCore::ARMEmitter::Reg::r24, FEXCore::ARMEmitter::Reg::r25, FEXCore::ARMEmitter::Reg::r30,
ARMEmitter::Reg::r20, ARMEmitter::Reg::r21, ARMEmitter::Reg::r22, ARMEmitter::Reg::r23,
ARMEmitter::Reg::r24, ARMEmitter::Reg::r25, ARMEmitter::Reg::r30,
};
// Register pairs for the non-ARM64EC build: (r20, r21), (r22, r23), (r24, r25).
// These are the first six registers of the RA table grouped two at a time; r30 is not paired.
constexpr std::array<std::pair<FEXCore::ARMEmitter::Register, FEXCore::ARMEmitter::Register>, 3> RAPair = {{
{FEXCore::ARMEmitter::Reg::r20, FEXCore::ARMEmitter::Reg::r21},
{FEXCore::ARMEmitter::Reg::r22, FEXCore::ARMEmitter::Reg::r23},
{FEXCore::ARMEmitter::Reg::r24, FEXCore::ARMEmitter::Reg::r25},
constexpr std::array<std::pair<ARMEmitter::Register, ARMEmitter::Register>, 3> RAPair = {{
{ARMEmitter::Reg::r20, ARMEmitter::Reg::r21},
{ARMEmitter::Reg::r22, ARMEmitter::Reg::r23},
{ARMEmitter::Reg::r24, ARMEmitter::Reg::r25},
}};
// All are caller saved
// SRA vector register table for the non-ARM64EC build: 16 entries, v16 through v31.
constexpr std::array<FEXCore::ARMEmitter::VRegister, 16> SRAFPR = {
FEXCore::ARMEmitter::VReg::v16, FEXCore::ARMEmitter::VReg::v17, FEXCore::ARMEmitter::VReg::v18, FEXCore::ARMEmitter::VReg::v19,
FEXCore::ARMEmitter::VReg::v20, FEXCore::ARMEmitter::VReg::v21, FEXCore::ARMEmitter::VReg::v22, FEXCore::ARMEmitter::VReg::v23,
FEXCore::ARMEmitter::VReg::v24, FEXCore::ARMEmitter::VReg::v25, FEXCore::ARMEmitter::VReg::v26, FEXCore::ARMEmitter::VReg::v27,
FEXCore::ARMEmitter::VReg::v28, FEXCore::ARMEmitter::VReg::v29, FEXCore::ARMEmitter::VReg::v30, FEXCore::ARMEmitter::VReg::v31};
constexpr std::array<ARMEmitter::VRegister, 16> SRAFPR = {
ARMEmitter::VReg::v16, ARMEmitter::VReg::v17, ARMEmitter::VReg::v18, ARMEmitter::VReg::v19,
ARMEmitter::VReg::v20, ARMEmitter::VReg::v21, ARMEmitter::VReg::v22, ARMEmitter::VReg::v23,
ARMEmitter::VReg::v24, ARMEmitter::VReg::v25, ARMEmitter::VReg::v26, ARMEmitter::VReg::v27,
ARMEmitter::VReg::v28, ARMEmitter::VReg::v29, ARMEmitter::VReg::v30, ARMEmitter::VReg::v31};
// v8..v15 = (lower 64bits) Callee saved
// RA vector register table for the non-ARM64EC build: 14 entries, v2 through v15.
// v0 and v1 are deliberately left out (kept commented out below) because they serve as temps.
constexpr std::array<FEXCore::ARMEmitter::VRegister, 14> RAFPR = {
constexpr std::array<ARMEmitter::VRegister, 14> RAFPR = {
// v0 ~ v1 are used as temps.
// FEXCore::ARMEmitter::VReg::v0, FEXCore::ARMEmitter::VReg::v1,
// ARMEmitter::VReg::v0, ARMEmitter::VReg::v1,
FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, FEXCore::ARMEmitter::VReg::v4, FEXCore::ARMEmitter::VReg::v5,
FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7, FEXCore::ARMEmitter::VReg::v8, FEXCore::ARMEmitter::VReg::v9,
FEXCore::ARMEmitter::VReg::v10, FEXCore::ARMEmitter::VReg::v11, FEXCore::ARMEmitter::VReg::v12, FEXCore::ARMEmitter::VReg::v13,
FEXCore::ARMEmitter::VReg::v14, FEXCore::ARMEmitter::VReg::v15,
ARMEmitter::VReg::v2, ARMEmitter::VReg::v3, ARMEmitter::VReg::v4, ARMEmitter::VReg::v5, ARMEmitter::VReg::v6,
ARMEmitter::VReg::v7, ARMEmitter::VReg::v8, ARMEmitter::VReg::v9, ARMEmitter::VReg::v10, ARMEmitter::VReg::v11,
ARMEmitter::VReg::v12, ARMEmitter::VReg::v13, ARMEmitter::VReg::v14, ARMEmitter::VReg::v15,
};
#else
// SRA general-purpose register table for the ARM64EC build (#else branch of _M_ARM_64EC).
// 18 entries, in this order: r8, r0, r1, r27, r23, r29, r25, r26, r2, r3, r4, r5, r19-r22,
// then REG_PF and REG_AF.
// NOTE(review): the order is presumably dictated by the ARM64EC register mapping, as the
// inline comment about SP suggests — confirm against the ABI documentation.
constexpr std::array<FEXCore::ARMEmitter::Register, 18> SRA = {
FEXCore::ARMEmitter::Reg::r8,
FEXCore::ARMEmitter::Reg::r0,
FEXCore::ARMEmitter::Reg::r1,
FEXCore::ARMEmitter::Reg::r27,
constexpr std::array<ARMEmitter::Register, 18> SRA = {
ARMEmitter::Reg::r8,
ARMEmitter::Reg::r0,
ARMEmitter::Reg::r1,
ARMEmitter::Reg::r27,
// SP's register location isn't specified by the ARM64EC ABI, we choose to use r23
FEXCore::ARMEmitter::Reg::r23,
FEXCore::ARMEmitter::Reg::r29,
FEXCore::ARMEmitter::Reg::r25,
FEXCore::ARMEmitter::Reg::r26,
FEXCore::ARMEmitter::Reg::r2,
FEXCore::ARMEmitter::Reg::r3,
FEXCore::ARMEmitter::Reg::r4,
FEXCore::ARMEmitter::Reg::r5,
FEXCore::ARMEmitter::Reg::r19,
FEXCore::ARMEmitter::Reg::r20,
FEXCore::ARMEmitter::Reg::r21,
FEXCore::ARMEmitter::Reg::r22,
ARMEmitter::Reg::r23,
ARMEmitter::Reg::r29,
ARMEmitter::Reg::r25,
ARMEmitter::Reg::r26,
ARMEmitter::Reg::r2,
ARMEmitter::Reg::r3,
ARMEmitter::Reg::r4,
ARMEmitter::Reg::r5,
ARMEmitter::Reg::r19,
ARMEmitter::Reg::r20,
ARMEmitter::Reg::r21,
ARMEmitter::Reg::r22,
REG_PF,
REG_AF,
};
// RA general-purpose register table for the ARM64EC build.
// 7 entries: r6, r7, r14, r15, r16, r17 and r30.
constexpr std::array<FEXCore::ARMEmitter::Register, 7> RA = {
FEXCore::ARMEmitter::Reg::r6, FEXCore::ARMEmitter::Reg::r7, FEXCore::ARMEmitter::Reg::r14, FEXCore::ARMEmitter::Reg::r15,
FEXCore::ARMEmitter::Reg::r16, FEXCore::ARMEmitter::Reg::r17, FEXCore::ARMEmitter::Reg::r30,
constexpr std::array<ARMEmitter::Register, 7> RA = {
ARMEmitter::Reg::r6, ARMEmitter::Reg::r7, ARMEmitter::Reg::r14, ARMEmitter::Reg::r15,
ARMEmitter::Reg::r16, ARMEmitter::Reg::r17, ARMEmitter::Reg::r30,
};
// Register pairs for the ARM64EC build: (r6, r7), (r14, r15), (r16, r17).
// These are the first six registers of the ARM64EC RA table grouped two at a time; r30 is not paired.
constexpr std::array<std::pair<FEXCore::ARMEmitter::Register, FEXCore::ARMEmitter::Register>, 3> RAPair = {{
{FEXCore::ARMEmitter::Reg::r6, FEXCore::ARMEmitter::Reg::r7},
{FEXCore::ARMEmitter::Reg::r14, FEXCore::ARMEmitter::Reg::r15},
{FEXCore::ARMEmitter::Reg::r16, FEXCore::ARMEmitter::Reg::r17},
constexpr std::array<std::pair<ARMEmitter::Register, ARMEmitter::Register>, 3> RAPair = {{
{ARMEmitter::Reg::r6, ARMEmitter::Reg::r7},
{ARMEmitter::Reg::r14, ARMEmitter::Reg::r15},
{ARMEmitter::Reg::r16, ARMEmitter::Reg::r17},
}};
// SRA vector register table for the ARM64EC build: 16 entries, v0 through v15.
constexpr std::array<FEXCore::ARMEmitter::VRegister, 16> SRAFPR = {
FEXCore::ARMEmitter::VReg::v0, FEXCore::ARMEmitter::VReg::v1, FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3,
FEXCore::ARMEmitter::VReg::v4, FEXCore::ARMEmitter::VReg::v5, FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7,
FEXCore::ARMEmitter::VReg::v8, FEXCore::ARMEmitter::VReg::v9, FEXCore::ARMEmitter::VReg::v10, FEXCore::ARMEmitter::VReg::v11,
FEXCore::ARMEmitter::VReg::v12, FEXCore::ARMEmitter::VReg::v13, FEXCore::ARMEmitter::VReg::v14, FEXCore::ARMEmitter::VReg::v15,
constexpr std::array<ARMEmitter::VRegister, 16> SRAFPR = {
ARMEmitter::VReg::v0, ARMEmitter::VReg::v1, ARMEmitter::VReg::v2, ARMEmitter::VReg::v3,
ARMEmitter::VReg::v4, ARMEmitter::VReg::v5, ARMEmitter::VReg::v6, ARMEmitter::VReg::v7,
ARMEmitter::VReg::v8, ARMEmitter::VReg::v9, ARMEmitter::VReg::v10, ARMEmitter::VReg::v11,
ARMEmitter::VReg::v12, ARMEmitter::VReg::v13, ARMEmitter::VReg::v14, ARMEmitter::VReg::v15,
};
// RA vector register table for the ARM64EC build: 14 entries, v18 through v31.
// v16 and v17 appear in neither this table nor the ARM64EC SRAFPR table.
constexpr std::array<FEXCore::ARMEmitter::VRegister, 14> RAFPR = {
FEXCore::ARMEmitter::VReg::v18, FEXCore::ARMEmitter::VReg::v19, FEXCore::ARMEmitter::VReg::v20, FEXCore::ARMEmitter::VReg::v21,
FEXCore::ARMEmitter::VReg::v22, FEXCore::ARMEmitter::VReg::v23, FEXCore::ARMEmitter::VReg::v24, FEXCore::ARMEmitter::VReg::v25,
FEXCore::ARMEmitter::VReg::v26, FEXCore::ARMEmitter::VReg::v27, FEXCore::ARMEmitter::VReg::v28, FEXCore::ARMEmitter::VReg::v29,
FEXCore::ARMEmitter::VReg::v30, FEXCore::ARMEmitter::VReg::v31};
constexpr std::array<ARMEmitter::VRegister, 14> RAFPR = {
ARMEmitter::VReg::v18, ARMEmitter::VReg::v19, ARMEmitter::VReg::v20, ARMEmitter::VReg::v21, ARMEmitter::VReg::v22,
ARMEmitter::VReg::v23, ARMEmitter::VReg::v24, ARMEmitter::VReg::v25, ARMEmitter::VReg::v26, ARMEmitter::VReg::v27,
ARMEmitter::VReg::v28, ARMEmitter::VReg::v29, ARMEmitter::VReg::v30, ARMEmitter::VReg::v31};
#endif
// I wish this could get constexpr generated from SRA's definition but impossible until libstdc++12, libc++15.
// SRA GPRs that need to be spilled when calling a function with `preserve_all` ABI.
constexpr std::array<FEXCore::ARMEmitter::Register, 7> PreserveAll_SRA = {
FEXCore::ARMEmitter::Reg::r4, FEXCore::ARMEmitter::Reg::r5, FEXCore::ARMEmitter::Reg::r6, FEXCore::ARMEmitter::Reg::r7,
FEXCore::ARMEmitter::Reg::r8, FEXCore::ARMEmitter::Reg::r16, FEXCore::ARMEmitter::Reg::r17,
constexpr std::array<ARMEmitter::Register, 7> PreserveAll_SRA = {
ARMEmitter::Reg::r4, ARMEmitter::Reg::r5, ARMEmitter::Reg::r6, ARMEmitter::Reg::r7,
ARMEmitter::Reg::r8, ARMEmitter::Reg::r16, ARMEmitter::Reg::r17,
};
constexpr uint32_t PreserveAll_SRAMask = {[]() -> uint32_t {
@ -160,12 +158,12 @@ namespace x64 {
}()};
// Dynamic GPRs
constexpr std::array<FEXCore::ARMEmitter::Register, 1> PreserveAll_Dynamic = {
constexpr std::array<ARMEmitter::Register, 1> PreserveAll_Dynamic = {
// Only LR needs to get saved.
FEXCore::ARMEmitter::Reg::r30};
ARMEmitter::Reg::r30};
// SRA FPRs that need to be spilled when calling a function with `preserve_all` ABI.
constexpr std::array<FEXCore::ARMEmitter::Register, 0> PreserveAll_SRAFPR = {
constexpr std::array<ARMEmitter::Register, 0> PreserveAll_SRAFPR = {
// None.
};
@ -179,15 +177,14 @@ namespace x64 {
// Dynamic FPRs
// - v0-v7
constexpr std::array<FEXCore::ARMEmitter::VRegister, 6> PreserveAll_DynamicFPR = {
constexpr std::array<ARMEmitter::VRegister, 6> PreserveAll_DynamicFPR = {
// v0 ~ v1 are temps
FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, FEXCore::ARMEmitter::VReg::v4,
FEXCore::ARMEmitter::VReg::v5, FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7,
ARMEmitter::VReg::v2, ARMEmitter::VReg::v3, ARMEmitter::VReg::v4, ARMEmitter::VReg::v5, ARMEmitter::VReg::v6, ARMEmitter::VReg::v7,
};
// SRA FPRs that need to be spilled when the host supports SVE-256bit with `preserve_all` ABI.
// This is /all/ of the SRA registers
constexpr std::array<FEXCore::ARMEmitter::VRegister, 16> PreserveAll_SRAFPRSVE = SRAFPR;
constexpr std::array<ARMEmitter::VRegister, 16> PreserveAll_SRAFPRSVE = SRAFPR;
constexpr uint32_t PreserveAll_SRAFPRSVEMask = {[]() -> uint32_t {
uint32_t Mask {};
@ -198,89 +195,86 @@ namespace x64 {
}()};
// Dynamic FPRs when the host supports SVE-256bit.
constexpr std::array<FEXCore::ARMEmitter::VRegister, 14> PreserveAll_DynamicFPRSVE = {
constexpr std::array<ARMEmitter::VRegister, 14> PreserveAll_DynamicFPRSVE = {
// v0 ~ v1 are used as temps.
FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, FEXCore::ARMEmitter::VReg::v4, FEXCore::ARMEmitter::VReg::v5,
FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7, FEXCore::ARMEmitter::VReg::v8, FEXCore::ARMEmitter::VReg::v9,
FEXCore::ARMEmitter::VReg::v10, FEXCore::ARMEmitter::VReg::v11, FEXCore::ARMEmitter::VReg::v12, FEXCore::ARMEmitter::VReg::v13,
FEXCore::ARMEmitter::VReg::v14, FEXCore::ARMEmitter::VReg::v15,
ARMEmitter::VReg::v2, ARMEmitter::VReg::v3, ARMEmitter::VReg::v4, ARMEmitter::VReg::v5, ARMEmitter::VReg::v6,
ARMEmitter::VReg::v7, ARMEmitter::VReg::v8, ARMEmitter::VReg::v9, ARMEmitter::VReg::v10, ARMEmitter::VReg::v11,
ARMEmitter::VReg::v12, ARMEmitter::VReg::v13, ARMEmitter::VReg::v14, ARMEmitter::VReg::v15,
};
} // namespace x64
namespace x32 {
// All but x19 and x29 are caller saved
constexpr std::array<FEXCore::ARMEmitter::Register, 10> SRA = {
FEXCore::ARMEmitter::Reg::r4,
FEXCore::ARMEmitter::Reg::r5,
FEXCore::ARMEmitter::Reg::r6,
FEXCore::ARMEmitter::Reg::r7,
FEXCore::ARMEmitter::Reg::r8,
FEXCore::ARMEmitter::Reg::r9,
FEXCore::ARMEmitter::Reg::r10,
FEXCore::ARMEmitter::Reg::r11,
constexpr std::array<ARMEmitter::Register, 10> SRA = {
ARMEmitter::Reg::r4,
ARMEmitter::Reg::r5,
ARMEmitter::Reg::r6,
ARMEmitter::Reg::r7,
ARMEmitter::Reg::r8,
ARMEmitter::Reg::r9,
ARMEmitter::Reg::r10,
ARMEmitter::Reg::r11,
// PF/AF must be last.
REG_PF,
REG_AF,
};
constexpr std::array<FEXCore::ARMEmitter::Register, 15> RA = {
constexpr std::array<ARMEmitter::Register, 15> RA = {
// All these callee saved
FEXCore::ARMEmitter::Reg::r20,
FEXCore::ARMEmitter::Reg::r21,
FEXCore::ARMEmitter::Reg::r22,
FEXCore::ARMEmitter::Reg::r23,
FEXCore::ARMEmitter::Reg::r24,
FEXCore::ARMEmitter::Reg::r25,
ARMEmitter::Reg::r20,
ARMEmitter::Reg::r21,
ARMEmitter::Reg::r22,
ARMEmitter::Reg::r23,
ARMEmitter::Reg::r24,
ARMEmitter::Reg::r25,
// Registers only available on 32-bit
// All these are caller saved (except for r19).
FEXCore::ARMEmitter::Reg::r12,
FEXCore::ARMEmitter::Reg::r13,
FEXCore::ARMEmitter::Reg::r14,
FEXCore::ARMEmitter::Reg::r15,
FEXCore::ARMEmitter::Reg::r16,
FEXCore::ARMEmitter::Reg::r17,
FEXCore::ARMEmitter::Reg::r29,
FEXCore::ARMEmitter::Reg::r30,
ARMEmitter::Reg::r12,
ARMEmitter::Reg::r13,
ARMEmitter::Reg::r14,
ARMEmitter::Reg::r15,
ARMEmitter::Reg::r16,
ARMEmitter::Reg::r17,
ARMEmitter::Reg::r29,
ARMEmitter::Reg::r30,
FEXCore::ARMEmitter::Reg::r19,
ARMEmitter::Reg::r19,
};
constexpr std::array<std::pair<FEXCore::ARMEmitter::Register, FEXCore::ARMEmitter::Register>, 7> RAPair = {{
{FEXCore::ARMEmitter::Reg::r20, FEXCore::ARMEmitter::Reg::r21},
{FEXCore::ARMEmitter::Reg::r22, FEXCore::ARMEmitter::Reg::r23},
{FEXCore::ARMEmitter::Reg::r24, FEXCore::ARMEmitter::Reg::r25},
constexpr std::array<std::pair<ARMEmitter::Register, ARMEmitter::Register>, 7> RAPair = {{
{ARMEmitter::Reg::r20, ARMEmitter::Reg::r21},
{ARMEmitter::Reg::r22, ARMEmitter::Reg::r23},
{ARMEmitter::Reg::r24, ARMEmitter::Reg::r25},
{FEXCore::ARMEmitter::Reg::r12, FEXCore::ARMEmitter::Reg::r13},
{FEXCore::ARMEmitter::Reg::r14, FEXCore::ARMEmitter::Reg::r15},
{FEXCore::ARMEmitter::Reg::r16, FEXCore::ARMEmitter::Reg::r17},
{FEXCore::ARMEmitter::Reg::r29, FEXCore::ARMEmitter::Reg::r30},
{ARMEmitter::Reg::r12, ARMEmitter::Reg::r13},
{ARMEmitter::Reg::r14, ARMEmitter::Reg::r15},
{ARMEmitter::Reg::r16, ARMEmitter::Reg::r17},
{ARMEmitter::Reg::r29, ARMEmitter::Reg::r30},
}};
// All are caller saved
constexpr std::array<FEXCore::ARMEmitter::VRegister, 8> SRAFPR = {
FEXCore::ARMEmitter::VReg::v16, FEXCore::ARMEmitter::VReg::v17, FEXCore::ARMEmitter::VReg::v18, FEXCore::ARMEmitter::VReg::v19,
FEXCore::ARMEmitter::VReg::v20, FEXCore::ARMEmitter::VReg::v21, FEXCore::ARMEmitter::VReg::v22, FEXCore::ARMEmitter::VReg::v23,
constexpr std::array<ARMEmitter::VRegister, 8> SRAFPR = {
ARMEmitter::VReg::v16, ARMEmitter::VReg::v17, ARMEmitter::VReg::v18, ARMEmitter::VReg::v19,
ARMEmitter::VReg::v20, ARMEmitter::VReg::v21, ARMEmitter::VReg::v22, ARMEmitter::VReg::v23,
};
// v8..v15 = (lower 64bits) Callee saved
constexpr std::array<FEXCore::ARMEmitter::VRegister, 22> RAFPR = {
constexpr std::array<ARMEmitter::VRegister, 22> RAFPR = {
// v0 ~ v1 are used as temps.
// FEXCore::ARMEmitter::VReg::v0, FEXCore::ARMEmitter::VReg::v1,
// ARMEmitter::VReg::v0, ARMEmitter::VReg::v1,
FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, FEXCore::ARMEmitter::VReg::v4, FEXCore::ARMEmitter::VReg::v5,
FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7, FEXCore::ARMEmitter::VReg::v8, FEXCore::ARMEmitter::VReg::v9,
FEXCore::ARMEmitter::VReg::v10, FEXCore::ARMEmitter::VReg::v11, FEXCore::ARMEmitter::VReg::v12, FEXCore::ARMEmitter::VReg::v13,
FEXCore::ARMEmitter::VReg::v14, FEXCore::ARMEmitter::VReg::v15,
ARMEmitter::VReg::v2, ARMEmitter::VReg::v3, ARMEmitter::VReg::v4, ARMEmitter::VReg::v5, ARMEmitter::VReg::v6,
ARMEmitter::VReg::v7, ARMEmitter::VReg::v8, ARMEmitter::VReg::v9, ARMEmitter::VReg::v10, ARMEmitter::VReg::v11,
ARMEmitter::VReg::v12, ARMEmitter::VReg::v13, ARMEmitter::VReg::v14, ARMEmitter::VReg::v15,
FEXCore::ARMEmitter::VReg::v24, FEXCore::ARMEmitter::VReg::v25, FEXCore::ARMEmitter::VReg::v26, FEXCore::ARMEmitter::VReg::v27,
FEXCore::ARMEmitter::VReg::v28, FEXCore::ARMEmitter::VReg::v29, FEXCore::ARMEmitter::VReg::v30, FEXCore::ARMEmitter::VReg::v31};
ARMEmitter::VReg::v24, ARMEmitter::VReg::v25, ARMEmitter::VReg::v26, ARMEmitter::VReg::v27, ARMEmitter::VReg::v28,
ARMEmitter::VReg::v29, ARMEmitter::VReg::v30, ARMEmitter::VReg::v31};
// I wish this could get constexpr generated from SRA's definition but impossible until libstdc++12, libc++15.
// SRA GPRs that need to be spilled when calling a function with `preserve_all` ABI.
constexpr std::array<FEXCore::ARMEmitter::Register, 5> PreserveAll_SRA = {
FEXCore::ARMEmitter::Reg::r4, FEXCore::ARMEmitter::Reg::r5, FEXCore::ARMEmitter::Reg::r6,
FEXCore::ARMEmitter::Reg::r7, FEXCore::ARMEmitter::Reg::r8,
constexpr std::array<ARMEmitter::Register, 5> PreserveAll_SRA = {
ARMEmitter::Reg::r4, ARMEmitter::Reg::r5, ARMEmitter::Reg::r6, ARMEmitter::Reg::r7, ARMEmitter::Reg::r8,
};
constexpr uint32_t PreserveAll_SRAMask = {[]() -> uint32_t {
@ -306,11 +300,10 @@ namespace x32 {
}()};
// Dynamic GPRs
constexpr std::array<FEXCore::ARMEmitter::Register, 3> PreserveAll_Dynamic = {
FEXCore::ARMEmitter::Reg::r16, FEXCore::ARMEmitter::Reg::r17, FEXCore::ARMEmitter::Reg::r30};
constexpr std::array<ARMEmitter::Register, 3> PreserveAll_Dynamic = {ARMEmitter::Reg::r16, ARMEmitter::Reg::r17, ARMEmitter::Reg::r30};
// SRA FPRs that need to be spilled when calling a function with `preserve_all` ABI.
constexpr std::array<FEXCore::ARMEmitter::Register, 0> PreserveAll_SRAFPR = {
constexpr std::array<ARMEmitter::Register, 0> PreserveAll_SRAFPR = {
// None.
};
@ -324,15 +317,14 @@ namespace x32 {
// Dynamic FPRs
// - v0-v7
constexpr std::array<FEXCore::ARMEmitter::VRegister, 6> PreserveAll_DynamicFPR = {
constexpr std::array<ARMEmitter::VRegister, 6> PreserveAll_DynamicFPR = {
// v0 ~ v1 are temps
FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, FEXCore::ARMEmitter::VReg::v4,
FEXCore::ARMEmitter::VReg::v5, FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7,
ARMEmitter::VReg::v2, ARMEmitter::VReg::v3, ARMEmitter::VReg::v4, ARMEmitter::VReg::v5, ARMEmitter::VReg::v6, ARMEmitter::VReg::v7,
};
// SRA FPRs that need to be spilled when the host supports SVE-256bit with `preserve_all` ABI.
// This is /all/ of the SRA registers
constexpr std::array<FEXCore::ARMEmitter::VRegister, 8> PreserveAll_SRAFPRSVE = SRAFPR;
constexpr std::array<ARMEmitter::VRegister, 8> PreserveAll_SRAFPRSVE = SRAFPR;
constexpr uint32_t PreserveAll_SRAFPRSVEMask = {[]() -> uint32_t {
uint32_t Mask {};
@ -343,15 +335,14 @@ namespace x32 {
}()};
// Dynamic FPRs when the host supports SVE-256bit.
constexpr std::array<FEXCore::ARMEmitter::VRegister, 22> PreserveAll_DynamicFPRSVE = {
constexpr std::array<ARMEmitter::VRegister, 22> PreserveAll_DynamicFPRSVE = {
// v0 ~ v1 are used as temps.
FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, FEXCore::ARMEmitter::VReg::v4, FEXCore::ARMEmitter::VReg::v5,
FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7, FEXCore::ARMEmitter::VReg::v8, FEXCore::ARMEmitter::VReg::v9,
FEXCore::ARMEmitter::VReg::v10, FEXCore::ARMEmitter::VReg::v11, FEXCore::ARMEmitter::VReg::v12, FEXCore::ARMEmitter::VReg::v13,
FEXCore::ARMEmitter::VReg::v14, FEXCore::ARMEmitter::VReg::v15,
ARMEmitter::VReg::v2, ARMEmitter::VReg::v3, ARMEmitter::VReg::v4, ARMEmitter::VReg::v5, ARMEmitter::VReg::v6,
ARMEmitter::VReg::v7, ARMEmitter::VReg::v8, ARMEmitter::VReg::v9, ARMEmitter::VReg::v10, ARMEmitter::VReg::v11,
ARMEmitter::VReg::v12, ARMEmitter::VReg::v13, ARMEmitter::VReg::v14, ARMEmitter::VReg::v15,
FEXCore::ARMEmitter::VReg::v24, FEXCore::ARMEmitter::VReg::v25, FEXCore::ARMEmitter::VReg::v26, FEXCore::ARMEmitter::VReg::v27,
FEXCore::ARMEmitter::VReg::v28, FEXCore::ARMEmitter::VReg::v29, FEXCore::ARMEmitter::VReg::v30, FEXCore::ARMEmitter::VReg::v31};
ARMEmitter::VReg::v24, ARMEmitter::VReg::v25, ARMEmitter::VReg::v26, ARMEmitter::VReg::v27, ARMEmitter::VReg::v28,
ARMEmitter::VReg::v29, ARMEmitter::VReg::v30, ARMEmitter::VReg::v31};
} // namespace x32
// We want vixl to not allocate a default buffer. Jit and dispatcher will manually create one.
@ -589,7 +580,7 @@ void Arm64Emitter::PopCalleeSavedRegisters() {
}
}
void Arm64Emitter::SpillStaticRegs(FEXCore::ARMEmitter::Register TmpReg, bool FPRs, uint32_t GPRSpillMask, uint32_t FPRSpillMask) {
void Arm64Emitter::SpillStaticRegs(ARMEmitter::Register TmpReg, bool FPRs, uint32_t GPRSpillMask, uint32_t FPRSpillMask) {
#ifndef VIXL_SIMULATOR
if (EmitterCTX->HostFeatures.SupportsAFP) {
// Disable AFP features when spilling registers.
@ -683,7 +674,7 @@ void Arm64Emitter::SpillStaticRegs(FEXCore::ARMEmitter::Register TmpReg, bool FP
}
void Arm64Emitter::FillStaticRegs(bool FPRs, uint32_t GPRFillMask, uint32_t FPRFillMask) {
FEXCore::ARMEmitter::Register TmpReg = FEXCore::ARMEmitter::Reg::r0;
ARMEmitter::Register TmpReg = ARMEmitter::Reg::r0;
LOGMAN_THROW_A_FMT(GPRFillMask != 0, "Must fill at least 1 GPR for a temp");
[[maybe_unused]] bool FoundRegister {};
for (auto Reg : StaticRegisters) {
@ -798,7 +789,7 @@ void Arm64Emitter::FillStaticRegs(bool FPRs, uint32_t GPRFillMask, uint32_t FPRF
}
}
void Arm64Emitter::PushVectorRegisters(FEXCore::ARMEmitter::Register TmpReg, bool SVERegs, std::span<const FEXCore::ARMEmitter::VRegister> VRegs) {
void Arm64Emitter::PushVectorRegisters(ARMEmitter::Register TmpReg, bool SVERegs, std::span<const ARMEmitter::VRegister> VRegs) {
if (SVERegs) {
size_t i = 0;
@ -835,7 +826,7 @@ void Arm64Emitter::PushVectorRegisters(FEXCore::ARMEmitter::Register TmpReg, boo
}
}
void Arm64Emitter::PushGeneralRegisters(FEXCore::ARMEmitter::Register TmpReg, std::span<const FEXCore::ARMEmitter::Register> Regs) {
void Arm64Emitter::PushGeneralRegisters(ARMEmitter::Register TmpReg, std::span<const ARMEmitter::Register> Regs) {
size_t i = 0;
for (; i < (Regs.size() % 2); ++i) {
const auto Reg1 = Regs[i];
@ -849,7 +840,7 @@ void Arm64Emitter::PushGeneralRegisters(FEXCore::ARMEmitter::Register TmpReg, st
}
}
void Arm64Emitter::PopVectorRegisters(bool SVERegs, std::span<const FEXCore::ARMEmitter::VRegister> VRegs) {
void Arm64Emitter::PopVectorRegisters(bool SVERegs, std::span<const ARMEmitter::VRegister> VRegs) {
if (SVERegs) {
size_t i = 0;
for (; i < (VRegs.size() % 4); i += 2) {
@ -885,7 +876,7 @@ void Arm64Emitter::PopVectorRegisters(bool SVERegs, std::span<const FEXCore::ARM
}
}
void Arm64Emitter::PopGeneralRegisters(std::span<const FEXCore::ARMEmitter::Register> Regs) {
void Arm64Emitter::PopGeneralRegisters(std::span<const ARMEmitter::Register> Regs) {
size_t i = 0;
for (; i < (Regs.size() % 2); ++i) {
const auto Reg1 = Regs[i];
@ -898,7 +889,7 @@ void Arm64Emitter::PopGeneralRegisters(std::span<const FEXCore::ARMEmitter::Regi
}
}
void Arm64Emitter::PushDynamicRegsAndLR(FEXCore::ARMEmitter::Register TmpReg) {
void Arm64Emitter::PushDynamicRegsAndLR(ARMEmitter::Register TmpReg) {
const auto CanUseSVE = EmitterCTX->HostFeatures.SupportsAVX;
const auto GPRSize = (ConfiguredDynamicRegisterBase.size() + 1) * Core::CPUState::GPR_REG_SIZE;
const auto FPRRegSize = CanUseSVE ? Core::CPUState::XMM_AVX_REG_SIZE : Core::CPUState::XMM_SSE_REG_SIZE;
@ -937,12 +928,12 @@ void Arm64Emitter::PopDynamicRegsAndLR() {
#endif
}
void Arm64Emitter::SpillForPreserveAllABICall(FEXCore::ARMEmitter::Register TmpReg, bool FPRs) {
void Arm64Emitter::SpillForPreserveAllABICall(ARMEmitter::Register TmpReg, bool FPRs) {
const auto CanUseSVE = EmitterCTX->HostFeatures.SupportsAVX;
const auto FPRRegSize = CanUseSVE ? Core::CPUState::XMM_AVX_REG_SIZE : Core::CPUState::XMM_SSE_REG_SIZE;
std::span<const FEXCore::ARMEmitter::Register> DynamicGPRs {};
std::span<const FEXCore::ARMEmitter::VRegister> DynamicFPRs {};
std::span<const ARMEmitter::Register> DynamicGPRs {};
std::span<const ARMEmitter::VRegister> DynamicFPRs {};
uint32_t PreserveSRAMask {};
uint32_t PreserveSRAFPRMask {};
if (EmitterCTX->Config.Is64BitMode()) {
@ -989,8 +980,8 @@ void Arm64Emitter::SpillForPreserveAllABICall(FEXCore::ARMEmitter::Register TmpR
void Arm64Emitter::FillForPreserveAllABICall(bool FPRs) {
const auto CanUseSVE = EmitterCTX->HostFeatures.SupportsAVX;
std::span<const FEXCore::ARMEmitter::Register> DynamicGPRs {};
std::span<const FEXCore::ARMEmitter::VRegister> DynamicFPRs {};
std::span<const ARMEmitter::Register> DynamicGPRs {};
std::span<const ARMEmitter::VRegister> DynamicFPRs {};
uint32_t PreserveSRAMask {};
uint32_t PreserveSRAFPRMask {};

View File

@ -2,9 +2,6 @@
#pragma once
#include "FEXCore/Utils/EnumUtils.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Registers.h"
#include "Interface/Core/ObjectCache/Relocations.h"
#include <aarch64/assembler-aarch64.h>
@ -22,6 +19,8 @@
#include <FEXCore/Config/Config.h>
#include <FEXCore/fextl/vector.h>
#include <CodeEmitter/Emitter.h>
#include <CodeEmitter/Registers.h>
#include <array>
#include <cstddef>
@ -35,66 +34,66 @@ class ContextImpl;
namespace FEXCore::CPU {
// Contains the address to the currently available CPU state
constexpr auto STATE = FEXCore::ARMEmitter::XReg::x28;
constexpr auto STATE = ARMEmitter::XReg::x28;
#ifndef _M_ARM_64EC
// GPR temporaries. Only x3 can be used across spill boundaries
// so if these ever need to change, be very careful about that.
constexpr auto TMP1 = FEXCore::ARMEmitter::XReg::x0;
constexpr auto TMP2 = FEXCore::ARMEmitter::XReg::x1;
constexpr auto TMP3 = FEXCore::ARMEmitter::XReg::x2;
constexpr auto TMP4 = FEXCore::ARMEmitter::XReg::x3;
constexpr auto TMP1 = ARMEmitter::XReg::x0;
constexpr auto TMP2 = ARMEmitter::XReg::x1;
constexpr auto TMP3 = ARMEmitter::XReg::x2;
constexpr auto TMP4 = ARMEmitter::XReg::x3;
constexpr bool TMP_ABIARGS = true;
// We pin r26/r27 as PF/AF respectively, this is internal FEX ABI.
constexpr auto REG_PF = FEXCore::ARMEmitter::Reg::r26;
constexpr auto REG_AF = FEXCore::ARMEmitter::Reg::r27;
constexpr auto REG_PF = ARMEmitter::Reg::r26;
constexpr auto REG_AF = ARMEmitter::Reg::r27;
// Vector temporaries
constexpr auto VTMP1 = FEXCore::ARMEmitter::VReg::v0;
constexpr auto VTMP2 = FEXCore::ARMEmitter::VReg::v1;
constexpr auto VTMP1 = ARMEmitter::VReg::v0;
constexpr auto VTMP2 = ARMEmitter::VReg::v1;
#else
constexpr auto TMP1 = FEXCore::ARMEmitter::XReg::x10;
constexpr auto TMP2 = FEXCore::ARMEmitter::XReg::x11;
constexpr auto TMP3 = FEXCore::ARMEmitter::XReg::x12;
constexpr auto TMP4 = FEXCore::ARMEmitter::XReg::x13;
constexpr auto TMP1 = ARMEmitter::XReg::x10;
constexpr auto TMP2 = ARMEmitter::XReg::x11;
constexpr auto TMP3 = ARMEmitter::XReg::x12;
constexpr auto TMP4 = ARMEmitter::XReg::x13;
constexpr bool TMP_ABIARGS = false;
// We pin r9/r24 as PF/AF respectively for arm64ec, as r26/r27 are used for SRA.
constexpr auto REG_PF = FEXCore::ARMEmitter::Reg::r9;
constexpr auto REG_AF = FEXCore::ARMEmitter::Reg::r24;
constexpr auto REG_PF = ARMEmitter::Reg::r9;
constexpr auto REG_AF = ARMEmitter::Reg::r24;
// Vector temporaries
constexpr auto VTMP1 = FEXCore::ARMEmitter::VReg::v16;
constexpr auto VTMP2 = FEXCore::ARMEmitter::VReg::v17;
constexpr auto VTMP1 = ARMEmitter::VReg::v16;
constexpr auto VTMP2 = ARMEmitter::VReg::v17;
// Entry/Exit ABI
constexpr auto EC_CALL_CHECKER_PC_REG = FEXCore::ARMEmitter::XReg::x9;
constexpr auto EC_ENTRY_CPUAREA_REG = FEXCore::ARMEmitter::XReg::x17;
constexpr auto EC_CALL_CHECKER_PC_REG = ARMEmitter::XReg::x9;
constexpr auto EC_ENTRY_CPUAREA_REG = ARMEmitter::XReg::x17;
#endif
// Predicate register temporaries (used when AVX support is enabled)
// PRED_TMP_16B indicates a predicate register that indicates the first 16 bytes set to 1.
// PRED_TMP_32B indicates a predicate register that indicates the first 32 bytes set to 1.
constexpr FEXCore::ARMEmitter::PRegister PRED_TMP_16B = FEXCore::ARMEmitter::PReg::p6;
constexpr FEXCore::ARMEmitter::PRegister PRED_TMP_32B = FEXCore::ARMEmitter::PReg::p7;
constexpr ARMEmitter::PRegister PRED_TMP_16B = ARMEmitter::PReg::p6;
constexpr ARMEmitter::PRegister PRED_TMP_32B = ARMEmitter::PReg::p7;
// This class contains common emitter utility functions that can
// be used by both Arm64 JIT and ARM64 Dispatcher
class Arm64Emitter : public FEXCore::ARMEmitter::Emitter {
class Arm64Emitter : public ARMEmitter::Emitter {
protected:
Arm64Emitter(FEXCore::Context::ContextImpl* ctx, void* EmissionPtr = nullptr, size_t size = 0);
FEXCore::Context::ContextImpl* EmitterCTX;
vixl::aarch64::CPU CPU;
std::span<const FEXCore::ARMEmitter::Register> ConfiguredDynamicRegisterBase {};
std::span<const FEXCore::ARMEmitter::Register> StaticRegisters {};
std::span<const FEXCore::ARMEmitter::Register> GeneralRegisters {};
std::span<const std::pair<FEXCore::ARMEmitter::Register, FEXCore::ARMEmitter::Register>> GeneralPairRegisters {};
std::span<const FEXCore::ARMEmitter::VRegister> StaticFPRegisters {};
std::span<const FEXCore::ARMEmitter::VRegister> GeneralFPRegisters {};
std::span<const ARMEmitter::Register> ConfiguredDynamicRegisterBase {};
std::span<const ARMEmitter::Register> StaticRegisters {};
std::span<const ARMEmitter::Register> GeneralRegisters {};
std::span<const std::pair<ARMEmitter::Register, ARMEmitter::Register>> GeneralPairRegisters {};
std::span<const ARMEmitter::VRegister> StaticFPRegisters {};
std::span<const ARMEmitter::VRegister> GeneralFPRegisters {};
/**
* @name Register Allocation
@ -111,13 +110,13 @@ protected:
constexpr static uint8_t RA_64 = 1;
constexpr static uint8_t RA_FPR = 2;
void LoadConstant(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register Reg, uint64_t Constant, bool NOPPad = false);
void LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, uint64_t Constant, bool NOPPad = false);
// NOTE: These functions WILL clobber the register TMP4 if AVX support is enabled
// and FPRs are being spilled or filled. If only GPRs are spilled/filled, then
// TMP4 is left alone.
void SpillStaticRegs(FEXCore::ARMEmitter::Register TmpReg, bool FPRs = true, uint32_t GPRSpillMask = ~0U, uint32_t FPRSpillMask = ~0U);
void SpillStaticRegs(ARMEmitter::Register TmpReg, bool FPRs = true, uint32_t GPRSpillMask = ~0U, uint32_t FPRSpillMask = ~0U);
void FillStaticRegs(bool FPRs = true, uint32_t GPRFillMask = ~0U, uint32_t FPRFillMask = ~0U);
// Register 0-18 + 29 + 30 are caller saved
@ -128,13 +127,13 @@ protected:
static constexpr uint32_t CALLER_FPR_MASK = ~0U;
// Generic push and pop vector registers.
void PushVectorRegisters(FEXCore::ARMEmitter::Register TmpReg, bool SVERegs, std::span<const FEXCore::ARMEmitter::VRegister> VRegs);
void PushGeneralRegisters(FEXCore::ARMEmitter::Register TmpReg, std::span<const FEXCore::ARMEmitter::Register> Regs);
void PushVectorRegisters(ARMEmitter::Register TmpReg, bool SVERegs, std::span<const ARMEmitter::VRegister> VRegs);
void PushGeneralRegisters(ARMEmitter::Register TmpReg, std::span<const ARMEmitter::Register> Regs);
void PopVectorRegisters(bool SVERegs, std::span<const FEXCore::ARMEmitter::VRegister> VRegs);
void PopGeneralRegisters(std::span<const FEXCore::ARMEmitter::Register> Regs);
void PopVectorRegisters(bool SVERegs, std::span<const ARMEmitter::VRegister> VRegs);
void PopGeneralRegisters(std::span<const ARMEmitter::Register> Regs);
void PushDynamicRegsAndLR(FEXCore::ARMEmitter::Register TmpReg);
void PushDynamicRegsAndLR(ARMEmitter::Register TmpReg);
void PopDynamicRegsAndLR();
void PushCalleeSavedRegisters();
@ -150,10 +149,10 @@ protected:
// Callee Saved:
// - X9-X15, X19-X31
// - Low 128-bits of v8-v31
void SpillForPreserveAllABICall(FEXCore::ARMEmitter::Register TmpReg, bool FPRs = true);
void SpillForPreserveAllABICall(ARMEmitter::Register TmpReg, bool FPRs = true);
void FillForPreserveAllABICall(bool FPRs = true);
void SpillForABICall(bool SupportsPreserveAllABI, FEXCore::ARMEmitter::Register TmpReg, bool FPRs = true) {
void SpillForABICall(bool SupportsPreserveAllABI, ARMEmitter::Register TmpReg, bool FPRs = true) {
if (SupportsPreserveAllABI) {
SpillForPreserveAllABICall(TmpReg, FPRs);
} else {

View File

@ -1,7 +1,6 @@
// SPDX-License-Identifier: MIT
#include "Interface/Context/Context.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
#include "Interface/Core/Dispatcher/Dispatcher.h"
#include "Interface/Core/LookupCache.h"
#include "Interface/Core/X86HelperGen.h"
@ -17,6 +16,8 @@
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <CodeEmitter/Emitter.h>
#include <atomic>
#include <condition_variable>
#include <csignal>

View File

@ -7,8 +7,6 @@ $end_info$
#include "FEXCore/IR/IR.h"
#include "Interface/Context/Context.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Registers.h"
#include "Interface/Core/JIT/Arm64/JITClass.h"
#include "Interface/IR/Passes/RegisterAllocationPass.h"
@ -1373,23 +1371,23 @@ DEF_OP(Popcount) {
case 0x1:
fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), Src);
// only use lowest byte
cnt(FEXCore::ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());
cnt(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());
break;
case 0x2:
fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), Src);
cnt(FEXCore::ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());
cnt(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());
// only count two lowest bytes
addp(FEXCore::ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D(), VTMP1.D());
addp(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D(), VTMP1.D());
break;
case 0x4:
fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), Src);
cnt(FEXCore::ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());
cnt(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());
// fmov has zero extended, unused bytes are zero
addv(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());
break;
case 0x8:
fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), Src);
cnt(FEXCore::ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());
cnt(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());
// fmov has zero extended, unused bytes are zero
addv(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());
break;

View File

@ -6,7 +6,6 @@ $end_info$
*/
#include "Interface/Context/Context.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
#include "Interface/Core/Dispatcher/Dispatcher.h"
#include "Interface/Core/JIT/Arm64/JITClass.h"

View File

@ -7,7 +7,6 @@ $end_info$
#include "Interface/Context/Context.h"
#include "FEXCore/IR/IR.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
#include "Interface/Core/LookupCache.h"
#include "Interface/Core/JIT/Arm64/JITClass.h"

View File

@ -5,7 +5,6 @@ tags: backend|arm64
$end_info$
*/
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
#include "Interface/Core/JIT/Arm64/JITClass.h"
namespace FEXCore::CPU {
@ -66,7 +65,7 @@ DEF_OP(VInsGPR) {
// Inserts the GPR value into the given V register.
// Also automatically adjusts the index in the case of using the
// moved upper lane.
const auto Insert = [&](const FEXCore::ARMEmitter::VRegister& reg, int index) {
const auto Insert = [&](const ARMEmitter::VRegister& reg, int index) {
if (InUpperLane) {
index -= ElementsPer128Bit;
}
@ -353,23 +352,23 @@ DEF_OP(Vector_FToF) {
switch (Conv) {
case 0x0402: { // Float <- Half
zip1(FEXCore::ARMEmitter::SubRegSize::i16Bit, Dst.Z(), Vector.Z(), Vector.Z());
fcvtlt(FEXCore::ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Mask, Dst.Z());
zip1(ARMEmitter::SubRegSize::i16Bit, Dst.Z(), Vector.Z(), Vector.Z());
fcvtlt(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Mask, Dst.Z());
break;
}
case 0x0804: { // Double <- Float
zip1(FEXCore::ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Vector.Z(), Vector.Z());
fcvtlt(FEXCore::ARMEmitter::SubRegSize::i64Bit, Dst.Z(), Mask, Dst.Z());
zip1(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Vector.Z(), Vector.Z());
fcvtlt(ARMEmitter::SubRegSize::i64Bit, Dst.Z(), Mask, Dst.Z());
break;
}
case 0x0204: { // Half <- Float
fcvtnt(FEXCore::ARMEmitter::SubRegSize::i16Bit, Dst.Z(), Mask, Vector.Z());
uzp2(FEXCore::ARMEmitter::SubRegSize::i16Bit, Dst.Z(), Dst.Z(), Dst.Z());
fcvtnt(ARMEmitter::SubRegSize::i16Bit, Dst.Z(), Mask, Vector.Z());
uzp2(ARMEmitter::SubRegSize::i16Bit, Dst.Z(), Dst.Z(), Dst.Z());
break;
}
case 0x0408: { // Float <- Double
fcvtnt(FEXCore::ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Mask, Vector.Z());
uzp2(FEXCore::ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Dst.Z(), Dst.Z());
fcvtnt(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Mask, Vector.Z());
uzp2(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Dst.Z(), Dst.Z());
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Vector_FToF Type : 0x{:04x}", Conv); break;

View File

@ -5,9 +5,7 @@ tags: backend|arm64
$end_info$
*/
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
#include "Interface/Core/JIT/Arm64/JITClass.h"
#include "Interface/IR/Passes/RegisterAllocationPass.h"
namespace FEXCore::CPU {
#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const* IROp, IR::NodeID Node)

View File

@ -13,7 +13,6 @@ $end_info$
#include "FEXCore/Utils/Telemetry.h"
#include "Interface/Context/Context.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
#include "Interface/Core/LookupCache.h"
#include "Interface/Core/Dispatcher/Dispatcher.h"
@ -469,13 +468,13 @@ void Arm64JITCore::Op_Unhandled(const IR::IROp_Header* IROp, IR::NodeID Node) {
static void DirectBlockDelinker(FEXCore::Core::CpuStateFrame* Frame, FEXCore::Context::ExitFunctionLinkData* Record) {
auto LinkerAddress = Frame->Pointers.Common.ExitFunctionLinker;
uintptr_t branch = (uintptr_t)(Record)-8;
FEXCore::ARMEmitter::Emitter emit((uint8_t*)(branch), 8);
FEXCore::ARMEmitter::SingleUseForwardLabel l_BranchHost;
ARMEmitter::Emitter emit((uint8_t*)(branch), 8);
ARMEmitter::SingleUseForwardLabel l_BranchHost;
emit.ldr(TMP1, &l_BranchHost);
emit.blr(TMP1);
emit.Bind(&l_BranchHost);
emit.dc64(LinkerAddress);
FEXCore::ARMEmitter::Emitter::ClearICache((void*)branch, 8);
ARMEmitter::Emitter::ClearICache((void*)branch, 8);
}
static void IndirectBlockDelinker(FEXCore::Core::CpuStateFrame* Frame, FEXCore::Context::ExitFunctionLinkData* Record) {
@ -500,9 +499,9 @@ static uint64_t Arm64JITCore_ExitFunctionLink(FEXCore::Core::CpuStateFrame* Fram
if (vixl::IsInt26(offset)) {
// optimal case - can branch directly
// patch the code
FEXCore::ARMEmitter::Emitter emit((uint8_t*)(branch), 4);
ARMEmitter::Emitter emit((uint8_t*)(branch), 4);
emit.b(offset);
FEXCore::ARMEmitter::Emitter::ClearICache((void*)branch, 4);
ARMEmitter::Emitter::ClearICache((void*)branch, 4);
// Add de-linking handler
Thread->LookupCache->AddBlockLink(GuestRip, Record, DirectBlockDelinker);

View File

@ -8,7 +8,6 @@ $end_info$
#pragma once
#include "Interface/Core/ArchHelpers/Arm64Emitter.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
#include "Interface/Core/CPUBackend.h"
#include "Interface/Core/Dispatcher/Dispatcher.h"
#include "Interface/IR/IR.h"
@ -24,6 +23,8 @@ $end_info$
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>
#include <CodeEmitter/Emitter.h>
#include <array>
#include <cstdint>
#include <utility>
@ -83,7 +84,7 @@ private:
fextl::map<IR::NodeID, ARMEmitter::BiDirectionalLabel> JumpTargets;
[[nodiscard]]
FEXCore::ARMEmitter::Register GetReg(IR::NodeID Node) const {
ARMEmitter::Register GetReg(IR::NodeID Node) const {
const auto Reg = GetPhys(Node);
LOGMAN_THROW_AA_FMT(Reg.Class == IR::GPRFixedClass.Val || Reg.Class == IR::GPRClass.Val, "Unexpected Class: {}", Reg.Class);
@ -98,7 +99,7 @@ private:
}
[[nodiscard]]
FEXCore::ARMEmitter::VRegister GetVReg(IR::NodeID Node) const {
ARMEmitter::VRegister GetVReg(IR::NodeID Node) const {
const auto Reg = GetPhys(Node);
LOGMAN_THROW_AA_FMT(Reg.Class == IR::FPRFixedClass.Val || Reg.Class == IR::FPRClass.Val, "Unexpected Class: {}", Reg.Class);
@ -113,7 +114,7 @@ private:
}
[[nodiscard]]
std::pair<FEXCore::ARMEmitter::Register, FEXCore::ARMEmitter::Register> GetRegPair(IR::NodeID Node) const {
std::pair<ARMEmitter::Register, ARMEmitter::Register> GetRegPair(IR::NodeID Node) const {
const auto Reg = GetPhys(Node);
LOGMAN_THROW_AA_FMT(Reg.Class == IR::GPRPairClass.Val, "Unexpected Class: {}", Reg.Class);
@ -134,7 +135,7 @@ private:
}
[[nodiscard]]
FEXCore::ARMEmitter::Register GetZeroableReg(IR::OrderedNodeWrapper Src) const {
ARMEmitter::Register GetZeroableReg(IR::OrderedNodeWrapper Src) const {
uint64_t Const;
if (IsInlineConstant(Src, &Const)) {
LOGMAN_THROW_AA_FMT(Const == 0, "Only valid constant");
@ -162,8 +163,8 @@ private:
bool IsGPRPair(IR::NodeID Node) const;
[[nodiscard]]
FEXCore::ARMEmitter::ExtendedMemOperand GenerateMemOperand(
uint8_t AccessSize, FEXCore::ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset, IR::MemOffsetType OffsetType, uint8_t OffsetScale);
ARMEmitter::ExtendedMemOperand GenerateMemOperand(uint8_t AccessSize, ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset,
IR::MemOffsetType OffsetType, uint8_t OffsetScale);
// NOTE: Will use TMP1 as a way to encode immediates that happen to fall outside
// the limits of the scalar plus immediate variant of SVE load/stores.
@ -171,8 +172,8 @@ private:
// TMP1 is safe to use again once this memory operand is used with its
// equivalent loads or stores that this was called for.
[[nodiscard]]
FEXCore::ARMEmitter::SVEMemOperand GenerateSVEMemOperand(uint8_t AccessSize, FEXCore::ARMEmitter::Register Base,
IR::OrderedNodeWrapper Offset, IR::MemOffsetType OffsetType, uint8_t OffsetScale);
ARMEmitter::SVEMemOperand GenerateSVEMemOperand(uint8_t AccessSize, ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset,
IR::MemOffsetType OffsetType, uint8_t OffsetScale);
[[nodiscard]]
bool IsInlineConstant(const IR::OrderedNodeWrapper& Node, uint64_t* Value = nullptr) const;

View File

@ -7,8 +7,6 @@ $end_info$
#include "FEXCore/Core/X86Enums.h"
#include "Interface/Context/Context.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Registers.h"
#include "Interface/Core/CPUID.h"
#include "Interface/Core/JIT/Arm64/JITClass.h"
#include <FEXCore/Utils/CompilerDefs.h>
@ -170,7 +168,7 @@ DEF_OP(LoadContextIndexed) {
case 2:
case 4:
case 8: {
add(ARMEmitter::Size::i64Bit, TMP1, STATE, Index, FEXCore::ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Op->Stride));
add(ARMEmitter::Size::i64Bit, TMP1, STATE, Index, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Op->Stride));
const auto Dst = GetReg(Node);
switch (OpSize) {
case 1: ldrb(Dst, TMP1, Op->BaseOffset); break;
@ -192,7 +190,7 @@ DEF_OP(LoadContextIndexed) {
case 8:
case 16:
case 32: {
add(ARMEmitter::Size::i64Bit, TMP1, STATE, Index, FEXCore::ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Op->Stride));
add(ARMEmitter::Size::i64Bit, TMP1, STATE, Index, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Op->Stride));
const auto Dst = GetVReg(Node);
switch (OpSize) {
@ -235,7 +233,7 @@ DEF_OP(StoreContextIndexed) {
case 2:
case 4:
case 8: {
add(ARMEmitter::Size::i64Bit, TMP1, STATE, Index, FEXCore::ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Op->Stride));
add(ARMEmitter::Size::i64Bit, TMP1, STATE, Index, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Op->Stride));
switch (OpSize) {
case 1: strb(Value, TMP1, Op->BaseOffset); break;
@ -259,7 +257,7 @@ DEF_OP(StoreContextIndexed) {
case 8:
case 16:
case 32: {
add(ARMEmitter::Size::i64Bit, TMP1, STATE, Index, FEXCore::ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Op->Stride));
add(ARMEmitter::Size::i64Bit, TMP1, STATE, Index, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Op->Stride));
switch (OpSize) {
case 1: strb(Value, TMP1, Op->BaseOffset); break;
@ -552,8 +550,8 @@ DEF_OP(StoreFlag) {
strb(GetReg(Op->Value.ID()), STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag);
}
FEXCore::ARMEmitter::ExtendedMemOperand Arm64JITCore::GenerateMemOperand(
uint8_t AccessSize, FEXCore::ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset, IR::MemOffsetType OffsetType, uint8_t OffsetScale) {
ARMEmitter::ExtendedMemOperand Arm64JITCore::GenerateMemOperand(
uint8_t AccessSize, ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset, IR::MemOffsetType OffsetType, uint8_t OffsetScale) {
if (Offset.IsInvalid()) {
return ARMEmitter::ExtendedMemOperand(Base.X(), ARMEmitter::IndexType::OFFSET, 0);
} else {
@ -580,17 +578,16 @@ FEXCore::ARMEmitter::ExtendedMemOperand Arm64JITCore::GenerateMemOperand(
FEX_UNREACHABLE;
}
FEXCore::ARMEmitter::SVEMemOperand Arm64JITCore::GenerateSVEMemOperand(uint8_t AccessSize, FEXCore::ARMEmitter::Register Base,
IR::OrderedNodeWrapper Offset, IR::MemOffsetType OffsetType,
[[maybe_unused]] uint8_t OffsetScale) {
ARMEmitter::SVEMemOperand Arm64JITCore::GenerateSVEMemOperand(uint8_t AccessSize, ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset,
IR::MemOffsetType OffsetType, [[maybe_unused]] uint8_t OffsetScale) {
if (Offset.IsInvalid()) {
return FEXCore::ARMEmitter::SVEMemOperand(Base.X(), 0);
return ARMEmitter::SVEMemOperand(Base.X(), 0);
}
uint64_t Const {};
if (IsInlineConstant(Offset, &Const)) {
if (Const == 0) {
return FEXCore::ARMEmitter::SVEMemOperand(Base.X(), 0);
return ARMEmitter::SVEMemOperand(Base.X(), 0);
}
const auto SignedConst = static_cast<int64_t>(Const);
@ -613,13 +610,13 @@ FEXCore::ARMEmitter::SVEMemOperand Arm64JITCore::GenerateSVEMemOperand(uint8_t A
// then we can encode it as an immediate offset.
//
if (IsCleanlyDivisible && Index >= -8 && Index <= 7) {
return FEXCore::ARMEmitter::SVEMemOperand(Base.X(), static_cast<uint64_t>(Index));
return ARMEmitter::SVEMemOperand(Base.X(), static_cast<uint64_t>(Index));
}
// If we can't do that for whatever reason, then unfortunately, we need
// to move it over to a temporary to use as an offset.
mov(TMP1, Const);
return FEXCore::ARMEmitter::SVEMemOperand(Base.X(), TMP1);
return ARMEmitter::SVEMemOperand(Base.X(), TMP1);
}
// Otherwise handle it like normal.
@ -629,7 +626,7 @@ FEXCore::ARMEmitter::SVEMemOperand Arm64JITCore::GenerateSVEMemOperand(uint8_t A
LOGMAN_THROW_A_FMT(OffsetType.Val == IR::MEM_OFFSET_SXTX.Val, "Currently only the default offset type (SXTX) is supported.");
const auto RegOffset = GetReg(Offset.ID());
return FEXCore::ARMEmitter::SVEMemOperand(Base.X(), RegOffset.X());
return ARMEmitter::SVEMemOperand(Base.X(), RegOffset.X());
}
DEF_OP(LoadMem) {
@ -743,7 +740,7 @@ DEF_OP(LoadMemTSO) {
}
if (VectorTSOEnabled()) {
// Half-barrier.
dmb(FEXCore::ARMEmitter::BarrierScope::ISHLD);
dmb(ARMEmitter::BarrierScope::ISHLD);
}
}
}
@ -895,7 +892,7 @@ DEF_OP(VStoreVectorElement) {
// Emit a half-barrier if TSO is enabled.
if (CTX->IsAtomicTSOEnabled() && VectorTSOEnabled()) {
dmb(FEXCore::ARMEmitter::BarrierScope::ISH);
dmb(ARMEmitter::BarrierScope::ISH);
}
if (Is256Bit) {
@ -1141,7 +1138,7 @@ DEF_OP(StoreMemTSO) {
} else {
if (VectorTSOEnabled()) {
// Half-Barrier.
dmb(FEXCore::ARMEmitter::BarrierScope::ISH);
dmb(ARMEmitter::BarrierScope::ISH);
}
const auto Src = GetVReg(Op->Value.ID());
const auto MemSrc = GenerateMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
@ -1180,7 +1177,7 @@ DEF_OP(MemSet) {
uint64_t DirectionConstant;
bool DirectionIsInline = IsInlineConstant(Op->Direction, &DirectionConstant);
FEXCore::ARMEmitter::Register DirectionReg = ARMEmitter::Reg::r0;
ARMEmitter::Register DirectionReg = ARMEmitter::Reg::r0;
if (!DirectionIsInline) {
DirectionReg = GetReg(Op->Direction.ID());
}
@ -1369,7 +1366,7 @@ DEF_OP(MemCpy) {
const auto Length = GetReg(Op->Length.ID());
uint64_t DirectionConstant;
bool DirectionIsInline = IsInlineConstant(Op->Direction, &DirectionConstant);
FEXCore::ARMEmitter::Register DirectionReg = ARMEmitter::Reg::r0;
ARMEmitter::Register DirectionReg = ARMEmitter::Reg::r0;
if (!DirectionIsInline) {
DirectionReg = GetReg(Op->Direction.ID());
}
@ -1712,9 +1709,9 @@ DEF_OP(ParanoidLoadMemTSO) {
ins(ARMEmitter::SubRegSize::i64Bit, Dst, 1, TMP2);
break;
case 32:
dmb(FEXCore::ARMEmitter::BarrierScope::ISH);
dmb(ARMEmitter::BarrierScope::ISH);
ld1b<ARMEmitter::SubRegSize::i8Bit>(Dst.Z(), PRED_TMP_32B.Zeroing(), MemReg);
dmb(FEXCore::ARMEmitter::BarrierScope::ISH);
dmb(ARMEmitter::BarrierScope::ISH);
break;
default: LOGMAN_MSG_A_FMT("Unhandled ParanoidLoadMemTSO size: {}", OpSize); break;
}
@ -1788,9 +1785,9 @@ DEF_OP(ParanoidStoreMemTSO) {
break;
}
case 32: {
dmb(FEXCore::ARMEmitter::BarrierScope::ISH);
dmb(ARMEmitter::BarrierScope::ISH);
st1b<ARMEmitter::SubRegSize::i8Bit>(Src.Z(), PRED_TMP_32B, MemReg, 0);
dmb(FEXCore::ARMEmitter::BarrierScope::ISH);
dmb(ARMEmitter::BarrierScope::ISH);
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled ParanoidStoreMemTSO size: {}", OpSize); break;
@ -1818,7 +1815,7 @@ DEF_OP(CacheLineClear) {
if (Op->Serialize) {
// If requested, serialize all of the data cache operations.
dsb(FEXCore::ARMEmitter::BarrierScope::ISH);
dsb(ARMEmitter::BarrierScope::ISH);
}
}

View File

@ -10,7 +10,6 @@ $end_info$
#endif
#include "Interface/Context/Context.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
#include "Interface/Core/JIT/Arm64/JITClass.h"
#include "FEXCore/Debug/InternalThreadState.h"
@ -28,9 +27,9 @@ DEF_OP(GuestOpcode) {
DEF_OP(Fence) {
auto Op = IROp->C<IR::IROp_Fence>();
switch (Op->Fence) {
case IR::Fence_Load.Val: dmb(FEXCore::ARMEmitter::BarrierScope::LD); break;
case IR::Fence_LoadStore.Val: dmb(FEXCore::ARMEmitter::BarrierScope::SY); break;
case IR::Fence_Store.Val: dmb(FEXCore::ARMEmitter::BarrierScope::ST); break;
case IR::Fence_Load.Val: dmb(ARMEmitter::BarrierScope::LD); break;
case IR::Fence_LoadStore.Val: dmb(ARMEmitter::BarrierScope::SY); break;
case IR::Fence_Store.Val: dmb(ARMEmitter::BarrierScope::ST); break;
default: LOGMAN_MSG_A_FMT("Unknown Fence: {}", Op->Fence); break;
}
}

View File

@ -5,8 +5,6 @@ tags: backend|arm64
$end_info$
*/
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Registers.h"
#include "Interface/Core/Dispatcher/Dispatcher.h"
#include "Interface/Core/JIT/Arm64/JITClass.h"
@ -792,7 +790,7 @@ DEF_OP(LoadNamedVectorConstant) {
}
}
// Load the pointer.
auto GenerateMemOperand = [this](uint8_t OpSize, uint32_t NamedConstant, FEXCore::ARMEmitter::Register Base) {
auto GenerateMemOperand = [this](uint8_t OpSize, uint32_t NamedConstant, ARMEmitter::Register Base) {
const auto ConstantOffset = offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.NamedVectorConstants[NamedConstant]);
if (ConstantOffset <= 255 || // Unscaled 9-bit signed
@ -4794,7 +4792,8 @@ DEF_OP(VTBL2) {
mov(VTMP2.Q(), VectorTable2.Q());
}
static_assert(ARMEmitter::AreVectorsSequential(VTMP1, VTMP2), "VTMP1 and VTMP2 must be sequential in order to use double-table TBL");
static_assert(ARMEmitter::AreVectorsSequential(VTMP1, VTMP2), "VTMP1 and VTMP2 must be sequential in order to use double-table "
"TBL");
VectorTable1 = VTMP1;
VectorTable2 = VTMP2;
}

View File

@ -3,7 +3,7 @@
#include <catch2/catch_test_macros.hpp>
#include <fcntl.h>
using namespace FEXCore::ARMEmitter;
using namespace ARMEmitter;
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: PC relative") {
{

View File

@ -3,7 +3,7 @@
#include <catch2/catch_test_macros.hpp>
#include <fcntl.h>
using namespace FEXCore::ARMEmitter;
using namespace ARMEmitter;
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Cryptographic AES") {
if (false) {

View File

@ -3,7 +3,7 @@
#include <catch2/catch_test_macros.hpp>
#include <fcntl.h>
using namespace FEXCore::ARMEmitter;
using namespace ARMEmitter;
TEST_CASE_METHOD(TestDisassembler, "Emitter: Branch: Conditional branch immediate") {
{

View File

@ -3,7 +3,7 @@
#include <catch2/catch_test_macros.hpp>
#include <fcntl.h>
using namespace FEXCore::ARMEmitter;
using namespace ARMEmitter;
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Compare and swap pair") {
TEST_SINGLE(casp(Size::i32Bit, Reg::r28, Reg::r29, Reg::r26, Reg::r27, Reg::r30), "casp w28, w29, w26, w27, [x30]");

View File

@ -3,7 +3,7 @@
#include <catch2/catch_test_macros.hpp>
#include <fcntl.h>
using namespace FEXCore::ARMEmitter;
using namespace ARMEmitter;
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: Base Encodings") {
TEST_SINGLE(dup(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 0), "mov z30.b, b29");
@ -287,11 +287,11 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point convert pre
TEST_SINGLE(fcvtlt(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtlt z30.d, p6/m, z29.s");
// void fcvtxnt(FEXCore::ARMEmitter::ZRegister zd, FEXCore::ARMEmitter::PRegister pg, FEXCore::ARMEmitter::ZRegister zn) {
// void fcvtxnt(ARMEmitter::ZRegister zd, ARMEmitter::PRegister pg, ARMEmitter::ZRegister zn) {
/////< Size is destination size
// void fcvtnt(FEXCore::ARMEmitter::SubRegSize size, FEXCore::ARMEmitter::ZRegister zd, FEXCore::ARMEmitter::PRegister pg, FEXCore::ARMEmitter::ZRegister zn) {
// void fcvtnt(ARMEmitter::SubRegSize size, ARMEmitter::ZRegister zd, ARMEmitter::PRegister pg, ARMEmitter::ZRegister zn) {
/////< Size is destination size
// void fcvtlt(FEXCore::ARMEmitter::SubRegSize size, FEXCore::ARMEmitter::ZRegister zd, FEXCore::ARMEmitter::PRegister pg, FEXCore::ARMEmitter::ZRegister zn) {
// void fcvtlt(ARMEmitter::SubRegSize size, ARMEmitter::ZRegister zd, ARMEmitter::PRegister pg, ARMEmitter::ZRegister zn) {
// XXX: BFCVTNT
}
@ -1634,12 +1634,12 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE conditionally extract elem
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Permute Vector - Extract") {
TEST_SINGLE(ext<FEXCore::ARMEmitter::OpType::Destructive>(ZReg::z30, ZReg::z30, ZReg::z29, 0), "ext z30.b, z30.b, z29.b, #0");
TEST_SINGLE(ext<FEXCore::ARMEmitter::OpType::Destructive>(ZReg::z30, ZReg::z30, ZReg::z29, 255), "ext z30.b, z30.b, z29.b, #255");
TEST_SINGLE(ext<ARMEmitter::OpType::Destructive>(ZReg::z30, ZReg::z30, ZReg::z29, 0), "ext z30.b, z30.b, z29.b, #0");
TEST_SINGLE(ext<ARMEmitter::OpType::Destructive>(ZReg::z30, ZReg::z30, ZReg::z29, 255), "ext z30.b, z30.b, z29.b, #255");
TEST_SINGLE(ext<FEXCore::ARMEmitter::OpType::Constructive>(ZReg::z30, ZReg::z28, ZReg::z29, 0), "ext z30.b, {z28.b, z29.b}, #0");
TEST_SINGLE(ext<FEXCore::ARMEmitter::OpType::Constructive>(ZReg::z30, ZReg::z28, ZReg::z29, 255), "ext z30.b, {z28.b, z29.b}, #255");
TEST_SINGLE(ext<FEXCore::ARMEmitter::OpType::Constructive>(ZReg::z30, ZReg::z31, ZReg::z0, 255), "ext z30.b, {z31.b, z0.b}, #255");
TEST_SINGLE(ext<ARMEmitter::OpType::Constructive>(ZReg::z30, ZReg::z28, ZReg::z29, 0), "ext z30.b, {z28.b, z29.b}, #0");
TEST_SINGLE(ext<ARMEmitter::OpType::Constructive>(ZReg::z30, ZReg::z28, ZReg::z29, 255), "ext z30.b, {z28.b, z29.b}, #255");
TEST_SINGLE(ext<ARMEmitter::OpType::Constructive>(ZReg::z30, ZReg::z31, ZReg::z0, 255), "ext z30.b, {z31.b, z0.b}, #255");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE permute vector segments") {

View File

@ -3,7 +3,7 @@
#include <catch2/catch_test_macros.hpp>
#include <fcntl.h>
using namespace FEXCore::ARMEmitter;
using namespace ARMEmitter;
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar copy") {
TEST_SINGLE(dup(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 0), "mov b30, v29.b[0]");

View File

@ -3,7 +3,7 @@
#include <catch2/catch_test_macros.hpp>
#include <fcntl.h>
using namespace FEXCore::ARMEmitter;
using namespace ARMEmitter;
TEST_CASE_METHOD(TestDisassembler, "Emitter: System: System with result") {
// TODO: Implement in emitter.

View File

@ -1,7 +1,6 @@
#pragma once
#include <FEXCore/fextl/string.h>
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
#include <CodeEmitter/Emitter.h>
#include <aarch64/cpu-aarch64.h>
#include <aarch64/instructions-aarch64.h>
@ -9,7 +8,7 @@
#include <sys/mman.h>
class TestDisassembler : public FEXCore::ARMEmitter::Emitter {
class TestDisassembler : public ARMEmitter::Emitter {
public:
TestDisassembler() {
fp = tmpfile();