Mirror of https://github.com/FEX-Emu/FEX.git
Synced 2024-12-15 01:49:00 +00:00
Merge pull request #3919 from Sonicadvance1/remove_vestigial_vixl_usage
CodeEmitter: Removes vestigial vixl usage
Commit: 2da819c0f3
@@ -602,6 +602,18 @@ constexpr bool AreVectorsSequential(T first, const Args&... args) {
   return (fn(first, args) && ...);
 }
 
+// Returns if the immediate can fit in to add/sub immediate instruction encodings.
+constexpr bool IsImmAddSub(uint64_t imm) {
+  constexpr uint64_t U12Mask = 0xFFF;
+  auto FitsWithin12Bits = [](uint64_t imm) {
+    return (imm & ~U12Mask) == 0;
+  };
+  // Can fit in to the instruction encoding:
+  // - if only bits [11:0] are set.
+  // - if only bits [23:12] are set.
+  return FitsWithin12Bits(imm) || (FitsWithin12Bits(imm >> 12) && (imm & U12Mask) == 0);
+}
+
 // This is an emitter that is designed around the smallest code bloat as possible.
 // Eschewing most developer convenience in order to keep code as small as possible.
 
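A few worked values for the new predicate (a standalone sketch that mirrors the body above; it is not code from this PR):

#include <cstdint>

// Same check as the emitter's new IsImmAddSub, reproduced only to show which
// immediates the A64 add/sub encodings accept.
constexpr bool IsImmAddSubSketch(uint64_t imm) {
  constexpr uint64_t U12Mask = 0xFFF;
  auto Fits = [](uint64_t v) { return (v & ~0xFFFULL) == 0; };
  return Fits(imm) || (Fits(imm >> 12) && (imm & U12Mask) == 0);
}

static_assert(IsImmAddSubSketch(0xFFF));      // bits [11:0] only
static_assert(IsImmAddSubSketch(0xFFF000));   // bits [23:12] only (the LSL #12 form)
static_assert(!IsImmAddSubSketch(0x1001));    // needs 13 bits unshifted
static_assert(!IsImmAddSubSketch(0xFFF001));  // mixes low and shifted bits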
@@ -36,9 +36,9 @@
 // to by n, imm_s and imm_r are undefined.
 static bool IsImmLogical(uint64_t value,
                          unsigned width,
-                         unsigned* n,
-                         unsigned* imm_s,
-                         unsigned* imm_r) {
+                         unsigned* n = nullptr,
+                         unsigned* imm_s = nullptr,
+                         unsigned* imm_r = nullptr) {
   [[maybe_unused]] constexpr auto kBRegSize = 8;
   [[maybe_unused]] constexpr auto kHRegSize = 16;
   [[maybe_unused]] constexpr auto kSRegSize = 32;
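With the out-parameters defaulted, callers that only want a yes/no answer no longer have to pass three dummy pointers. A minimal usage sketch, assuming <CodeEmitter/Emitter.h> is on the include path:

#include <CodeEmitter/Emitter.h>

// Only the boolean result is needed; n/imm_s/imm_r now default to nullptr.
static bool CanEncodeAsOrr(uint64_t Constant) {
  return ARMEmitter::Emitter::IsImmLogical(Constant, 64);
}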
@@ -243,6 +243,46 @@ static bool IsImmLogical(uint64_t value,
   return true;
 }
 
+static inline bool IsIntN(unsigned n, int64_t x) {
+  if (n == 64) return true;
+  int64_t limit = INT64_C(1) << (n - 1);
+  return (-limit <= x) && (x < limit);
+}
+
+static inline bool IsUintN(unsigned n, int64_t x) {
+  // Convert to an unsigned integer to avoid implementation-defined behavior.
+  return !(static_cast<uint64_t>(x) >> n);
+}
+
+// clang-format off
+#define INT_1_TO_32_LIST(V) \
+  V(1) V(2) V(3) V(4) V(5) V(6) V(7) V(8) \
+  V(9) V(10) V(11) V(12) V(13) V(14) V(15) V(16) \
+  V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24) \
+  V(25) V(26) V(27) V(28) V(29) V(30) V(31) V(32)
+
+#define INT_33_TO_63_LIST(V) \
+  V(33) V(34) V(35) V(36) V(37) V(38) V(39) V(40) \
+  V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48) \
+  V(49) V(50) V(51) V(52) V(53) V(54) V(55) V(56) \
+  V(57) V(58) V(59) V(60) V(61) V(62) V(63)
+
+#define INT_1_TO_63_LIST(V) INT_1_TO_32_LIST(V) INT_33_TO_63_LIST(V)
+
+// clang-format on
+
+#define DECLARE_IS_INT_N(N) \
+  static inline bool IsInt##N(int64_t x) { return IsIntN(N, x); }
+
+#define DECLARE_IS_UINT_N(N) \
+  static inline bool IsUint##N(int64_t x) { return IsUintN(N, x); }
+
+INT_1_TO_63_LIST(DECLARE_IS_INT_N)
+INT_1_TO_63_LIST(DECLARE_IS_UINT_N)
+
+#undef DECLARE_IS_INT_N
+#undef DECLARE_IS_UINT_N
+
 private:
 
 template <typename V>
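Each DECLARE_IS_*_N invocation stamps out one fixed-width wrapper. For example, DECLARE_IS_INT_N(26) expands to roughly the following (a sketch of the preprocessor output, not extra code in the PR), which is the IsInt26 helper used by the branch-patching code further down:

static inline bool IsInt26(int64_t x) { return IsIntN(26, x); }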
@@ -14,10 +14,12 @@
 #include <CodeEmitter/Emitter.h>
 #include <CodeEmitter/Registers.h>
 
+#ifdef VIXL_DISASSEMBLER
 #include <aarch64/cpu-aarch64.h>
 #include <aarch64/instructions-aarch64.h>
 #include <cpu-features.h>
 #include <utils-vixl.h>
+#endif
 
 #include <array>
 #include <tuple>
@@ -349,8 +351,6 @@ Arm64Emitter::Arm64Emitter(FEXCore::Context::ContextImpl* ctx, void* EmissionPtr
   }
 #endif
 
-  CPU.SetUp();
-
   // Number of register available is dependent on what operating mode the proccess is in.
   if (EmitterCTX->Config.Is64BitMode()) {
     StaticRegisters = x64::SRA;
@@ -421,7 +421,7 @@ void Arm64Emitter::LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, ui
   if (RequiredMoveSegments > 1) {
     // Only try to use this path if the number of segments is > 1.
     // `movz` is better than `orr` since hardware will rename or merge if possible when `movz` is used.
-    const auto IsImm = vixl::aarch64::Assembler::IsImmLogical(Constant, RegSizeInBits(s));
+    const auto IsImm = ARMEmitter::Emitter::IsImmLogical(Constant, RegSizeInBits(s));
     if (IsImm) {
       orr(s, Reg, ARMEmitter::Reg::zr, Constant);
       if (NOPPad) {
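RequiredMoveSegments is the number of movz/movk instructions the constant would otherwise need, and the orr-from-zero path only pays off when that count is greater than one. A sketch of how such a count could be computed (hypothetical helper for illustration, not the PR's code):

#include <cstdint>

// Count how many non-zero 16-bit halfwords a 64-bit constant contains; each
// one needs its own movz/movk. A constant of zero still needs one movz.
static unsigned MoveSegmentCount(uint64_t Constant) {
  unsigned Count = 0;
  for (int Shift = 0; Shift < 64; Shift += 16) {
    if ((Constant >> Shift) & 0xFFFF) {
      ++Count;
    }
  }
  return Count ? Count : 1;
}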
@@ -458,7 +458,7 @@ void Arm64Emitter::LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, ui
 
   // If the aligned offset is within the 4GB window then we can use ADRP+ADD
   // and the number of move segments more than 1
-  if (RequiredMoveSegments > 1 && vixl::IsInt32(AlignedOffset)) {
+  if (RequiredMoveSegments > 1 && ARMEmitter::Emitter::IsInt32(AlignedOffset)) {
     // If this is 4k page aligned then we only need ADRP
     if ((AlignedOffset & 0xFFF) == 0) {
       adrp(Reg, AlignedOffset >> 12);
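ADRP materialises a 4 KiB page address, so unless the target is page aligned an ADD of the low 12 bits has to follow. A rough sketch of the page math this path relies on (the exact computation of AlignedOffset is outside this hunk, so treat this as an assumption):

#include <cstdint>

// Hypothetical illustration: distance between the constant's page and PC's page.
// ADRP can reach +/-4 GiB of pages, hence the IsInt32 check on this byte offset.
static int64_t PageOffset(uint64_t Constant, uint64_t PC) {
  return static_cast<int64_t>(Constant & ~0xFFFULL) - static_cast<int64_t>(PC & ~0xFFFULL);
}
// adrp receives PageOffset(...) >> 12; a follow-up add supplies (Constant & 0xFFF) when non-zero.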
@@ -466,7 +466,7 @@ void Arm64Emitter::LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, ui
   // If the constant is within 1MB of PC then we can still use ADR to load in a single instruction
   // 21-bit signed integer here
   int64_t SmallOffset = static_cast<int64_t>(Constant) - static_cast<int64_t>(PC);
-  if (vixl::IsInt21(SmallOffset)) {
+  if (ARMEmitter::Emitter::IsInt21(SmallOffset)) {
     adr(Reg, SmallOffset);
   } else {
     // Need to use ADRP + ADD
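The 1 MB figure in the comment follows directly from the 21-bit signed immediate: IsInt21 accepts offsets in [-2^20, 2^20), i.e. ADR reaches about one MiB in each direction from PC. As a quick check against the IsIntN helper added earlier:

// limit = 1 << (21 - 1) in IsIntN, so the accepted byte range is +/-1 MiB.
static_assert((1LL << 20) == 1048576, "ADR reach is one MiB in each direction");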
|
@ -94,7 +94,6 @@ protected:
|
||||
Arm64Emitter(FEXCore::Context::ContextImpl* ctx, void* EmissionPtr = nullptr, size_t size = 0);
|
||||
|
||||
FEXCore::Context::ContextImpl* EmitterCTX;
|
||||
vixl::aarch64::CPU CPU;
|
||||
|
||||
std::span<const ARMEmitter::Register> ConfiguredDynamicRegisterBase {};
|
||||
std::span<const ARMEmitter::Register> StaticRegisters {};
|
||||
|
@@ -51,11 +51,23 @@ static uint32_t GetMIDR() {
   return Result;
 }
 
+__attribute__((naked)) static uint64_t ReadSVEVectorLengthInBits() {
+  ///< Can't use rdvl instruction directly because compilers will complain that sve/sme is required.
+  __asm(R"(
+    .word 0x04bf5100 // rdvl x0, #8
+    ret;
+  )");
+}
 #else
 static uint32_t GetDCZID() {
   // Return unsupported
   return DCZID_DZP_MASK;
 }
+
+static int ReadSVEVectorLengthInBits() {
+  // Return unsupported
+  return 0;
+}
 #endif
 
 static void OverrideFeatures(HostFeatures* Features, uint64_t ForceSVEWidth) {
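The .word constant hand-encodes the instruction the compiler refuses to emit without SVE enabled. Assuming the published A64 encoding of RDVL (imm6 in bits [10:5], Rd in bits [4:0], result = imm * vector length in bytes, so #8 yields the length in bits), the constant can be reconstructed like this (a hedged sanity-check sketch, not code from this PR):

#include <cstdint>

// RDVL <Xd>, #<imm6>: base pattern 0x04BF5000 with imm6 at [10:5] and Rd at [4:0].
constexpr uint32_t EncodeRDVL(uint32_t Rd, int32_t Imm6) {
  return 0x04BF5000u | ((static_cast<uint32_t>(Imm6) & 0x3F) << 5) | (Rd & 0x1F);
}
static_assert(EncodeRDVL(0, 8) == 0x04bf5100, "matches the .word used above");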
@@ -164,7 +176,7 @@ HostFeatures::HostFeatures() {
   SupportsSVE256 = ForceSVEWidth() ? ForceSVEWidth() >= 256 : true;
 #else
   SupportsSVE128 = Features.Has(vixl::CPUFeatures::Feature::kSVE2);
-  SupportsSVE256 = Features.Has(vixl::CPUFeatures::Feature::kSVE2) && vixl::aarch64::CPU::ReadSVEVectorLengthInBits() >= 256;
+  SupportsSVE256 = Features.Has(vixl::CPUFeatures::Feature::kSVE2) && ReadSVEVectorLengthInBits() >= 256;
 #endif
   SupportsAVX = true;
 
|
@ -496,7 +496,7 @@ static uint64_t Arm64JITCore_ExitFunctionLink(FEXCore::Core::CpuStateFrame* Fram
|
||||
uintptr_t branch = (uintptr_t)(Record)-8;
|
||||
|
||||
auto offset = HostCode / 4 - branch / 4;
|
||||
if (vixl::IsInt26(offset)) {
|
||||
if (ARMEmitter::Emitter::IsInt26(offset)) {
|
||||
// optimal case - can branch directly
|
||||
// patch the code
|
||||
ARMEmitter::Emitter emit((uint8_t*)(branch), 4);
|
||||
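The offset is measured in 4-byte instruction words, so the 26-bit signed immediate of an unconditional B gives a +/-128 MiB patch window. A hedged sketch of what the direct-branch patch amounts to (the B opcode layout is taken from the A64 manual, not from this diff):

#include <cstdint>

// B <label>: opcode 0b000101 in the top six bits, signed 26-bit word offset below.
constexpr uint32_t EncodeBranch(int64_t WordOffset) {
  return 0x14000000u | (static_cast<uint32_t>(WordOffset) & 0x03FFFFFFu);
}
// Reach: (1 << 25) instructions forward or backward, times 4 bytes = 128 MiB each way.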
@@ -729,7 +729,7 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, const FEXCore
   if (SpillSlots) {
     const auto TotalSpillSlotsSize = SpillSlots * MaxSpillSlotSize;
 
-    if (vixl::aarch64::Assembler::IsImmAddSub(TotalSpillSlotsSize)) {
+    if (ARMEmitter::IsImmAddSub(TotalSpillSlotsSize)) {
       sub(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, TotalSpillSlotsSize);
     } else {
       LoadConstant(ARMEmitter::Size::i64Bit, TMP1, TotalSpillSlotsSize);
@@ -872,7 +872,7 @@ void Arm64JITCore::ResetStack() {
 
   const auto TotalSpillSlotsSize = SpillSlots * MaxSpillSlotSize;
 
-  if (vixl::aarch64::Assembler::IsImmAddSub(TotalSpillSlotsSize)) {
+  if (ARMEmitter::IsImmAddSub(TotalSpillSlotsSize)) {
     add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, TotalSpillSlotsSize);
   } else {
     // Too big to fit in a 12bit immediate
@@ -5,13 +5,7 @@ tags: ir|opts
 desc: ConstProp, ZExt elim, const pooling, fcmp reduction, const inlining
 $end_info$
 */
-
-
-// aarch64 heuristics
-#include "aarch64/assembler-aarch64.h"
-#include "aarch64/cpu-aarch64.h"
-#include "aarch64/disasm-aarch64.h"
-#include "aarch64/assembler-aarch64.h"
+#include <CodeEmitter/Emitter.h>
 
 #include "Interface/IR/IREmitter.h"
 #include "Interface/IR/PassManager.h"
@@ -56,10 +50,7 @@ static bool IsImmLogical(uint64_t imm, unsigned width) {
   if (width < 32) {
     width = 32;
   }
-  return vixl::aarch64::Assembler::IsImmLogical(imm, width);
-}
-static bool IsImmAddSub(uint64_t imm) {
-  return vixl::aarch64::Assembler::IsImmAddSub(imm);
+  return ARMEmitter::Emitter::IsImmLogical(imm, width);
 }
 
 static bool IsBfeAlreadyDone(IREmitter* IREmit, OrderedNodeWrapper src, uint64_t Width) {
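The width clamp exists because A64 logical immediates are only defined for 32- and 64-bit operations, so 8/16-bit IR ops are checked as if they were 32-bit. A short hedged sketch of what the underlying predicate accepts (repeating bit patterns encode; all-zero and all-one values never do), assuming <CodeEmitter/Emitter.h> as included by the pass above:

#include <CodeEmitter/Emitter.h>

static void ImmLogicalExamples() {
  // A repeating 16-bit element such as 0x00FF is encodable as a logical immediate.
  [[maybe_unused]] bool Encodable = ARMEmitter::Emitter::IsImmLogical(0x00FF00FF00FF00FFULL, 64); // true
  // Zero (and all-ones) can never be expressed as a logical immediate.
  [[maybe_unused]] bool Zero = ARMEmitter::Emitter::IsImmLogical(0, 64);                           // false
}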
@@ -166,7 +157,7 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
   } else if (IsConstant1 && IsConstant2 && IROp->Op == OP_SUB) {
     uint64_t NewConstant = (Constant1 - Constant2) & getMask(IROp);
     IREmit->ReplaceWithConstant(CodeNode, NewConstant);
-  } else if (IsConstant2 && !IsImmAddSub(Constant2) && IsImmAddSub(-Constant2)) {
+  } else if (IsConstant2 && !ARMEmitter::IsImmAddSub(Constant2) && ARMEmitter::IsImmAddSub(-Constant2)) {
     // If the second argument is constant, the immediate is not ImmAddSub, but when negated is.
     // So, negate the operation to negate (and inline) the constant.
     if (IROp->Op == OP_ADD) {
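A concrete instance of the negation trick, worked through with the IsImmAddSubSketch helper from the example after the first hunk (purely illustrative): adding -4 cannot be encoded as an add/sub immediate, but subtracting 4 can, so the pass flips the operation and inlines the negated constant.

// Reusing IsImmAddSubSketch from the earlier example:
constexpr uint64_t C = static_cast<uint64_t>(-4);  // the constant in "x + (-4)"
static_assert(!IsImmAddSubSketch(C));              // 0xFFFFFFFFFFFFFFFC: not encodable
static_assert(IsImmAddSubSketch(-C));              // 4: encodable, so emit "x - 4" instead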
@@ -611,7 +602,7 @@ void ConstProp::ConstantInlining(IREmitter* IREmit, const IRListView& CurrentIR)
   if (IREmit->IsValueConstant(IROp->Args[1], &Constant2)) {
     // We don't allow 8/16-bit operations to have constants, since no
     // constant would be in bounds after the JIT's 24/16 shift.
-    if (IsImmAddSub(Constant2) && IROp->Size >= 4) {
+    if (ARMEmitter::IsImmAddSub(Constant2) && IROp->Size >= 4) {
       IREmit->SetWriteCursor(CurrentIR.GetNode(IROp->Args[1]));
       IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2));
     }
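If the 24/16 shift mentioned in the comment refers to the JIT placing 8/16-bit values in the top of a 32-bit register (an assumption on my part; that code is not shown in this diff), then the arithmetic behind the restriction is simply that even the smallest non-zero 8-bit constant overshoots the largest add/sub immediate once shifted up by 24:

// Largest add/sub immediate is 0xFFF << 12 = 0xFFF000, which 1 << 24 already exceeds.
static_assert((1u << 24) > 0xFFF000u);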
@@ -656,7 +647,7 @@ void ConstProp::ConstantInlining(IREmitter* IREmit, const IRListView& CurrentIR)
   case OP_CONDSUBNZCV: {
     uint64_t Constant2 {};
     if (IREmit->IsValueConstant(IROp->Args[1], &Constant2)) {
-      if (IsImmAddSub(Constant2)) {
+      if (ARMEmitter::IsImmAddSub(Constant2)) {
        IREmit->SetWriteCursor(CurrentIR.GetNode(IROp->Args[1]));
        IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2));
       }
@@ -684,7 +675,7 @@ void ConstProp::ConstantInlining(IREmitter* IREmit, const IRListView& CurrentIR)
   case OP_SELECT: {
     uint64_t Constant1 {};
     if (IREmit->IsValueConstant(IROp->Args[1], &Constant1)) {
-      if (IsImmAddSub(Constant1)) {
+      if (ARMEmitter::IsImmAddSub(Constant1)) {
        IREmit->SetWriteCursor(CurrentIR.GetNode(IROp->Args[1]));
        IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant1));
       }
@@ -726,7 +717,7 @@ void ConstProp::ConstantInlining(IREmitter* IREmit, const IRListView& CurrentIR)
   case OP_CONDJUMP: {
     uint64_t Constant2 {};
     if (IREmit->IsValueConstant(IROp->Args[1], &Constant2)) {
-      if (IsImmAddSub(Constant2)) {
+      if (ARMEmitter::IsImmAddSub(Constant2)) {
        IREmit->SetWriteCursor(CurrentIR.GetNode(IROp->Args[1]));
        IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2));
       }