Merge pull request #3919 from Sonicadvance1/remove_vestigial_vixl_usage

CodeEmitter: Removes vestigial vixl usage
This commit is contained in:
Alyssa Rosenzweig 2024-08-06 09:26:24 -04:00 committed by GitHub
commit 2da819c0f3
7 changed files with 83 additions and 29 deletions

View File

@ -602,6 +602,18 @@ constexpr bool AreVectorsSequential(T first, const Args&... args) {
return (fn(first, args) && ...);
}
// Returns whether the immediate can fit into the add/sub immediate instruction encodings.
constexpr bool IsImmAddSub(uint64_t imm) {
constexpr uint64_t U12Mask = 0xFFF;
auto FitsWithin12Bits = [](uint64_t imm) {
return (imm & ~U12Mask) == 0;
};
// Can fit into the instruction encoding:
// - if only bits [11:0] are set.
// - if only bits [23:12] are set.
return FitsWithin12Bits(imm) || (FitsWithin12Bits(imm >> 12) && (imm & U12Mask) == 0);
}
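For reference, a minimal compile-time sanity sketch of this helper (assuming <CodeEmitter/Emitter.h> is included; the literals are purely illustrative). ADD/SUB (immediate) accepts a 12-bit value, optionally shifted left by 12:
static_assert(ARMEmitter::IsImmAddSub(0xFFF));      // only bits [11:0] set
static_assert(ARMEmitter::IsImmAddSub(0xFFF000));   // only bits [23:12] set
static_assert(!ARMEmitter::IsImmAddSub(0xFFF001));  // straddles both halves
static_assert(!ARMEmitter::IsImmAddSub(0x1000000)); // bit 24 set, too large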
// This is an emitter designed to produce as little code bloat as possible,
// eschewing most developer convenience in order to keep code as small as possible.

View File

@ -36,9 +36,9 @@
// to by n, imm_s and imm_r are undefined.
static bool IsImmLogical(uint64_t value,
unsigned width,
unsigned* n,
unsigned* imm_s,
unsigned* imm_r) {
unsigned* n = nullptr,
unsigned* imm_s = nullptr,
unsigned* imm_r = nullptr) {
[[maybe_unused]] constexpr auto kBRegSize = 8;
[[maybe_unused]] constexpr auto kHRegSize = 16;
[[maybe_unused]] constexpr auto kSRegSize = 32;
@ -243,6 +243,46 @@ static bool IsImmLogical(uint64_t value,
return true;
}
static inline bool IsIntN(unsigned n, int64_t x) {
if (n == 64) return true;
int64_t limit = INT64_C(1) << (n - 1);
return (-limit <= x) && (x < limit);
}
static inline bool IsUintN(unsigned n, int64_t x) {
// Convert to an unsigned integer to avoid implementation-defined behavior.
return !(static_cast<uint64_t>(x) >> n);
}
// clang-format off
#define INT_1_TO_32_LIST(V) \
V(1) V(2) V(3) V(4) V(5) V(6) V(7) V(8) \
V(9) V(10) V(11) V(12) V(13) V(14) V(15) V(16) \
V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24) \
V(25) V(26) V(27) V(28) V(29) V(30) V(31) V(32)
#define INT_33_TO_63_LIST(V) \
V(33) V(34) V(35) V(36) V(37) V(38) V(39) V(40) \
V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48) \
V(49) V(50) V(51) V(52) V(53) V(54) V(55) V(56) \
V(57) V(58) V(59) V(60) V(61) V(62) V(63)
#define INT_1_TO_63_LIST(V) INT_1_TO_32_LIST(V) INT_33_TO_63_LIST(V)
// clang-format on
#define DECLARE_IS_INT_N(N) \
static inline bool IsInt##N(int64_t x) { return IsIntN(N, x); }
#define DECLARE_IS_UINT_N(N) \
static inline bool IsUint##N(int64_t x) { return IsUintN(N, x); }
INT_1_TO_63_LIST(DECLARE_IS_INT_N)
INT_1_TO_63_LIST(DECLARE_IS_UINT_N)
#undef DECLARE_IS_INT_N
#undef DECLARE_IS_UINT_N
private:
template <typename V>
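The macro lists above stamp out IsInt1..IsInt63 and IsUint1..IsUint63 wrappers around IsIntN/IsUintN; a brief usage sketch (the offsets are illustrative, and the helpers are called through ARMEmitter::Emitter as the later call sites do):
int64_t AdrOffset = 0xF0000;                                      // below 2^20, fits a signed 21-bit field
bool CanUseAdr = ARMEmitter::Emitter::IsInt21(AdrOffset);         // true
int64_t BranchWords = int64_t{1} << 30;                           // exceeds the signed 26-bit branch range
bool CanBranchDirect = ARMEmitter::Emitter::IsInt26(BranchWords); // false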

View File

@ -14,10 +14,12 @@
#include <CodeEmitter/Emitter.h>
#include <CodeEmitter/Registers.h>
#ifdef VIXL_DISASSEMBLER
#include <aarch64/cpu-aarch64.h>
#include <aarch64/instructions-aarch64.h>
#include <cpu-features.h>
#include <utils-vixl.h>
#endif
#include <array>
#include <tuple>
@ -349,8 +351,6 @@ Arm64Emitter::Arm64Emitter(FEXCore::Context::ContextImpl* ctx, void* EmissionPtr
}
#endif
CPU.SetUp();
// The number of registers available depends on what operating mode the process is in.
if (EmitterCTX->Config.Is64BitMode()) {
StaticRegisters = x64::SRA;
@ -421,7 +421,7 @@ void Arm64Emitter::LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, ui
if (RequiredMoveSegments > 1) {
// Only try to use this path if the number of segments is > 1.
// `movz` is better than `orr` since hardware will rename or merge if possible when `movz` is used.
const auto IsImm = vixl::aarch64::Assembler::IsImmLogical(Constant, RegSizeInBits(s));
const auto IsImm = ARMEmitter::Emitter::IsImmLogical(Constant, RegSizeInBits(s));
if (IsImm) {
orr(s, Reg, ARMEmitter::Reg::zr, Constant);
if (NOPPad) {
@ -458,7 +458,7 @@ void Arm64Emitter::LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, ui
// If the aligned offset is within the 4GB window and the number of move
// segments is more than 1, then we can use ADRP+ADD
if (RequiredMoveSegments > 1 && vixl::IsInt32(AlignedOffset)) {
if (RequiredMoveSegments > 1 && ARMEmitter::Emitter::IsInt32(AlignedOffset)) {
// If this is 4k page aligned then we only need ADRP
if ((AlignedOffset & 0xFFF) == 0) {
adrp(Reg, AlignedOffset >> 12);
@ -466,7 +466,7 @@ void Arm64Emitter::LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, ui
// If the constant is within 1MB of PC then we can still use ADR to load in a single instruction
// 21-bit signed integer here
int64_t SmallOffset = static_cast<int64_t>(Constant) - static_cast<int64_t>(PC);
if (vixl::IsInt21(SmallOffset)) {
if (ARMEmitter::Emitter::IsInt21(SmallOffset)) {
adr(Reg, SmallOffset);
} else {
// Need to use ADRP + ADD
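Summarizing the gates in this function (a rough sketch, not the full LoadConstant logic; Constant, PC and s are the values used above, and the AlignedOffset computation here is an assumption): ORR needs a bitmask-encodable immediate, ADR takes a signed 21-bit byte offset (about +-1MiB of PC), and ADRP+ADD is used when the page-aligned offset fits a signed 32-bit value, i.e. the 4GB window:
const bool CanUseOrr = ARMEmitter::Emitter::IsImmLogical(Constant, RegSizeInBits(s));
const int64_t SmallOffset = static_cast<int64_t>(Constant) - static_cast<int64_t>(PC);
const int64_t AlignedOffset = static_cast<int64_t>(Constant & ~0xFFFULL) - static_cast<int64_t>(PC & ~0xFFFULL);
const bool CanUseAdr = ARMEmitter::Emitter::IsInt21(SmallOffset);       // single ADR
const bool CanUseAdrpAdd = ARMEmitter::Emitter::IsInt32(AlignedOffset); // ADRP, plus ADD for the low 12 bits of Constant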

View File

@ -94,7 +94,6 @@ protected:
Arm64Emitter(FEXCore::Context::ContextImpl* ctx, void* EmissionPtr = nullptr, size_t size = 0);
FEXCore::Context::ContextImpl* EmitterCTX;
vixl::aarch64::CPU CPU;
std::span<const ARMEmitter::Register> ConfiguredDynamicRegisterBase {};
std::span<const ARMEmitter::Register> StaticRegisters {};

View File

@ -51,11 +51,23 @@ static uint32_t GetMIDR() {
return Result;
}
__attribute__((naked)) static uint64_t ReadSVEVectorLengthInBits() {
///< Can't use the rdvl instruction directly because compilers will complain that SVE/SME is required.
__asm(R"(
.word 0x04bf5100 // rdvl x0, #8
ret;
)");
}
#else
static uint32_t GetDCZID() {
// Return unsupported
return DCZID_DZP_MASK;
}
static int ReadSVEVectorLengthInBits() {
// Return unsupported
return 0;
}
#endif
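For context on the hand-encoded instruction: RDVL Xd, #imm returns imm multiplied by the SVE vector length in bytes, so #8 yields the length in bits directly; a hedged usage sketch (values illustrative):
// On a 256-bit SVE implementation VL is 32 bytes, so rdvl x0, #8 returns 256.
// The non-SVE fallback above returns 0, which fails any >= width comparison.
const uint64_t VectorLengthBits = ReadSVEVectorLengthInBits();
const bool HasAtLeast256BitVectors = VectorLengthBits >= 256; // mirrors the SupportsSVE256 check below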
static void OverrideFeatures(HostFeatures* Features, uint64_t ForceSVEWidth) {
@ -164,7 +176,7 @@ HostFeatures::HostFeatures() {
SupportsSVE256 = ForceSVEWidth() ? ForceSVEWidth() >= 256 : true;
#else
SupportsSVE128 = Features.Has(vixl::CPUFeatures::Feature::kSVE2);
SupportsSVE256 = Features.Has(vixl::CPUFeatures::Feature::kSVE2) && vixl::aarch64::CPU::ReadSVEVectorLengthInBits() >= 256;
SupportsSVE256 = Features.Has(vixl::CPUFeatures::Feature::kSVE2) && ReadSVEVectorLengthInBits() >= 256;
#endif
SupportsAVX = true;

View File

@ -496,7 +496,7 @@ static uint64_t Arm64JITCore_ExitFunctionLink(FEXCore::Core::CpuStateFrame* Fram
uintptr_t branch = (uintptr_t)(Record)-8;
auto offset = HostCode / 4 - branch / 4;
if (vixl::IsInt26(offset)) {
if (ARMEmitter::Emitter::IsInt26(offset)) {
// optimal case - can branch directly
// patch the code
ARMEmitter::Emitter emit((uint8_t*)(branch), 4);
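B/BL encode a signed 26-bit immediate counted in 4-byte instruction words, so direct patching only works within roughly +-128MiB of the branch site; a minimal sketch of that reach check (names hypothetical, values taken from the computation above):
const int64_t WordOffset = static_cast<int64_t>(HostCode / 4) - static_cast<int64_t>(branch / 4);
const bool CanPatchDirectBranch = ARMEmitter::Emitter::IsInt26(WordOffset); // true in the optimal case shown above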
@ -729,7 +729,7 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, const FEXCore
if (SpillSlots) {
const auto TotalSpillSlotsSize = SpillSlots * MaxSpillSlotSize;
if (vixl::aarch64::Assembler::IsImmAddSub(TotalSpillSlotsSize)) {
if (ARMEmitter::IsImmAddSub(TotalSpillSlotsSize)) {
sub(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, TotalSpillSlotsSize);
} else {
LoadConstant(ARMEmitter::Size::i64Bit, TMP1, TotalSpillSlotsSize);
@ -872,7 +872,7 @@ void Arm64JITCore::ResetStack() {
const auto TotalSpillSlotsSize = SpillSlots * MaxSpillSlotSize;
if (vixl::aarch64::Assembler::IsImmAddSub(TotalSpillSlotsSize)) {
if (ARMEmitter::IsImmAddSub(TotalSpillSlotsSize)) {
add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, TotalSpillSlotsSize);
} else {
// Too big to fit in a 12-bit immediate

View File

@ -5,13 +5,7 @@ tags: ir|opts
desc: ConstProp, ZExt elim, const pooling, fcmp reduction, const inlining
$end_info$
*/
// aarch64 heuristics
#include "aarch64/assembler-aarch64.h"
#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/assembler-aarch64.h"
#include <CodeEmitter/Emitter.h>
#include "Interface/IR/IREmitter.h"
#include "Interface/IR/PassManager.h"
@ -56,10 +50,7 @@ static bool IsImmLogical(uint64_t imm, unsigned width) {
if (width < 32) {
width = 32;
}
return vixl::aarch64::Assembler::IsImmLogical(imm, width);
}
static bool IsImmAddSub(uint64_t imm) {
return vixl::aarch64::Assembler::IsImmAddSub(imm);
return ARMEmitter::Emitter::IsImmLogical(imm, width);
}
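AArch64 logical (bitmask) immediates only exist for 32-bit and 64-bit operand sizes, which is why narrower IR widths are clamped to 32 before the check; a small illustrative example (the constant is hypothetical):
// A 16-bit IR constant is validated against the 32-bit encoding.
const bool Encodable = IsImmLogical(0x00FF, 16); // width clamps to 32; 0xFF is a valid bitmask immediate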
static bool IsBfeAlreadyDone(IREmitter* IREmit, OrderedNodeWrapper src, uint64_t Width) {
@ -166,7 +157,7 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
} else if (IsConstant1 && IsConstant2 && IROp->Op == OP_SUB) {
uint64_t NewConstant = (Constant1 - Constant2) & getMask(IROp);
IREmit->ReplaceWithConstant(CodeNode, NewConstant);
} else if (IsConstant2 && !IsImmAddSub(Constant2) && IsImmAddSub(-Constant2)) {
} else if (IsConstant2 && !ARMEmitter::IsImmAddSub(Constant2) && ARMEmitter::IsImmAddSub(-Constant2)) {
// If the second argument is constant, the immediate is not ImmAddSub, but its negation is.
// So, negate the operation to negate (and inline) the constant.
if (IROp->Op == OP_ADD) {
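A concrete (hypothetical) instance of the trick: an ADD of 0xFFFFFFFFFFFFF000 cannot be encoded as an add/sub immediate, but its negation 0x1000 can, so the pass flips the op to SUB and inlines the negated constant:
static_assert(!ARMEmitter::IsImmAddSub(0xFFFFFFFFFFFFF000ULL));
static_assert(ARMEmitter::IsImmAddSub(uint64_t{0} - 0xFFFFFFFFFFFFF000ULL)); // == 0x1000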
@ -611,7 +602,7 @@ void ConstProp::ConstantInlining(IREmitter* IREmit, const IRListView& CurrentIR)
if (IREmit->IsValueConstant(IROp->Args[1], &Constant2)) {
// We don't allow 8/16-bit operations to have constants, since no
// constant would be in bounds after the JIT's 24/16 shift.
if (IsImmAddSub(Constant2) && IROp->Size >= 4) {
if (ARMEmitter::IsImmAddSub(Constant2) && IROp->Size >= 4) {
IREmit->SetWriteCursor(CurrentIR.GetNode(IROp->Args[1]));
IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2));
}
@ -656,7 +647,7 @@ void ConstProp::ConstantInlining(IREmitter* IREmit, const IRListView& CurrentIR)
case OP_CONDSUBNZCV: {
uint64_t Constant2 {};
if (IREmit->IsValueConstant(IROp->Args[1], &Constant2)) {
if (IsImmAddSub(Constant2)) {
if (ARMEmitter::IsImmAddSub(Constant2)) {
IREmit->SetWriteCursor(CurrentIR.GetNode(IROp->Args[1]));
IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2));
}
@ -684,7 +675,7 @@ void ConstProp::ConstantInlining(IREmitter* IREmit, const IRListView& CurrentIR)
case OP_SELECT: {
uint64_t Constant1 {};
if (IREmit->IsValueConstant(IROp->Args[1], &Constant1)) {
if (IsImmAddSub(Constant1)) {
if (ARMEmitter::IsImmAddSub(Constant1)) {
IREmit->SetWriteCursor(CurrentIR.GetNode(IROp->Args[1]));
IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant1));
}
@ -726,7 +717,7 @@ void ConstProp::ConstantInlining(IREmitter* IREmit, const IRListView& CurrentIR)
case OP_CONDJUMP: {
uint64_t Constant2 {};
if (IREmit->IsValueConstant(IROp->Args[1], &Constant2)) {
if (IsImmAddSub(Constant2)) {
if (ARMEmitter::IsImmAddSub(Constant2)) {
IREmit->SetWriteCursor(CurrentIR.GetNode(IROp->Args[1]));
IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2));
}