Merge pull request #3919 from Sonicadvance1/remove_vestigial_vixl_usage

CodeEmitter: Removes vestigial vixl usage
This commit is contained in:
Alyssa Rosenzweig 2024-08-06 09:26:24 -04:00 committed by GitHub
commit 2da819c0f3
7 changed files with 83 additions and 29 deletions

View File

@ -602,6 +602,18 @@ constexpr bool AreVectorsSequential(T first, const Args&... args) {
return (fn(first, args) && ...);
}
// Returns whether the immediate can fit into the add/sub immediate instruction encodings.
constexpr bool IsImmAddSub(uint64_t imm) {
constexpr uint64_t U12Mask = 0xFFF;
auto FitsWithin12Bits = [](uint64_t imm) {
return (imm & ~U12Mask) == 0;
};
// Can fit into the instruction encoding:
// - if only bits [11:0] are set.
// - if only bits [23:12] are set.
return FitsWithin12Bits(imm) || (FitsWithin12Bits(imm >> 12) && (imm & U12Mask) == 0);
}
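For reference, a minimal compile-time sanity sketch of this helper (assuming <CodeEmitter/Emitter.h> is included; the literals are purely illustrative). ADD/SUB (immediate) accepts a 12-bit value, optionally shifted left by 12:
static_assert(ARMEmitter::IsImmAddSub(0xFFF));      // only bits [11:0] set
static_assert(ARMEmitter::IsImmAddSub(0xFFF000));   // only bits [23:12] set
static_assert(!ARMEmitter::IsImmAddSub(0xFFF001));  // straddles both halves
static_assert(!ARMEmitter::IsImmAddSub(0x1000000)); // bit 24 set, too large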
// This is an emitter designed to produce as little code bloat as possible,
// eschewing most developer convenience in order to keep code as small as possible.

View File

@ -36,9 +36,9 @@
// to by n, imm_s and imm_r are undefined.
static bool IsImmLogical(uint64_t value,
unsigned width,
unsigned* n,
unsigned* imm_s,
unsigned* imm_r) {
unsigned* n = nullptr,
unsigned* imm_s = nullptr,
unsigned* imm_r = nullptr) {
[[maybe_unused]] constexpr auto kBRegSize = 8;
[[maybe_unused]] constexpr auto kHRegSize = 16;
[[maybe_unused]] constexpr auto kSRegSize = 32;
@ -243,6 +243,46 @@ static bool IsImmLogical(uint64_t value,
return true;
}
static inline bool IsIntN(unsigned n, int64_t x) {
if (n == 64) return true;
int64_t limit = INT64_C(1) << (n - 1);
return (-limit <= x) && (x < limit);
}
static inline bool IsUintN(unsigned n, int64_t x) {
// Convert to an unsigned integer to avoid implementation-defined behavior.
return !(static_cast<uint64_t>(x) >> n);
}
// clang-format off
#define INT_1_TO_32_LIST(V) \
V(1) V(2) V(3) V(4) V(5) V(6) V(7) V(8) \
V(9) V(10) V(11) V(12) V(13) V(14) V(15) V(16) \
V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24) \
V(25) V(26) V(27) V(28) V(29) V(30) V(31) V(32)
#define INT_33_TO_63_LIST(V) \
V(33) V(34) V(35) V(36) V(37) V(38) V(39) V(40) \
V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48) \
V(49) V(50) V(51) V(52) V(53) V(54) V(55) V(56) \
V(57) V(58) V(59) V(60) V(61) V(62) V(63)
#define INT_1_TO_63_LIST(V) INT_1_TO_32_LIST(V) INT_33_TO_63_LIST(V)
// clang-format on
#define DECLARE_IS_INT_N(N) \
static inline bool IsInt##N(int64_t x) { return IsIntN(N, x); }
#define DECLARE_IS_UINT_N(N) \
static inline bool IsUint##N(int64_t x) { return IsUintN(N, x); }
INT_1_TO_63_LIST(DECLARE_IS_INT_N)
INT_1_TO_63_LIST(DECLARE_IS_UINT_N)
#undef DECLARE_IS_INT_N
#undef DECLARE_IS_UINT_N
private:
template <typename V>
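The macro lists above stamp out IsInt1..IsInt63 and IsUint1..IsUint63 wrappers around IsIntN/IsUintN; a brief usage sketch (the offsets are illustrative, and the helpers are called through ARMEmitter::Emitter as the later call sites do):
int64_t AdrOffset = 0xF0000;                                      // below 2^20, fits a signed 21-bit field
bool CanUseAdr = ARMEmitter::Emitter::IsInt21(AdrOffset);         // true
int64_t BranchWords = int64_t{1} << 30;                           // exceeds the signed 26-bit branch range
bool CanBranchDirect = ARMEmitter::Emitter::IsInt26(BranchWords); // false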

View File

@ -14,10 +14,12 @@
#include <CodeEmitter/Emitter.h>
#include <CodeEmitter/Registers.h>
#ifdef VIXL_DISASSEMBLER
#include <aarch64/cpu-aarch64.h>
#include <aarch64/instructions-aarch64.h>
#include <cpu-features.h>
#include <utils-vixl.h>
#endif
#include <array>
#include <tuple>
@ -349,8 +351,6 @@ Arm64Emitter::Arm64Emitter(FEXCore::Context::ContextImpl* ctx, void* EmissionPtr
}
#endif
CPU.SetUp();
// The number of registers available depends on what operating mode the process is in.
if (EmitterCTX->Config.Is64BitMode()) {
StaticRegisters = x64::SRA;
@ -421,7 +421,7 @@ void Arm64Emitter::LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, ui
if (RequiredMoveSegments > 1) {
// Only try to use this path if the number of segments is > 1.
// `movz` is better than `orr` since hardware will rename or merge if possible when `movz` is used.
const auto IsImm = vixl::aarch64::Assembler::IsImmLogical(Constant, RegSizeInBits(s));
const auto IsImm = ARMEmitter::Emitter::IsImmLogical(Constant, RegSizeInBits(s));
if (IsImm) {
orr(s, Reg, ARMEmitter::Reg::zr, Constant);
if (NOPPad) {
@ -458,7 +458,7 @@ void Arm64Emitter::LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, ui
// If the aligned offset is within the 4GB window and the number of move
// segments is more than 1, then we can use ADRP+ADD
if (RequiredMoveSegments > 1 && vixl::IsInt32(AlignedOffset)) {
if (RequiredMoveSegments > 1 && ARMEmitter::Emitter::IsInt32(AlignedOffset)) {
// If this is 4k page aligned then we only need ADRP
if ((AlignedOffset & 0xFFF) == 0) {
adrp(Reg, AlignedOffset >> 12);
@ -466,7 +466,7 @@ void Arm64Emitter::LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, ui
// If the constant is within 1MB of PC then we can still use ADR to load in a single instruction
// 21-bit signed integer here
int64_t SmallOffset = static_cast<int64_t>(Constant) - static_cast<int64_t>(PC);
if (vixl::IsInt21(SmallOffset)) {
if (ARMEmitter::Emitter::IsInt21(SmallOffset)) {
adr(Reg, SmallOffset);
} else {
// Need to use ADRP + ADD
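Summarizing the gates in this function (a rough sketch, not the full LoadConstant logic; Constant, PC and s are the values used above, and the AlignedOffset computation here is an assumption): ORR needs a bitmask-encodable immediate, ADR takes a signed 21-bit byte offset (about +-1MiB of PC), and ADRP+ADD is used when the page-aligned offset fits a signed 32-bit value, i.e. the 4GB window:
const bool CanUseOrr = ARMEmitter::Emitter::IsImmLogical(Constant, RegSizeInBits(s));
const int64_t SmallOffset = static_cast<int64_t>(Constant) - static_cast<int64_t>(PC);
const int64_t AlignedOffset = static_cast<int64_t>(Constant & ~0xFFFULL) - static_cast<int64_t>(PC & ~0xFFFULL);
const bool CanUseAdr = ARMEmitter::Emitter::IsInt21(SmallOffset);       // single ADR
const bool CanUseAdrpAdd = ARMEmitter::Emitter::IsInt32(AlignedOffset); // ADRP, plus ADD for the low 12 bits of Constant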

View File

@ -94,7 +94,6 @@ protected:
Arm64Emitter(FEXCore::Context::ContextImpl* ctx, void* EmissionPtr = nullptr, size_t size = 0);
FEXCore::Context::ContextImpl* EmitterCTX;
vixl::aarch64::CPU CPU;
std::span<const ARMEmitter::Register> ConfiguredDynamicRegisterBase {};
std::span<const ARMEmitter::Register> StaticRegisters {};

View File

@ -51,11 +51,23 @@ static uint32_t GetMIDR() {
return Result;
}
__attribute__((naked)) static uint64_t ReadSVEVectorLengthInBits() {
///< Can't use the rdvl instruction directly because compilers will complain that SVE/SME is required.
__asm(R"(
.word 0x04bf5100 // rdvl x0, #8
ret;
)");
}
#else
static uint32_t GetDCZID() {
// Return unsupported
return DCZID_DZP_MASK;
}
static int ReadSVEVectorLengthInBits() {
// Return unsupported
return 0;
}
#endif
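For context on the hand-encoded instruction: RDVL Xd, #imm returns imm multiplied by the SVE vector length in bytes, so #8 yields the length in bits directly; a hedged usage sketch (values illustrative):
// On a 256-bit SVE implementation VL is 32 bytes, so rdvl x0, #8 returns 256.
// The non-SVE fallback above returns 0, which fails any >= width comparison.
const uint64_t VectorLengthBits = ReadSVEVectorLengthInBits();
const bool HasAtLeast256BitVectors = VectorLengthBits >= 256; // mirrors the SupportsSVE256 check below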
static void OverrideFeatures(HostFeatures* Features, uint64_t ForceSVEWidth) {
@ -164,7 +176,7 @@ HostFeatures::HostFeatures() {
SupportsSVE256 = ForceSVEWidth() ? ForceSVEWidth() >= 256 : true;
#else
SupportsSVE128 = Features.Has(vixl::CPUFeatures::Feature::kSVE2);
SupportsSVE256 = Features.Has(vixl::CPUFeatures::Feature::kSVE2) && vixl::aarch64::CPU::ReadSVEVectorLengthInBits() >= 256;
SupportsSVE256 = Features.Has(vixl::CPUFeatures::Feature::kSVE2) && ReadSVEVectorLengthInBits() >= 256;
#endif
SupportsAVX = true;

View File

@ -496,7 +496,7 @@ static uint64_t Arm64JITCore_ExitFunctionLink(FEXCore::Core::CpuStateFrame* Fram
uintptr_t branch = (uintptr_t)(Record)-8;
auto offset = HostCode / 4 - branch / 4;
if (vixl::IsInt26(offset)) {
if (ARMEmitter::Emitter::IsInt26(offset)) {
// optimal case - can branch directly
// patch the code
ARMEmitter::Emitter emit((uint8_t*)(branch), 4);
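B/BL encode a signed 26-bit immediate counted in 4-byte instruction words, so direct patching only works within roughly +-128MiB of the branch site; a minimal sketch of that reach check (names hypothetical, values taken from the computation above):
const int64_t WordOffset = static_cast<int64_t>(HostCode / 4) - static_cast<int64_t>(branch / 4);
const bool CanPatchDirectBranch = ARMEmitter::Emitter::IsInt26(WordOffset); // true in the optimal case shown above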
@ -729,7 +729,7 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, const FEXCore
if (SpillSlots) {
const auto TotalSpillSlotsSize = SpillSlots * MaxSpillSlotSize;
if (vixl::aarch64::Assembler::IsImmAddSub(TotalSpillSlotsSize)) {
if (ARMEmitter::IsImmAddSub(TotalSpillSlotsSize)) {
sub(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, TotalSpillSlotsSize);
} else {
LoadConstant(ARMEmitter::Size::i64Bit, TMP1, TotalSpillSlotsSize);
@ -872,7 +872,7 @@ void Arm64JITCore::ResetStack() {
const auto TotalSpillSlotsSize = SpillSlots * MaxSpillSlotSize;
if (vixl::aarch64::Assembler::IsImmAddSub(TotalSpillSlotsSize)) {
if (ARMEmitter::IsImmAddSub(TotalSpillSlotsSize)) {
add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, TotalSpillSlotsSize);
} else {
// Too big to fit in a 12-bit immediate

View File

@ -5,13 +5,7 @@ tags: ir|opts
desc: ConstProp, ZExt elim, const pooling, fcmp reduction, const inlining
$end_info$
*/
// aarch64 heuristics
#include "aarch64/assembler-aarch64.h"
#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/assembler-aarch64.h"
#include <CodeEmitter/Emitter.h>
#include "Interface/IR/IREmitter.h"
#include "Interface/IR/PassManager.h"
@ -56,10 +50,7 @@ static bool IsImmLogical(uint64_t imm, unsigned width) {
if (width < 32) {
width = 32;
}
return vixl::aarch64::Assembler::IsImmLogical(imm, width);
}
static bool IsImmAddSub(uint64_t imm) {
return vixl::aarch64::Assembler::IsImmAddSub(imm);
return ARMEmitter::Emitter::IsImmLogical(imm, width);
}
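AArch64 logical (bitmask) immediates only exist for 32-bit and 64-bit operand sizes, which is why narrower IR widths are clamped to 32 before the check; a small illustrative example (the constant is hypothetical):
// A 16-bit IR constant is validated against the 32-bit encoding.
const bool Encodable = IsImmLogical(0x00FF, 16); // width clamps to 32; 0xFF is a valid bitmask immediate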
static bool IsBfeAlreadyDone(IREmitter* IREmit, OrderedNodeWrapper src, uint64_t Width) {
@ -166,7 +157,7 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
} else if (IsConstant1 && IsConstant2 && IROp->Op == OP_SUB) {
uint64_t NewConstant = (Constant1 - Constant2) & getMask(IROp);
IREmit->ReplaceWithConstant(CodeNode, NewConstant);
} else if (IsConstant2 && !IsImmAddSub(Constant2) && IsImmAddSub(-Constant2)) {
} else if (IsConstant2 && !ARMEmitter::IsImmAddSub(Constant2) && ARMEmitter::IsImmAddSub(-Constant2)) {
// If the second argument is constant, the immediate is not ImmAddSub, but its negation is.
// So, negate the operation to negate (and inline) the constant.
if (IROp->Op == OP_ADD) {
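A concrete (hypothetical) instance of the trick: an ADD of 0xFFFFFFFFFFFFF000 cannot be encoded as an add/sub immediate, but its negation 0x1000 can, so the pass flips the op to SUB and inlines the negated constant:
static_assert(!ARMEmitter::IsImmAddSub(0xFFFFFFFFFFFFF000ULL));
static_assert(ARMEmitter::IsImmAddSub(uint64_t{0} - 0xFFFFFFFFFFFFF000ULL)); // == 0x1000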
@ -611,7 +602,7 @@ void ConstProp::ConstantInlining(IREmitter* IREmit, const IRListView& CurrentIR)
if (IREmit->IsValueConstant(IROp->Args[1], &Constant2)) {
// We don't allow 8/16-bit operations to have constants, since no
// constant would be in bounds after the JIT's 24/16 shift.
if (IsImmAddSub(Constant2) && IROp->Size >= 4) {
if (ARMEmitter::IsImmAddSub(Constant2) && IROp->Size >= 4) {
IREmit->SetWriteCursor(CurrentIR.GetNode(IROp->Args[1]));
IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2));
}
@ -656,7 +647,7 @@ void ConstProp::ConstantInlining(IREmitter* IREmit, const IRListView& CurrentIR)
case OP_CONDSUBNZCV: {
uint64_t Constant2 {};
if (IREmit->IsValueConstant(IROp->Args[1], &Constant2)) {
if (IsImmAddSub(Constant2)) {
if (ARMEmitter::IsImmAddSub(Constant2)) {
IREmit->SetWriteCursor(CurrentIR.GetNode(IROp->Args[1]));
IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2));
}
@ -684,7 +675,7 @@ void ConstProp::ConstantInlining(IREmitter* IREmit, const IRListView& CurrentIR)
case OP_SELECT: {
uint64_t Constant1 {};
if (IREmit->IsValueConstant(IROp->Args[1], &Constant1)) {
if (IsImmAddSub(Constant1)) {
if (ARMEmitter::IsImmAddSub(Constant1)) {
IREmit->SetWriteCursor(CurrentIR.GetNode(IROp->Args[1]));
IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant1));
}
@ -726,7 +717,7 @@ void ConstProp::ConstantInlining(IREmitter* IREmit, const IRListView& CurrentIR)
case OP_CONDJUMP: {
uint64_t Constant2 {};
if (IREmit->IsValueConstant(IROp->Args[1], &Constant2)) {
if (IsImmAddSub(Constant2)) {
if (ARMEmitter::IsImmAddSub(Constant2)) {
IREmit->SetWriteCursor(CurrentIR.GetNode(IROp->Args[1]));
IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2));
}