IR: Change VSToFVectorInsert to use IR::OpSize

2024-11-30 10:11:07 +00:00 · 2024-10-27 18:27:39 -07:00 · 2024-10-27 18:27:39 -07:00 · 5626f4e50a
commit 5626f4e50a
parent efbc42dac3
6 changed files with 16 additions and 16 deletions
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
@@ -5025,8 +5025,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
    {OPD(1, 0b00, 0x29), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},
    {OPD(1, 0b01, 0x29), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},

-    {OPD(1, 0b10, 0x2A), 1, &OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<4>},
-    {OPD(1, 0b11, 0x2A), 1, &OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<8>},
+    {OPD(1, 0b10, 0x2A), 1, &OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<OpSize::i32Bit>},
+    {OPD(1, 0b11, 0x2A), 1, &OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<OpSize::i64Bit>},

    {OPD(1, 0b00, 0x2B), 1, &OpDispatchBuilder::MOVVectorNTOp},
    {OPD(1, 0b01, 0x2B), 1, &OpDispatchBuilder::MOVVectorNTOp},
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
@@ -533,9 +533,9 @@ public:
  void AVXVectorScalarUnaryInsertALUOp(OpcodeArgs);

  void InsertMMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs);
-  template<size_t DstElementSize>
+  template<IR::OpSize DstElementSize>
  void InsertCVTGPR_To_FPR(OpcodeArgs);
-  template<size_t DstElementSize>
+  template<IR::OpSize DstElementSize>
  void AVXInsertCVTGPR_To_FPR(OpcodeArgs);

  template<IR::OpSize DstElementSize, IR::OpSize SrcElementSize>
@@ -968,7 +968,7 @@ public:
  template<size_t ElementSize>
  void AVX128_VPUNPCKH(OpcodeArgs);
  void AVX128_MOVVectorUnaligned(OpcodeArgs);
-  template<size_t DstElementSize>
+  template<IR::OpSize DstElementSize>
  void AVX128_InsertCVTGPR_To_FPR(OpcodeArgs);
  template<IR::OpSize SrcElementSize, bool HostRoundingMode>
  void AVX128_CVTFPR_To_GPR(OpcodeArgs);
@@ -1452,7 +1452,7 @@ private:
  Ref VectorScalarUnaryInsertALUOpImpl(OpcodeArgs, IROps IROp, IR::OpSize DstSize, IR::OpSize ElementSize,
                                       const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits);

-  Ref InsertCVTGPR_To_FPRImpl(OpcodeArgs, IR::OpSize DstSize, size_t DstElementSize, const X86Tables::DecodedOperand& Src1Op,
+  Ref InsertCVTGPR_To_FPRImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize DstElementSize, const X86Tables::DecodedOperand& Src1Op,
                              const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits);

  Ref InsertScalar_CVT_Float_To_FloatImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize DstElementSize, IR::OpSize SrcElementSize,
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
@@ -57,8 +57,8 @@ void OpDispatchBuilder::InstallAVX128Handlers() {
    {OPD(1, 0b00, 0x29), 1, &OpDispatchBuilder::AVX128_VMOVAPS},
    {OPD(1, 0b01, 0x29), 1, &OpDispatchBuilder::AVX128_VMOVAPS},

-    {OPD(1, 0b10, 0x2A), 1, &OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR<4>},
-    {OPD(1, 0b11, 0x2A), 1, &OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR<8>},
+    {OPD(1, 0b10, 0x2A), 1, &OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR<OpSize::i32Bit>},
+    {OPD(1, 0b11, 0x2A), 1, &OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR<OpSize::i64Bit>},

    {OPD(1, 0b00, 0x2B), 1, &OpDispatchBuilder::AVX128_MOVVectorNT},
    {OPD(1, 0b01, 0x2B), 1, &OpDispatchBuilder::AVX128_MOVVectorNT},
@@ -1004,7 +1004,7 @@ void OpDispatchBuilder::AVX128_MOVVectorUnaligned(OpcodeArgs) {
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Src);
 }

-template<size_t DstElementSize>
+template<IR::OpSize DstElementSize>
 void OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR(OpcodeArgs) {
  const auto SrcSize = GetSrcSize(Op);
  const auto DstSize = GetDstSize(Op);
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/SecondaryTables.h
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/SecondaryTables.h
@@ -151,7 +151,7 @@ constexpr std::tuple<uint8_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> OpDisp
  {0x10, 2, &OpDispatchBuilder::MOVSSOp},
  {0x12, 1, &OpDispatchBuilder::VMOVSLDUPOp},
  {0x16, 1, &OpDispatchBuilder::VMOVSHDUPOp},
-  {0x2A, 1, &OpDispatchBuilder::InsertCVTGPR_To_FPR<4>},
+  {0x2A, 1, &OpDispatchBuilder::InsertCVTGPR_To_FPR<OpSize::i32Bit>},
  {0x2B, 1, &OpDispatchBuilder::MOVVectorNTOp},
  {0x2C, 1, &OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i32Bit, false>},
  {0x2D, 1, &OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i32Bit, true>},
@@ -181,7 +181,7 @@ constexpr std::tuple<uint8_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> OpDisp
 constexpr std::tuple<uint8_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> OpDispatch_SecondaryRepNEModTables[] = {
  {0x10, 2, &OpDispatchBuilder::MOVSDOp},
  {0x12, 1, &OpDispatchBuilder::MOVDDUPOp},
-  {0x2A, 1, &OpDispatchBuilder::InsertCVTGPR_To_FPR<8>},
+  {0x2A, 1, &OpDispatchBuilder::InsertCVTGPR_To_FPR<OpSize::i64Bit>},
  {0x2B, 1, &OpDispatchBuilder::MOVVectorNTOp},
  {0x2C, 1, &OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i64Bit, false>},
  {0x2D, 1, &OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i64Bit, true>},
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
@@ -416,14 +416,14 @@ void OpDispatchBuilder::InsertMMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs) {
  Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags);

  // Always 32-bit.
-  const size_t ElementSize = OpSize::i32Bit;
+  const auto ElementSize = OpSize::i32Bit;
  // Always signed
  Dest = _VSToFVectorInsert(IR::SizeToOpSize(DstSize), ElementSize, ElementSize, Dest, Src, true, false);

  StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Dest, DstSize, OpSize::iInvalid);
 }

-Ref OpDispatchBuilder::InsertCVTGPR_To_FPRImpl(OpcodeArgs, IR::OpSize DstSize, size_t DstElementSize, const X86Tables::DecodedOperand& Src1Op,
+Ref OpDispatchBuilder::InsertCVTGPR_To_FPRImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize DstElementSize, const X86Tables::DecodedOperand& Src1Op,
                                               const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits) {
  // We load the full vector width when dealing with a source vector,
  // so that we don't do any unnecessary zero extension to the scalar
@@ -451,7 +451,7 @@ Ref OpDispatchBuilder::InsertCVTGPR_To_FPRImpl(OpcodeArgs, IR::OpSize DstSize, s
  return _VSToFVectorInsert(IR::SizeToOpSize(DstSize), DstElementSize, DstElementSize, Src1, Src2, false, ZeroUpperBits);
 }

-template<size_t DstElementSize>
+template<IR::OpSize DstElementSize>
 void OpDispatchBuilder::InsertCVTGPR_To_FPR(OpcodeArgs) {
  const auto DstSize = GetGuestVectorLength();
  auto Result = InsertCVTGPR_To_FPRImpl(Op, DstSize, DstElementSize, Op->Dest, Op->Src[0], false);
@@ -461,7 +461,7 @@ void OpDispatchBuilder::InsertCVTGPR_To_FPR(OpcodeArgs) {
 template void OpDispatchBuilder::InsertCVTGPR_To_FPR<OpSize::i32Bit>(OpcodeArgs);
 template void OpDispatchBuilder::InsertCVTGPR_To_FPR<OpSize::i64Bit>(OpcodeArgs);

-template<size_t DstElementSize>
+template<IR::OpSize DstElementSize>
 void OpDispatchBuilder::AVXInsertCVTGPR_To_FPR(OpcodeArgs) {
  const auto DstSize = GetGuestVectorLength();
  Ref Result = InsertCVTGPR_To_FPRImpl(Op, DstSize, DstElementSize, Op->Src[0], Op->Src[1], true);
--- a/FEXCore/Source/Interface/IR/IR.json
+++ b/FEXCore/Source/Interface/IR/IR.json
@@ -1785,7 +1785,7 @@
        "DestSize": "RegisterSize",
        "NumElements": "RegisterSize / DstElementSize"
      },
-      "FPR = VSToFVectorInsert OpSize:#RegisterSize, u8:#DstElementSize, u8:$SrcElementSize, FPR:$Vector1, FPR:$Vector2, i8:$HasTwoElements, i1:$ZeroUpperBits": {
+      "FPR = VSToFVectorInsert OpSize:#RegisterSize, OpSize:#DstElementSize, OpSize:$SrcElementSize, FPR:$Vector1, FPR:$Vector2, i8:$HasTwoElements, i1:$ZeroUpperBits": {
        "Desc": ["Does a Vector 'scvt' between Vector1 and Vector2.",
                 "Inserting the result in to the lower element of Vector1 and returning the results.",
                 "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.",