AVX128: Implement support for vcmps{s,d}

2024-12-14 17:38:47 +00:00 · 2024-06-17 23:05:30 -07:00 · 2024-06-17 23:05:30 -07:00 · 9566dda73e
commit 9566dda73e
parent a0ced2b685
2 changed files with 28 additions and 2 deletions
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
@ -1036,6 +1036,8 @@ public:
  Ref AVX128_VFCMPImpl(size_t ElementSize, Ref Src1, Ref Src2, uint8_t CompType);
  template<size_t ElementSize>
  void AVX128_VFCMP(OpcodeArgs);
  // Handler for the scalar float-compare encodings of opcode 0xC2
  // (vcmpss / vcmpsd). ElementSize is the scalar width in bytes; the handler
  // table instantiates it with 4 and 8. The comparison predicate is read from
  // the instruction's literal third source operand.
  template<size_t ElementSize>
  void AVX128_InsertScalarFCMP(OpcodeArgs);
  // End of AVX 128-bit implementation
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
@ -181,8 +181,8 @@ void OpDispatchBuilder::InstallAVX128Handlers() {
    {OPD(1, 0b00, 0xC2), 1, &OpDispatchBuilder::AVX128_VFCMP<4>},
    {OPD(1, 0b01, 0xC2), 1, &OpDispatchBuilder::AVX128_VFCMP<8>},
-    // TODO: {OPD(1, 0b10, 0xC2), 1, &OpDispatchBuilder::AVXInsertScalarFCMPOp<4>},
+    {OPD(1, 0b10, 0xC2), 1, &OpDispatchBuilder::AVX128_InsertScalarFCMP<4>},
-    // TODO: {OPD(1, 0b11, 0xC2), 1, &OpDispatchBuilder::AVXInsertScalarFCMPOp<8>},
+    {OPD(1, 0b11, 0xC2), 1, &OpDispatchBuilder::AVX128_InsertScalarFCMP<8>},
    // TODO: {OPD(1, 0b01, 0xC4), 1, &OpDispatchBuilder::VPINSRWOp},
    // TODO: {OPD(1, 0b01, 0xC5), 1, &OpDispatchBuilder::PExtrOp<2>},
@ -950,4 +950,28 @@ void OpDispatchBuilder::AVX128_VFCMP(OpcodeArgs) {
  });
 }
 // Scalar float compare for the AVX128 path (vcmpss / vcmpsd).
 //
 // Loads both sources, runs the scalar compare on their low 128-bit halves
 // using the predicate taken from the literal third operand, and stores the
 // outcome with the upper 128 bits of the destination set to zero.
 // ElementSize is the width in bytes of the scalar being compared.
 template<size_t ElementSize>
 void OpDispatchBuilder::AVX128_InsertScalarFCMP(OpcodeArgs) {
  // Size used only when the second operand is not a register (see below).
  const auto ScalarLoadSize = GetSrcSize(Op);

  auto FirstOperand = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);

  // A vector-register second operand is read at full vector width so the
  // scalar element being compared is not needlessly zero-extended; any other
  // operand kind is read at the instruction's own source size, and only the
  // low half of the pair is populated.
  RefPair SecondOperand {};
  if (!Op->Src[1].IsGPR()) {
    SecondOperand.Low = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], ScalarLoadSize, Op->Flags);
  } else {
    SecondOperand = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false);
  }

  // Comparison predicate encoded in the instruction's immediate.
  const auto Predicate = static_cast<uint8_t>(Op->Src[2].Literal());

  // Only the low halves take part in the compare; the upper half of the
  // destination is written as zero.
  RefPair Output {};
  Output.Low = InsertScalarFCMPOpImpl(OpSize::i128Bit, OpSize::i128Bit, ElementSize, FirstOperand.Low, SecondOperand.Low, Predicate, false);
  Output.High = LoadZeroVector(OpSize::i128Bit);

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Output);
 }
 } // namespace FEXCore::IR