diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
index 032937d83..46d346565 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
@@ -1036,6 +1036,8 @@ public:
   Ref AVX128_VFCMPImpl(size_t ElementSize, Ref Src1, Ref Src2, uint8_t CompType);
   template<size_t ElementSize>
   void AVX128_VFCMP(OpcodeArgs);
+  template<size_t ElementSize>
+  void AVX128_InsertScalarFCMP(OpcodeArgs);
 
   // End of AVX 128-bit implementation
 
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
index def9054b2..116855c32 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
@@ -181,8 +181,8 @@ void OpDispatchBuilder::InstallAVX128Handlers() {
 
     {OPD(1, 0b00, 0xC2), 1, &OpDispatchBuilder::AVX128_VFCMP<4>},
     {OPD(1, 0b01, 0xC2), 1, &OpDispatchBuilder::AVX128_VFCMP<8>},
-    // TODO: {OPD(1, 0b10, 0xC2), 1, &OpDispatchBuilder::AVXInsertScalarFCMPOp<4>},
-    // TODO: {OPD(1, 0b11, 0xC2), 1, &OpDispatchBuilder::AVXInsertScalarFCMPOp<8>},
+    {OPD(1, 0b10, 0xC2), 1, &OpDispatchBuilder::AVX128_InsertScalarFCMP<4>},
+    {OPD(1, 0b11, 0xC2), 1, &OpDispatchBuilder::AVX128_InsertScalarFCMP<8>},
 
     // TODO: {OPD(1, 0b01, 0xC4), 1, &OpDispatchBuilder::VPINSRWOp},
     // TODO: {OPD(1, 0b01, 0xC5), 1, &OpDispatchBuilder::PExtrOp<2>},
@@ -950,4 +950,35 @@ void OpDispatchBuilder::AVX128_VFCMP(OpcodeArgs) {
   });
 }
 
+// Handler for the scalar floating-point compare entries registered at
+// OPD(1, 0b10, 0xC2) and OPD(1, 0b11, 0xC2) (presumably VCMPSS/VCMPSD - confirm).
+//
+// ElementSize is instantiated with 4 and 8 by the handler table (presumably the
+// scalar width in bytes). The comparison predicate is the literal in Op->Src[2].
+// The low 128 bits come from InsertScalarFCMPOpImpl applied to the low halves of
+// both sources; the upper 128 bits of the destination are stored as zero.
+template<size_t ElementSize>
+void OpDispatchBuilder::AVX128_InsertScalarFCMP(OpcodeArgs) {
+  // We load the full vector width when dealing with a source vector,
+  // so that we don't do any unnecessary zero extension to the scalar
+  // element that we're going to operate on.
+  const auto SrcSize = GetSrcSize(Op);
+
+  auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);
+  RefPair Src2 {};
+
+  if (Op->Src[1].IsGPR()) {
+    Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false);
+  } else {
+    Src2.Low = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], SrcSize, Op->Flags);
+  }
+
+  const uint8_t CompType = Op->Src[2].Literal();
+
+  RefPair Result {};
+  Result.Low = InsertScalarFCMPOpImpl(OpSize::i128Bit, OpSize::i128Bit, ElementSize, Src1.Low, Src2.Low, CompType, false);
+  Result.High = LoadZeroVector(OpSize::i128Bit);
+  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
+}
+
 } // namespace FEXCore::IR