OpcodeDispatcher: Optimize MMX conversion operations

These instructions (CVTTPS2PI, CVTPS2PI, CVTTPD2PI, and CVTPD2PI) are now optimal.
This commit is contained in:
Ryan Houdek 2023-08-24 15:46:19 -07:00
parent 72ce7ddf2d
commit c441b238c7
3 changed files with 21 additions and 12 deletions

View File

@ -6383,8 +6383,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) {
{0x28, 2, &OpDispatchBuilder::MOVAPS_MOVAPDOp},
{0x2A, 1, &OpDispatchBuilder::MMX_To_XMM_Vector_CVT_Int_To_Float<4, false>},
{0x2B, 1, &OpDispatchBuilder::MOVVectorNTOp},
{0x2C, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<4, false, false>},
{0x2D, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<4, false, true>},
{0x2C, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<4, false, false>},
{0x2D, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<4, false, true>},
{0x2E, 2, &OpDispatchBuilder::UCOMISxOp<4>},
{0x50, 1, &OpDispatchBuilder::MOVMSKOp<4>},
{0x51, 1, &OpDispatchBuilder::VectorUnaryOp<IR::OP_VFSQRT, 4, false>},
@ -6672,8 +6672,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) {
{0x28, 2, &OpDispatchBuilder::MOVAPS_MOVAPDOp},
{0x2A, 1, &OpDispatchBuilder::MMX_To_XMM_Vector_CVT_Int_To_Float<4, true>},
{0x2B, 1, &OpDispatchBuilder::MOVVectorNTOp},
{0x2C, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<8, false>},
{0x2D, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<8, true>},
{0x2C, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<8, true, false>},
{0x2D, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<8, true, true>},
{0x2E, 2, &OpDispatchBuilder::UCOMISxOp<8>},
{0x40, 16, &OpDispatchBuilder::CMOVOp},

View File

@ -377,7 +377,7 @@ public:
void Vector_CVT_Float_To_Int(OpcodeArgs);
template<size_t SrcElementSize, bool Widen>
void MMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs);
template<size_t SrcElementSize, bool HostRoundingMode>
template<size_t SrcElementSize, bool Narrow, bool HostRoundingMode>
void XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs);
void MASKMOVOp(OpcodeArgs);
void MOVBetweenGPR_FPR(OpcodeArgs);

View File

@ -2110,16 +2110,21 @@ void OpDispatchBuilder::MMX_To_XMM_Vector_CVT_Int_To_Float<4, false>(OpcodeArgs)
template
void OpDispatchBuilder::MMX_To_XMM_Vector_CVT_Int_To_Float<4, true>(OpcodeArgs);
template<size_t SrcElementSize, bool HostRoundingMode>
template<size_t SrcElementSize, bool Narrow, bool HostRoundingMode>
void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs) {
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
// If loading a vector, use the full size, so we don't
// unnecessarily zero extend the vector. Otherwise, if
// memory, then we want to load the element size exactly.
const auto SrcSize = Op->Src[0].IsGPR() ? 16U : GetSrcSize(Op);
OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags, -1);
size_t ElementSize = SrcElementSize;
size_t Size = GetDstSize(Op);
// Always narrows
Src = _Vector_FToF(Size, SrcElementSize >> 1, Src, SrcElementSize);
ElementSize >>= 1;
if (Narrow) {
Src = _Vector_FToF(Size, SrcElementSize >> 1, Src, SrcElementSize);
ElementSize >>= 1;
}
if constexpr (HostRoundingMode) {
Src = _Vector_FToS(Size, ElementSize, Src);
@ -2132,9 +2137,13 @@ void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs) {
}
template
void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<8, false>(OpcodeArgs);
void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<4, false, false>(OpcodeArgs);
template
void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<8, true>(OpcodeArgs);
void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<4, false, true>(OpcodeArgs);
template
void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<8, true, false>(OpcodeArgs);
template
void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<8, true, true>(OpcodeArgs);
void OpDispatchBuilder::MASKMOVOp(OpcodeArgs) {
const auto Size = GetSrcSize(Op);