mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-01-09 23:30:37 +00:00
Merge pull request #2408 from lioncash/pmaddwd
OpcodeDispatcher: Handle VPMADDWD
This commit is contained in:
commit
efafe0e6e9
@ -6010,6 +6010,7 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
|
||||
{OPD(1, 0b01, 0xF2), 1, &OpDispatchBuilder::VPSLLOp<4>},
|
||||
{OPD(1, 0b01, 0xF3), 1, &OpDispatchBuilder::VPSLLOp<8>},
|
||||
{OPD(1, 0b01, 0xF4), 1, &OpDispatchBuilder::VPMULLOp<4, false>},
|
||||
{OPD(1, 0b01, 0xF5), 1, &OpDispatchBuilder::VPMADDWDOp},
|
||||
{OPD(1, 0b01, 0xF7), 1, &OpDispatchBuilder::MASKMOVOp},
|
||||
|
||||
{OPD(1, 0b01, 0xF8), 1, &OpDispatchBuilder::AVXVectorALUOp<IR::OP_VSUB, 1>},
|
||||
|
@ -485,6 +485,8 @@ public:
|
||||
template <size_t ElementSize>
|
||||
void VPHSUBOp(OpcodeArgs);
|
||||
|
||||
// Handler for the VEX-encoded VPMADDWD instruction. Its definition passes
// Op->Src[0] and Op->Src[1] to PMADDWDOpImpl and stores the returned node
// as the instruction's result.
void VPMADDWDOp(OpcodeArgs);
|
||||
|
||||
void VPMULHRSWOp(OpcodeArgs);
|
||||
|
||||
template <bool Signed>
|
||||
@ -809,6 +811,9 @@ private:
|
||||
OrderedNode* PHSUBOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1,
|
||||
const X86Tables::DecodedOperand& Src2, size_t ElementSize);
|
||||
|
||||
// Shared IR builder behind the PMADDWD and VPMADDWD handlers.
// Src1 / Src2 are the two decoded operands to multiply; both are loaded as
// FPR-class sources, sign-extended from 16-bit to 32-bit elements, multiplied,
// and adjacent products are added pairwise.
// Returns the node holding the pairwise sums; the caller is responsible for
// storing it to the destination.
OrderedNode* PMADDWDOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1,
|
||||
const X86Tables::DecodedOperand& Src2);
|
||||
|
||||
OrderedNode* PMULHRSWOpImpl(OpcodeArgs, OrderedNode *Src1, OrderedNode *Src2);
|
||||
|
||||
OrderedNode* PMULHWOpImpl(OpcodeArgs, bool Signed,
|
||||
|
@ -2898,7 +2898,8 @@ void OpDispatchBuilder::VPFCMPOp<1>(OpcodeArgs);
|
||||
template
|
||||
void OpDispatchBuilder::VPFCMPOp<2>(OpcodeArgs);
|
||||
|
||||
void OpDispatchBuilder::PMADDWD(OpcodeArgs) {
|
||||
OrderedNode* OpDispatchBuilder::PMADDWDOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1,
|
||||
const X86Tables::DecodedOperand& Src2) {
|
||||
// This is a pretty curious operation
|
||||
// Does two MADD operations across 4 16bit signed integers and accumulates to 32bit integers in the destination
|
||||
//
|
||||
@ -2909,25 +2910,34 @@ void OpDispatchBuilder::PMADDWD(OpcodeArgs) {
|
||||
|
||||
auto Size = GetSrcSize(Op);
|
||||
|
||||
OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
|
||||
OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
|
||||
OrderedNode *Src1Node = LoadSource(FPRClass, Op, Src1, Op->Flags, -1);
|
||||
OrderedNode *Src2Node = LoadSource(FPRClass, Op, Src2, Op->Flags, -1);
|
||||
|
||||
if (Size == 8) {
|
||||
Size <<= 1;
|
||||
}
|
||||
|
||||
auto Src1_L = _VSXTL(Size, 2, Src1); // [15:0 ], [31:16], [32:47 ], [63:48 ]
|
||||
auto Src1_H = _VSXTL2(Size, 2, Src1); // [79:64], [95:80], [111:96], [127:112]
|
||||
auto Src1_L = _VSXTL(Size, 2, Src1Node); // [15:0 ], [31:16], [32:47 ], [63:48 ]
|
||||
auto Src1_H = _VSXTL2(Size, 2, Src1Node); // [79:64], [95:80], [111:96], [127:112]
|
||||
|
||||
auto Src2_L = _VSXTL(Size, 2, Src2); // [15:0 ], [31:16], [32:47 ], [63:48 ]
|
||||
auto Src2_H = _VSXTL2(Size, 2, Src2); // [79:64], [95:80], [111:96], [127:112]
|
||||
auto Src2_L = _VSXTL(Size, 2, Src2Node); // [15:0 ], [31:16], [32:47 ], [63:48 ]
|
||||
auto Src2_H = _VSXTL2(Size, 2, Src2Node); // [79:64], [95:80], [111:96], [127:112]
|
||||
|
||||
auto Res_L = _VSMul(Size, 4, Src1_L, Src2_L); // [15:0 ], [31:16], [32:47 ], [63:48 ] : Original elements
|
||||
auto Res_H = _VSMul(Size, 4, Src1_H, Src2_H); // [79:64], [95:80], [111:96], [127:112] : Original elements
|
||||
|
||||
// [15:0 ] + [31:16], [32:47 ] + [63:48 ], [79:64] + [95:80], [111:96] + [127:112]
|
||||
auto Res = _VAddP(Size, 4, Res_L, Res_H);
|
||||
StoreResult(FPRClass, Op, Res, -1);
|
||||
return _VAddP(Size, 4, Res_L, Res_H);
|
||||
}
|
||||
|
||||
// Legacy (non-VEX) PMADDWD handler.
// The destination operand is also the first multiplicand, so it is handed to
// the shared implementation as Src1 together with the explicit source operand.
// The node produced by the implementation is then written back as the result.
void OpDispatchBuilder::PMADDWD(OpcodeArgs) {
  const auto& FirstOperand = Op->Dest;
  const auto& SecondOperand = Op->Src[0];
  StoreResult(FPRClass, Op, PMADDWDOpImpl(Op, FirstOperand, SecondOperand), -1);
}
|
||||
|
||||
// VEX-encoded VPMADDWD handler.
// Unlike the legacy form, both multiplicands are explicit source operands
// (Src[0] and Src[1]); the destination is only written, never read here.
// The node produced by the shared implementation is stored as the result.
void OpDispatchBuilder::VPMADDWDOp(OpcodeArgs) {
  const auto& FirstOperand = Op->Src[0];
  const auto& SecondOperand = Op->Src[1];
  StoreResult(FPRClass, Op, PMADDWDOpImpl(Op, FirstOperand, SecondOperand), -1);
}
|
||||
|
||||
void OpDispatchBuilder::PMADDUBSW(OpcodeArgs) {
|
||||
|
@ -246,7 +246,7 @@ void InitializeVEXTables() {
|
||||
{OPD(1, 0b01, 0xF2), 1, X86InstInfo{"VPSLLD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
|
||||
{OPD(1, 0b01, 0xF3), 1, X86InstInfo{"VPSLLQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
|
||||
{OPD(1, 0b01, 0xF4), 1, X86InstInfo{"VPMULUDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
|
||||
{OPD(1, 0b01, 0xF5), 1, X86InstInfo{"VPMADDWD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
|
||||
{OPD(1, 0b01, 0xF5), 1, X86InstInfo{"VPMADDWD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
|
||||
{OPD(1, 0b01, 0xF6), 1, X86InstInfo{"VPSADBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
|
||||
{OPD(1, 0b01, 0xF7), 1, X86InstInfo{"VMASKMOVDQU", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 0, nullptr}},
|
||||
|
||||
|
36
unittests/ASM/VEX/vpmaddwd.asm
Normal file
36
unittests/ASM/VEX/vpmaddwd.asm
Normal file
@ -0,0 +1,36 @@
|
||||
%ifdef CONFIG
|
||||
{
|
||||
"HostFeatures": ["AVX"],
|
||||
"RegData": {
|
||||
"XMM2": ["0x32F08FD4383B2524", "0x499DE6944FEA7CE4", "0x0000000000000000", "0x0000000000000000"],
|
||||
"XMM3": ["0x32F08FD4383B2524", "0x499DE6944FEA7CE4", "0x0000000000000000", "0x0000000000000000"],
|
||||
"XMM4": ["0x32F08FD4383B2524", "0x499DE6944FEA7CE4", "0x41FD357ADA74036A", "0xCCCC999AE38E1C72"],
|
||||
"XMM5": ["0x32F08FD4383B2524", "0x499DE6944FEA7CE4", "0x41FD357ADA74036A", "0xCCCC999AE38E1C72"]
|
||||
}
|
||||
}
|
||||
%endif
|
||||
|
||||
; rdx = address of the test vectors at .data (two consecutive 32-byte blocks).
lea rdx, [rel .data]
|
||||
|
||||
; ymm0 = first 32-byte block (.data + 0).
vmovaps ymm0, [rdx]
|
||||
; ymm1 = second 32-byte block (.data + 32).
vmovaps ymm1, [rdx + 32]
|
||||
|
||||
; 128-bit form, second source taken from memory. Result goes to xmm2.
vpmaddwd xmm2, xmm0, [rdx + 32]
|
||||
; 128-bit form, second source taken from a register. Same inputs as above,
; so the CONFIG block lists the same expected value for XMM2 and XMM3.
vpmaddwd xmm3, xmm0, xmm1
|
||||
|
||||
; 256-bit form, second source taken from memory. Result goes to ymm4.
vpmaddwd ymm4, ymm0, [rdx + 32]
|
||||
; 256-bit form, second source taken from a register. Same inputs as above,
; so the CONFIG block lists the same expected value for XMM4 and XMM5.
vpmaddwd ymm5, ymm0, ymm1
|
||||
|
||||
; Stop execution here.
hlt
|
||||
|
||||
; 32-byte alignment is required by the aligned vmovaps loads above.
align 32
|
||||
.data:
|
||||
; First operand: the 32 bytes loaded into ymm0.
dq 0x4142434445464748
|
||||
dq 0x5152535455565758
|
||||
dq 0x6666777788889999
|
||||
dq 0x5555444433332222
|
||||
|
||||
; Second operand: the 32 bytes at .data + 32, loaded into ymm1 and also
; used directly as the memory source.
dq 0x6162636465666768
|
||||
dq 0x7172737475767778
|
||||
dq 0x5555444433332222
|
||||
dq 0xAAAAAAAAAAAAAAAA
|
Loading…
Reference in New Issue
Block a user