OpcodeDispatcher: Handle VDPPS

This commit is contained in:
lioncash 2022-12-31 20:35:36 +00:00
parent 74526d1f02
commit dae1676e4a
6 changed files with 161 additions and 1 deletions

View File

@ -6076,6 +6076,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
{OPD(3, 0b01, 0x18), 1, &OpDispatchBuilder::VINSERTOp},
{OPD(3, 0b01, 0x38), 1, &OpDispatchBuilder::VINSERTOp},
{OPD(3, 0b01, 0x40), 1, &OpDispatchBuilder::VDPPOp<4>},
{OPD(3, 0b01, 0x46), 1, &OpDispatchBuilder::VPERM2Op},
{OPD(3, 0b01, 0xDF), 1, &OpDispatchBuilder::VAESKeyGenAssistOp},

View File

@ -440,6 +440,9 @@ public:
template <size_t ElementSize>
void VBROADCASTOp(OpcodeArgs);
// Handler for the VEX-encoded dot-product instruction. The VDPPOp<4> instantiation is the
// one installed in the opcode handler table for VDPPS (map 3, pp 0b01, opcode 0x40).
template <size_t ElementSize>
void VDPPOp(OpcodeArgs);
template <IROps IROp, size_t ElementSize>
void VHADDPOp(OpcodeArgs);

View File

@ -3534,6 +3534,18 @@ void OpDispatchBuilder::DPPOp<4>(OpcodeArgs);
// Explicit instantiation of DPPOp with a template argument of 8.
template
void OpDispatchBuilder::DPPOp<8>(OpcodeArgs);
// VEX-encoded dot product: forwards the three decoded source operands and the element
// size to the shared DPPOpImpl helper, then writes the returned node to the destination.
template <size_t ElementSize>
void OpDispatchBuilder::VDPPOp(OpcodeArgs) {
  OrderedNode *DotProduct = DPPOpImpl(Op,
                                      Op->Src[0],
                                      Op->Src[1],
                                      Op->Src[2],
                                      ElementSize);

  // No explicit _VMov is emitted to clear the upper lane here: DPPOpImpl constructs its
  // result from a zero vector, so for the 128-bit version the upper lane is already cleared.
  StoreResult(FPRClass, Op, DotProduct, -1);
}

// Only the 4-byte element variant is instantiated.
template
void OpDispatchBuilder::VDPPOp<4>(OpcodeArgs);
void OpDispatchBuilder::MPSADBWOp(OpcodeArgs) {
LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here");
uint8_t Select = Op->Src[1].Data.Literal.Value;

View File

@ -437,7 +437,7 @@ void InitializeVEXTables() {
{OPD(3, 0b01, 0x38), 1, X86InstInfo{"VINSERTI128", TYPE_INST, GenFlagsSameSize(SIZE_256BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(3, 0b01, 0x39), 1, X86InstInfo{"VEXTRACTI128", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(3, 0b01, 0x40), 1, X86InstInfo{"VDPPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(3, 0b01, 0x40), 1, X86InstInfo{"VDPPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(3, 0b01, 0x41), 1, X86InstInfo{"VDPPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(3, 0b01, 0x42), 1, X86InstInfo{"VMPSADBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(3, 0b01, 0x44), 1, X86InstInfo{"VPCLMULQDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1, nullptr}},

View File

@ -0,0 +1,56 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM8": ["0x492FEB2E492FEB2E", "0x492FEB2E492FEB2E", "0x0000000000000000", "0x0000000000000000"],
"XMM9": ["0x499A5226499A5226", "0x499A5226499A5226", "0x0000000000000000", "0x0000000000000000"],
"XMM10": ["0x494ECFA4494ECFA4", "0x494ECFA4494ECFA4", "0x0000000000000000", "0x0000000000000000"],
"XMM11": ["0x495F7816495F7816", "0x495F7816495F7816", "0x0000000000000000", "0x0000000000000000"],
"XMM12": ["0x496E3962496E3962", "0x496E3962496E3962", "0x0000000000000000", "0x0000000000000000"],
"XMM13": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM14": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM15": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
}
}
%endif
; Test for the 128-bit (xmm) form of VDPPS.
; The RegData block above holds the expected final contents of xmm8-xmm15 as four 64-bit
; words each; the upper two words (bits 255:128) are expected to be zero for every register.

; rdx = base address of the operand table below.
lea rdx, [rel .data]
; First source operands: rows 0-7 of the table, one 16-byte row per register.
vmovaps xmm0, [rdx + 16 * 0]
vmovaps xmm1, [rdx + 16 * 1]
vmovaps xmm2, [rdx + 16 * 2]
vmovaps xmm3, [rdx + 16 * 3]
vmovaps xmm4, [rdx + 16 * 4]
vmovaps xmm5, [rdx + 16 * 5]
vmovaps xmm6, [rdx + 16 * 6]
vmovaps xmm7, [rdx + 16 * 7]
; Second source operands come straight from memory (rows 8-15).
; Per the instruction definition, imm8[7:4] selects which element products are summed and
; imm8[3:0] selects which destination elements receive the sum (the rest are zeroed).
; Mask 0xFF: all four products summed, sum written to all four elements.
vdpps xmm8, xmm0, [rdx + 16 * 8], 11111111b
vdpps xmm9, xmm1, [rdx + 16 * 9], 11111111b
vdpps xmm10, xmm2, [rdx + 16 * 10], 11111111b
vdpps xmm11, xmm3, [rdx + 16 * 11], 11111111b
vdpps xmm12, xmm4, [rdx + 16 * 12], 11111111b
; Mask 0x00: nothing summed and nothing written, expected result is all zero.
vdpps xmm13, xmm5, [rdx + 16 * 13], 00000000b
; Mask 0xF0: all products summed but no destination element selected, expected result is all zero.
vdpps xmm14, xmm6, [rdx + 16 * 14], 11110000b
; Mask 0x0F: no products selected, so a zero sum is written to every element.
vdpps xmm15, xmm7, [rdx + 16 * 15], 00001111b
hlt
; vmovaps requires its memory operand to be aligned, so align the table.
align 32
.data:
; Rows 0-7: first source operands (loaded into xmm0-xmm7 above).
dd 655.9708, 532.2244, 108.0451, 512.4019
dd 754.227 , 586.0859, 127.7574, 114.8167
dd 764.4266, 226.6145, 337.864 , 320.3296
dd 296.5247, 480.0057, 28.4267 , 565.9418
dd 265.8255, 536.4473, 754.3489, 460.681
dd 818.7269, 43.7204 , 464.592 , 847.9381
dd 306.0592, 702.7584, 887.6473, 551.5908
dd 620.9001, 520.9829, 232.9532, 510.3388
; Rows 8-15: second source operands (used as the vdpps memory operands above).
dd 204.8474, 225.626 , 564.973 , 790.5175
dd 836.1953, 844.5266, 633.5626, 501.7409
dd 393.2616, 674.4415, 244.3265, 971.1598
dd 770.8029, 746.1836, 255.9902, 567.7578
dd 187.7175, 924.181 , 466.4362, 169.8267
dd 651.7481, 462.4206, 396.6924, 355.8538
dd 6.148 , 523.1443, 989.7004, 713.6646
dd 497.5427, 657.6965, 651.0534, 778.5236

View File

@ -0,0 +1,87 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM8": ["0x492FEB2E492FEB2E", "0x492FEB2E492FEB2E", "0x492FEB2E492FEB2E", "0x492FEB2E492FEB2E"],
"XMM9": ["0x499A5226499A5226", "0x499A5226499A5226", "0x499A5226499A5226", "0x499A5226499A5226"],
"XMM10": ["0x494ECFA4494ECFA4", "0x494ECFA4494ECFA4", "0x494ECFA4494ECFA4", "0x494ECFA4494ECFA4"],
"XMM11": ["0x495F7816495F7816", "0x495F7816495F7816", "0x495F7816495F7816", "0x495F7816495F7816"],
"XMM12": ["0x496E3962496E3962", "0x496E3962496E3962", "0x496E3962496E3962", "0x496E3962496E3962"],
"XMM13": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM14": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM15": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
}
}
%endif
; Test for the 256-bit (ymm) form of VDPPS.
; The RegData block above holds the expected final contents of registers 8-15 as four
; 64-bit words each. Every 32-byte operand in the table below is the same 16-byte row
; repeated twice, so both 128-bit halves of each result are expected to be identical.

; rdx = base address of the operand table below.
lea rdx, [rel .data]
; First source operands: one 32-byte entry (two table rows) per register.
vmovaps ymm0, [rdx + 32 * 0]
vmovaps ymm1, [rdx + 32 * 1]
vmovaps ymm2, [rdx + 32 * 2]
vmovaps ymm3, [rdx + 32 * 3]
vmovaps ymm4, [rdx + 32 * 4]
vmovaps ymm5, [rdx + 32 * 5]
vmovaps ymm6, [rdx + 32 * 6]
vmovaps ymm7, [rdx + 32 * 7]
; Second source operands come straight from memory (32-byte entries 8-15).
; Per the instruction definition, imm8[7:4] selects which element products are summed and
; imm8[3:0] selects which destination elements receive the sum (the rest are zeroed);
; the same mask is applied to each 128-bit lane independently.
; Mask 0xFF: all four products summed, sum written to all four elements of the lane.
vdpps ymm8, ymm0, [rdx + 32 * 8], 11111111b
vdpps ymm9, ymm1, [rdx + 32 * 9], 11111111b
vdpps ymm10, ymm2, [rdx + 32 * 10], 11111111b
vdpps ymm11, ymm3, [rdx + 32 * 11], 11111111b
vdpps ymm12, ymm4, [rdx + 32 * 12], 11111111b
; Mask 0x00: nothing summed and nothing written, expected result is all zero.
vdpps ymm13, ymm5, [rdx + 32 * 13], 00000000b
; Mask 0xF0: all products summed but no destination element selected, expected result is all zero.
vdpps ymm14, ymm6, [rdx + 32 * 14], 11110000b
; Mask 0x0F: no products selected, so a zero sum is written to every element.
vdpps ymm15, ymm7, [rdx + 32 * 15], 00001111b
hlt
; vmovaps with a ymm register requires a 32-byte aligned memory operand.
align 32
.data:
; Entries 0-7: first source operands (loaded into ymm0-ymm7 above), each row duplicated
; to fill both 128-bit lanes.
dd 655.9708, 532.2244, 108.0451, 512.4019
dd 655.9708, 532.2244, 108.0451, 512.4019
dd 754.227 , 586.0859, 127.7574, 114.8167
dd 754.227 , 586.0859, 127.7574, 114.8167
dd 764.4266, 226.6145, 337.864 , 320.3296
dd 764.4266, 226.6145, 337.864 , 320.3296
dd 296.5247, 480.0057, 28.4267 , 565.9418
dd 296.5247, 480.0057, 28.4267 , 565.9418
dd 265.8255, 536.4473, 754.3489, 460.681
dd 265.8255, 536.4473, 754.3489, 460.681
dd 818.7269, 43.7204 , 464.592 , 847.9381
dd 818.7269, 43.7204 , 464.592 , 847.9381
dd 306.0592, 702.7584, 887.6473, 551.5908
dd 306.0592, 702.7584, 887.6473, 551.5908
dd 620.9001, 520.9829, 232.9532, 510.3388
dd 620.9001, 520.9829, 232.9532, 510.3388
; Entries 8-15: second source operands (used as the vdpps memory operands above), each
; row duplicated to fill both 128-bit lanes.
dd 204.8474, 225.626 , 564.973 , 790.5175
dd 204.8474, 225.626 , 564.973 , 790.5175
dd 836.1953, 844.5266, 633.5626, 501.7409
dd 836.1953, 844.5266, 633.5626, 501.7409
dd 393.2616, 674.4415, 244.3265, 971.1598
dd 393.2616, 674.4415, 244.3265, 971.1598
dd 770.8029, 746.1836, 255.9902, 567.7578
dd 770.8029, 746.1836, 255.9902, 567.7578
dd 187.7175, 924.181 , 466.4362, 169.8267
dd 187.7175, 924.181 , 466.4362, 169.8267
dd 651.7481, 462.4206, 396.6924, 355.8538
dd 651.7481, 462.4206, 396.6924, 355.8538
dd 6.148 , 523.1443, 989.7004, 713.6646
dd 6.148 , 523.1443, 989.7004, 713.6646
dd 497.5427, 657.6965, 651.0534, 778.5236
dd 497.5427, 657.6965, 651.0534, 778.5236