OpcodeDispatcher: Handle VDPPD

x86 does not define a 256-bit version of this op, so only the 128-bit form needs to be handled.
This commit is contained in:
lioncash 2022-12-31 21:03:02 +00:00
parent dae1676e4a
commit 769c548ba4
4 changed files with 60 additions and 1 deletions

View File

@ -6077,6 +6077,7 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
{OPD(3, 0b01, 0x38), 1, &OpDispatchBuilder::VINSERTOp},
{OPD(3, 0b01, 0x40), 1, &OpDispatchBuilder::VDPPOp<4>},
{OPD(3, 0b01, 0x41), 1, &OpDispatchBuilder::VDPPOp<8>},
{OPD(3, 0b01, 0x46), 1, &OpDispatchBuilder::VPERM2Op},

View File

@ -3545,6 +3545,8 @@ void OpDispatchBuilder::VDPPOp(OpcodeArgs) {
// Explicit instantiations of VDPPOp for the two template arguments used by the
// VEX opcode handler table: VDPPOp<4> (opcode 0x40) and VDPPOp<8> (opcode 0x41).
// NOTE(review): the argument is presumably the element size in bytes
// (4 for VDPPS, 8 for VDPPD) — confirm against the template definition.
template
void OpDispatchBuilder::VDPPOp<4>(OpcodeArgs);
template
void OpDispatchBuilder::VDPPOp<8>(OpcodeArgs);
void OpDispatchBuilder::MPSADBWOp(OpcodeArgs) {
LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here");

View File

@ -438,7 +438,7 @@ void InitializeVEXTables() {
{OPD(3, 0b01, 0x39), 1, X86InstInfo{"VEXTRACTI128", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(3, 0b01, 0x40), 1, X86InstInfo{"VDPPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(3, 0b01, 0x41), 1, X86InstInfo{"VDPPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(3, 0b01, 0x41), 1, X86InstInfo{"VDPPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(3, 0b01, 0x42), 1, X86InstInfo{"VMPSADBW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(3, 0b01, 0x44), 1, X86InstInfo{"VPCLMULQDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(3, 0b01, 0x46), 1, X86InstInfo{"VPERM2I128", TYPE_INST, GenFlagsSameSize(SIZE_256BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1, nullptr}},

View File

@ -0,0 +1,56 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM8": ["0x41278C496C911A6E", "0x41278C496C911A6E", "0x0000000000000000", "0x0000000000000000"],
"XMM9": ["0x41235CCC64AFB361", "0x41235CCC64AFB361", "0x0000000000000000", "0x0000000000000000"],
"XMM10": ["0x412BACE273945DC5", "0x412BACE273945DC5", "0x0000000000000000", "0x0000000000000000"],
"XMM11": ["0x412CF22EF582FD76", "0x412CF22EF582FD76", "0x0000000000000000", "0x0000000000000000"],
"XMM12": ["0x4121C80E40F3BC7B", "0x4121C80E40F3BC7B", "0x0000000000000000", "0x0000000000000000"],
"XMM13": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM14": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM15": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
}
}
%endif
; Test for the 128-bit VEX-encoded VDPPD (packed double-precision dot product).
; The RegData block above lists the expected contents of XMM8-XMM15 as four
; quadwords each; the upper two quadwords are expected to be zero in every case.
;
; VDPPD immediate layout (see the DPPD entry in the Intel SDM):
;   imm8[5:4] - which of the two element products take part in the sum
;   imm8[1:0] - which destination elements receive the sum (others get +0.0)
; The remaining immediate bits do not affect the double-precision form.
; rdx is the base pointer for all table accesses below.
lea rdx, [rel .data]
; Rows 0-7 of the table become the first (register) source operands.
; vmovaps needs a 16-byte aligned address, which the 'align 32' before the
; table plus the 16-byte row stride provide.
vmovaps xmm0, [rdx + 16 * 0]
vmovaps xmm1, [rdx + 16 * 1]
vmovaps xmm2, [rdx + 16 * 2]
vmovaps xmm3, [rdx + 16 * 3]
vmovaps xmm4, [rdx + 16 * 4]
vmovaps xmm5, [rdx + 16 * 5]
vmovaps xmm6, [rdx + 16 * 6]
vmovaps xmm7, [rdx + 16 * 7]
; Rows 8-15 are used directly as the memory (second) source operands.
; All mask bits set: both products are summed and the sum is written to both
; destination elements, so XMM8-XMM12 expect the same value in each low quadword.
vdppd xmm8, xmm0, [rdx + 16 * 8], 11111111b
vdppd xmm9, xmm1, [rdx + 16 * 9], 11111111b
vdppd xmm10, xmm2, [rdx + 16 * 10], 11111111b
vdppd xmm11, xmm3, [rdx + 16 * 11], 11111111b
vdppd xmm12, xmm4, [rdx + 16 * 12], 11111111b
; No products selected and no elements written: result is all zero.
vdppd xmm13, xmm5, [rdx + 16 * 13], 00000000b
; imm8[1:0] = 00: the sum is computed but written to no element, so zero.
vdppd xmm14, xmm6, [rdx + 16 * 14], 11110000b
; imm8[5:4] = 00: no products take part, so the written sum is zero.
vdppd xmm15, xmm7, [rdx + 16 * 15], 00001111b
; NOTE(review): hlt presumably ends the test so the harness can compare the
; registers against RegData — confirm against the test runner.
hlt
align 32
; Input table: 16 rows of two double-precision values (16 bytes per row).
.data:
dq 470.4127, 683.87
dq 711.3545, 511.5631
dq 996.8793, 548.682
dq 588.9345, 832.5925
dq 210.6613, 792.6059
dq 298.4494, 154.4895
dq 818.4 , 881.6027
dq 705.3087, 687.478
dq 737.0665, 621.31
dq 755.3097, 189.9614
dq 552.4284, 649.1206
dq 798.252 , 574.5732
dq 593.7565, 577.3129
dq 383.3844, 443.3476
dq 414.3571, 615.1567
dq 94.898 , 438.3107