diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 7cb0d389c..cd33580df 100644 --- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -5934,6 +5934,7 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b01, 0xE0), 1, &OpDispatchBuilder::AVXVectorALUOp}, {OPD(1, 0b01, 0xE1), 1, &OpDispatchBuilder::VPSRAOp<2>}, + {OPD(1, 0b01, 0xE2), 1, &OpDispatchBuilder::VPSRAOp<4>}, {OPD(1, 0b01, 0xE3), 1, &OpDispatchBuilder::AVXVectorALUOp}, {OPD(1, 0b01, 0xE7), 1, &OpDispatchBuilder::VMOVVectorNTOp}, diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index 6c1d8f1a7..9d1faffc8 100644 --- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -1478,6 +1478,8 @@ void OpDispatchBuilder::VPSRAOp(OpcodeArgs) { template void OpDispatchBuilder::VPSRAOp<2>(OpcodeArgs); +template +void OpDispatchBuilder::VPSRAOp<4>(OpcodeArgs); void OpDispatchBuilder::PSRLDQ(OpcodeArgs) { LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); diff --git a/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp b/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp index 35cb8f245..cf7c19321 100644 --- a/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp +++ b/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp @@ -221,7 +221,7 @@ void InitializeVEXTables() { {OPD(1, 0b01, 0xE0), 1, X86InstInfo{"VPAVGB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}}, {OPD(1, 0b01, 0xE1), 1, X86InstInfo{"VPSRAW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}}, - {OPD(1, 0b01, 0xE2), 1, X86InstInfo{"VPSRAD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b01, 0xE2), 1, X86InstInfo{"VPSRAD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}}, {OPD(1, 0b01, 0xE3), 1, X86InstInfo{"VPAVGW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}}, {OPD(1, 0b01, 0xE4), 1, X86InstInfo{"VPMULHUW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, {OPD(1, 0b01, 0xE5), 1, X86InstInfo{"VPMULHW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, diff --git a/unittests/ASM/VEX/vpsrad.asm b/unittests/ASM/VEX/vpsrad.asm new file mode 100644 index 000000000..770b26d08 --- /dev/null +++ b/unittests/ASM/VEX/vpsrad.asm @@ -0,0 +1,69 @@ +%ifdef CONFIG +{ + "HostFeatures": ["AVX"], + "RegData": { + "XMM4": ["0x8042434445464748", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"], + "XMM5": ["0xFFFF804200004546", "0x0000717200007576", "0x0000000000000000", "0x0000000000000000"], + "XMM6": ["0xFFFFFFFF00000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"], + "XMM7": ["0x8042434445464748", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"], + "XMM8": ["0xFFFF804200004546", "0x0000717200007576", "0x0000000000000000", "0x0000000000000000"], + "XMM9": ["0xFFFFFFFF00000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"], + "XMM10": ["0x8042434445464748", "0x7172737475767778", "0x4142434445464748", "0x7172737475767778"], + "XMM11": ["0xFFFF804200004546", "0x0000717200007576", "0x0000414200004546", "0x0000717200007576"], + "XMM12": ["0xFFFFFFFF00000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"], + "XMM13": ["0x8042434445464748", "0x7172737475767778", "0x4142434445464748", "0x7172737475767778"], + "XMM14": ["0xFFFF804200004546", "0x0000717200007576", "0x0000414200004546", "0x0000717200007576"], + "XMM15": ["0xFFFFFFFF00000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"] + }, + "MemoryRegions": { + "0x100000000": "4096" + } +} +%endif + +lea rdx, [rel .data] + +vmovapd ymm0, [rdx + 32 * 0] +vmovapd ymm1, [rdx + 32 * 1] +vmovapd ymm2, [rdx + 32 * 2] +vmovapd ymm3, [rdx + 32 * 3] + +vpsrad xmm4, xmm0, xmm1 +vpsrad xmm5, xmm0, xmm2 +vpsrad xmm6, xmm0, xmm3 + +vpsrad xmm7, xmm0, [rdx + 32 * 1] +vpsrad xmm8, xmm0, [rdx + 32 * 2] +vpsrad xmm9, xmm0, [rdx + 32 * 3] + +vpsrad ymm10, ymm0, xmm1 +vpsrad ymm11, ymm0, xmm2 +vpsrad ymm12, ymm0, xmm3 + +vpsrad ymm13, ymm0, [rdx + 32 * 1] +vpsrad ymm14, ymm0, [rdx + 32 * 2] +vpsrad ymm15, ymm0, [rdx + 32 * 3] + +hlt + +align 32 +.data: +dq 0x8042434445464748 +dq 0x7172737475767778 +dq 0x4142434445464748 +dq 0x7172737475767778 + +dq 0x0000000000000000 +dq 0x5152535455565758 +dq 0x5152535455565758 +dq 0x5152535455565758 + +dq 0x0000000000000010 +dq 0x5152535455565758 +dq 0x5152535455565758 +dq 0x5152535455565758 + +dq 0x0000000000000020 +dq 0x5152535455565758 +dq 0x5152535455565758 +dq 0x5152535455565758