diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
index 116569172..2cde014a2 100644
--- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
+++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
@@ -5868,6 +5868,7 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
     {OPD(2, 0b01, 0x0C), 1, &OpDispatchBuilder::VPERMILRegOp<4>},
     {OPD(2, 0b01, 0x0D), 1, &OpDispatchBuilder::VPERMILRegOp<8>},
     {OPD(2, 0b01, 0x0E), 1, &OpDispatchBuilder::VTESTPOp<4>},
+    {OPD(2, 0b01, 0x0F), 1, &OpDispatchBuilder::VTESTPOp<8>},
 
     {OPD(2, 0b01, 0x16), 1, &OpDispatchBuilder::VPERMDOp},
     {OPD(2, 0b01, 0x17), 1, &OpDispatchBuilder::PTestOp},
diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
index 5af7eb99b..df16a13d7 100644
--- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
+++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
@@ -3823,6 +3823,8 @@ void OpDispatchBuilder::VTESTPOp(OpcodeArgs) {
 }
 template
 void OpDispatchBuilder::VTESTPOp<4>(OpcodeArgs);
+template
+void OpDispatchBuilder::VTESTPOp<8>(OpcodeArgs);
 
 OrderedNode* OpDispatchBuilder::PHMINPOSUWOpImpl(OpcodeArgs) {
   const auto Size = GetSrcSize(Op);
diff --git a/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp b/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp
index af0239fda..169da8c3d 100644
--- a/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp
+++ b/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp
@@ -275,7 +275,7 @@ void InitializeVEXTables() {
     {OPD(2, 0b01, 0x0C), 1, X86InstInfo{"VPERMILPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
     {OPD(2, 0b01, 0x0D), 1, X86InstInfo{"VPERMILPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
     {OPD(2, 0b01, 0x0E), 1, X86InstInfo{"VTESTPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}},
-    {OPD(2, 0b01, 0x0F), 1, X86InstInfo{"VTESTPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
+    {OPD(2, 0b01, 0x0F), 1, X86InstInfo{"VTESTPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}},
 
     {OPD(2, 0b01, 0x13), 1, X86InstInfo{"VCVTPH2PS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
     {OPD(2, 0b01, 0x16), 1, X86InstInfo{"VPERMPS", TYPE_INST, GenFlagsSameSize(SIZE_256BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
diff --git a/unittests/ASM/VEX/vtestpd.asm b/unittests/ASM/VEX/vtestpd.asm
new file mode 100644
index 000000000..98f9abe9f
--- /dev/null
+++ b/unittests/ASM/VEX/vtestpd.asm
@@ -0,0 +1,96 @@
+%ifdef CONFIG
+{
+  "HostFeatures": ["AVX"],
+  "RegData": {
+    "R15": "0x0000000EDDFFB77F",
+    "XMM0": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
+    "XMM1": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
+    "XMM2": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"]
+  }
+}
+%endif
+
+; Shifts the current CF and ZF into the low bits of r15; clobbers AX and BX
+; Each use appends two bits in the order CF:ZF (CF is the more significant bit)
+%macro zfcfmerge 0
+  lahf
+
+  ; Shift the flags byte from AH down into AL so that CF sits at bit zero
+  shr ax, 8
+
+  ; Copy to a temp so AX still holds the flags for the ZF step below
+  mov bx, ax
+  and rbx, 1
+
+  shl r15, 1
+  or r15, rbx
+
+  shl r15, 1
+
+  ; Copy the flags to the temp again
+  mov bx, ax
+
+  ; Extract ZF (bit 6 once the flags byte is in the low byte)
+  shr bx, 6
+  and rbx, 1
+
+  ; Insert ZF into the bit just below CF
+  or r15, rbx
+%endmacro
+
+%macro tests 1
+  vtestpd %{1}0, [rdx + 32 * 3]
+  zfcfmerge
+  vtestpd %{1}1, [rdx + 32 * 4]
+  zfcfmerge
+  vtestpd %{1}2, [rdx + 32 * 5]
+  zfcfmerge
+  vtestpd %{1}0, [rdx + 32 * 6]
+  zfcfmerge
+  vtestpd %{1}1, [rdx + 32 * 7]
+  zfcfmerge
+  vtestpd %{1}2, [rdx + 32 * 8]
+  zfcfmerge
+  vtestpd %{1}0, [rdx + 32 * 9]
+  zfcfmerge
+  vtestpd %{1}1, [rdx + 32 * 10]
+  zfcfmerge
+  vtestpd %{1}2, [rdx + 32 * 11]
+  zfcfmerge
+%endmacro
+
+lea rdx, [rel .data]
+
+mov rax, 0
+mov rbx, 0
+mov r15, 0
+
+vmovaps ymm0, [rdx + 32 * 0]
+vmovaps ymm1, [rdx + 32 * 1]
+vmovaps ymm2, [rdx + 32 * 2]
+
+tests xmm
+tests ymm
+
+hlt
+
+align 32
+.data:
+dq 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000
+dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
+dq 0x4142434445464748, 0x5152535455565758, 0x4142434445464748, 0x5152535455565758
+
+; Match: the same bit patterns that were loaded into ymm0-ymm2
+dq 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000
+dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
+dq 0x4142434445464748, 0x5152535455565758, 0x4142434445464748, 0x5152535455565758
+
+; Match on not: the bitwise NOT of the patterns loaded into ymm0-ymm2
+dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
+dq 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000
+dq 0xBEBDBCBBBAB9B8B7, 0xAEADACABAAA9A8A7, 0xBEBDBCBBBAB9B8B7, 0xAEADACABAAA9A8A7
+
+; No match on either case (the top bit of every element is clear)
+dq 1, 1, 1, 1
+dq 2, 2, 2, 2
+dq 3, 3, 3, 3