From 2244dd98478c5c04b390559963c897d3c33d7b57 Mon Sep 17 00:00:00 2001
From: lioncash
Date: Wed, 14 Dec 2022 06:30:57 +0000
Subject: [PATCH] OpcodeDispatcher: Handle VROUNDSS

---
Review notes (commentary only; not part of the change itself):
  * Wires up VROUNDSS in three places: a dispatcher table entry at
    OPD(3, 0b01, 0x0A) pointing at AVXVectorRound<4, true>, an explicit
    instantiation of AVXVectorRound<4, true>, and a VEX decode-table entry
    that replaces the previous TYPE_UNDEC placeholder.
  * Adds unittests/ASM/VEX/vroundss.asm and also lists it in
    Disabled_Tests_Simulator.
  * The test runs the instruction with each immediate rounding mode and then
    with imm8 = 00000100b under four different MXCSR values.
  * NOTE(review): line breaks, blank context lines and indentation in this
    copy were reconstructed, and some angle-bracketed text looks missing (the
    author address; presumably the template parameter list in the Vector.cpp
    context). Confirm against the upstream commit before applying.

 .../Interface/Core/OpcodeDispatcher.cpp | 1 +
 .../Core/OpcodeDispatcher/Vector.cpp | 3 +
 .../Interface/Core/X86Tables/VEXTables.cpp | 2 +-
 unittests/ASM/Disabled_Tests_Simulator | 1 +
 unittests/ASM/VEX/vroundss.asm | 70 +++++++++++++++++++
 5 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 unittests/ASM/VEX/vroundss.asm

diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
index 039e9eafc..73861d800 100644
--- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
+++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
@@ -5984,6 +5984,7 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
       {OPD(3, 0b01, 0x06), 1, &OpDispatchBuilder::VPERM2Op},
       {OPD(3, 0b01, 0x08), 1, &OpDispatchBuilder::AVXVectorRound<4, false>},
       {OPD(3, 0b01, 0x09), 1, &OpDispatchBuilder::AVXVectorRound<8, false>},
+      {OPD(3, 0b01, 0x0A), 1, &OpDispatchBuilder::AVXVectorRound<4, true>},
 
       {OPD(3, 0b01, 0x18), 1, &OpDispatchBuilder::VINSERTOp},
       {OPD(3, 0b01, 0x38), 1, &OpDispatchBuilder::VINSERTOp},
diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
index b5d9425e2..da342db39 100644
--- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
+++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
@@ -2721,6 +2721,9 @@ void OpDispatchBuilder::AVXVectorRound<4, false>(OpcodeArgs);
 template
 void OpDispatchBuilder::AVXVectorRound<8, false>(OpcodeArgs);
 
+template
+void OpDispatchBuilder::AVXVectorRound<4, true>(OpcodeArgs);
+
 template
 void OpDispatchBuilder::VectorBlend(OpcodeArgs) {
   LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here");
diff --git a/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp b/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp
index 7980a6e69..7d56141b5 100644
--- a/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp
+++ b/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp
@@ -415,7 +415,7 @@ void InitializeVEXTables() {
 
     {OPD(3, 0b01, 0x08), 1, X86InstInfo{"VROUNDPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
     {OPD(3, 0b01, 0x09), 1, X86InstInfo{"VROUNDPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
-    {OPD(3, 0b01, 0x0A), 1, X86InstInfo{"VROUNDSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
+    {OPD(3, 0b01, 0x0A), 1, X86InstInfo{"VROUNDSS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1, nullptr}},
     {OPD(3, 0b01, 0x0B), 1, X86InstInfo{"VROUNDSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
     {OPD(3, 0b01, 0x0C), 1, X86InstInfo{"VBLENDPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
     {OPD(3, 0b01, 0x0D), 1, X86InstInfo{"VBLENDPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
diff --git a/unittests/ASM/Disabled_Tests_Simulator b/unittests/ASM/Disabled_Tests_Simulator
index ca1dca11a..5eeff3c6b 100644
--- a/unittests/ASM/Disabled_Tests_Simulator
+++ b/unittests/ASM/Disabled_Tests_Simulator
@@ -73,3 +73,4 @@ Test_OpSize/66_5B.asm
 Test_VEX/vldmxcsr.asm
 Test_VEX/vroundpd.asm
 Test_VEX/vroundps.asm
+Test_VEX/vroundss.asm
diff --git a/unittests/ASM/VEX/vroundss.asm b/unittests/ASM/VEX/vroundss.asm
new file mode 100644
index 000000000..e1213cc26
--- /dev/null
+++ b/unittests/ASM/VEX/vroundss.asm
@@ -0,0 +1,70 @@
+%ifdef CONFIG
+{
+  "HostFeatures": ["AVX"],
+  "RegData": {
+    "XMM0": ["0xBF00000000000000", "0xBFC000003FC00000", "0x0000000000000000", "0x0000000000000000"],
+    "XMM1": ["0xBF00000000000000", "0xBFC000003FC00000", "0x0000000000000000", "0x0000000000000000"],
+    "XMM2": ["0xBF0000003F800000", "0xBFC000003FC00000", "0x0000000000000000", "0x0000000000000000"],
+    "XMM3": ["0xBF00000000000000", "0xBFC000003FC00000", "0x0000000000000000", "0x0000000000000000"],
+    "XMM4": ["0xBF00000000000000", "0xBFC000003FC00000", "0x0000000000000000", "0x0000000000000000"],
+    "XMM5": ["0xBF00000000000000", "0xBFC000003FC00000", "0x0000000000000000", "0x0000000000000000"],
+    "XMM6": ["0xBF0000003F800000", "0xBFC000003FC00000", "0x0000000000000000", "0x0000000000000000"],
+    "XMM7": ["0xBF00000000000000", "0xBFC000003FC00000", "0x0000000000000000", "0x0000000000000000"]
+  }
+}
+%endif
+
+lea rdx, [rel .data] ; rdx -> the 32-byte-aligned float inputs at .data
+
+vmovaps ymm0, [rdx] ; fill all 256 bits so the expected zeroed upper half is a real check
+vmovaps ymm1, [rdx]
+vmovaps ymm2, [rdx]
+vmovaps ymm3, [rdx]
+vmovaps ymm4, [rdx]
+vmovaps ymm5, [rdx]
+vmovaps ymm6, [rdx]
+vmovaps ymm7, [rdx]
+
+vroundss xmm0, xmm0, [rdx], 00000000b ; Nearest
+vroundss xmm1, xmm1, [rdx], 00000001b ; -inf
+vroundss xmm2, xmm2, [rdx], 00000010b ; +inf
+vroundss xmm3, xmm3, [rdx], 00000011b ; truncate
+
+; MXCSR: the cases below set imm8 bit 2, so the rounding mode is taken from MXCSR
+; Set to nearest
+mov eax, 0x1F80 ; MXCSR image with the rounding-control field (bits 14:13) = 00b
+mov [rel .mxcsr], eax ; ldmxcsr reads its operand from memory, so stage the value there
+ldmxcsr [rel .mxcsr]
+
+vroundss xmm4, xmm4, [rdx], 00000100b ; rounds 0.5 under MXCSR nearest
+
+; Set to -inf
+mov eax, 0x3F80 ; rounding-control field = 01b
+mov [rel .mxcsr], eax
+ldmxcsr [rel .mxcsr]
+
+vroundss xmm5, xmm5, [rdx], 00000100b ; rounds 0.5 under MXCSR -inf
+
+; Set to +inf
+mov eax, 0x5F80 ; rounding-control field = 10b
+mov [rel .mxcsr], eax
+ldmxcsr [rel .mxcsr]
+
+vroundss xmm6, xmm6, [rdx], 00000100b ; rounds 0.5 under MXCSR +inf (expected 1.0 in RegData)
+
+; Set to truncate
+mov eax, 0x7F80 ; rounding-control field = 11b
+mov [rel .mxcsr], eax
+ldmxcsr [rel .mxcsr]
+
+vroundss xmm7, xmm7, [rdx], 00000100b ; rounds 0.5 under MXCSR truncate
+
+hlt
+
+align 32
+.data:
+dd 0.5, -0.5, 1.5, -1.5 ; element 0 (0.5) is the scalar that vroundss rounds
+dd 0.5, -0.5, 1.5, -1.5 ; upper 128 bits loaded by vmovaps; expected to read back as zero
+
+.mxcsr:
+dq 0, 0 ; scratch storage that the mov/ldmxcsr pairs above write and load