From b00b41b8faa7f6a645209b835a1f2f4e43c6ba0a Mon Sep 17 00:00:00 2001 From: lioncash Date: Wed, 28 Dec 2022 07:57:30 +0000 Subject: [PATCH] OpcodeDispatcher: Handle VCVTPD2DQ --- .../Interface/Core/OpcodeDispatcher.cpp | 1 + .../Core/OpcodeDispatcher/Vector.cpp | 8 +- .../Interface/Core/X86Tables/VEXTables.cpp | 2 +- unittests/ASM/Disabled_Tests_Simulator | 1 + unittests/ASM/VEX/vcvtpd2dq.asm | 142 ++++++++++++++++++ unittests/ASM/VEX/vcvtpd2dq_inexact.asm | 45 ++++++ 6 files changed, 197 insertions(+), 2 deletions(-) create mode 100644 unittests/ASM/VEX/vcvtpd2dq.asm create mode 100644 unittests/ASM/VEX/vcvtpd2dq_inexact.asm diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 290ee2182..17aad254f 100644 --- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -5953,6 +5953,7 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b01, 0xE5), 1, &OpDispatchBuilder::VPMULHWOp}, {OPD(1, 0b10, 0xE6), 1, &OpDispatchBuilder::AVXVector_CVT_Int_To_Float<4, true>}, + {OPD(1, 0b11, 0xE6), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Int<8, true, true>}, {OPD(1, 0b01, 0xE7), 1, &OpDispatchBuilder::VMOVVectorNTOp}, diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index 6d55e8fcc..f3dea8aff 100644 --- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -1798,9 +1798,12 @@ void OpDispatchBuilder::AVXVector_CVT_Float_To_Int(OpcodeArgs) { const auto DstSize = GetDstSize(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; + // VCVTPD2DQ/VCVTTPD2DQ only use the bottom lane, even for the 256-bit version. + const auto Truncate = SrcElementSize == 8 && Narrow; + OrderedNode *Result = Vector_CVT_Float_To_IntImpl(Op, SrcElementSize, Narrow, HostRoundingMode); - if (Is128Bit) { + if (Is128Bit || Truncate) { Result = _VMov(16, Result); } StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); @@ -1811,6 +1814,9 @@ void OpDispatchBuilder::AVXVector_CVT_Float_To_Int<4, false, false>(OpcodeArgs); template void OpDispatchBuilder::AVXVector_CVT_Float_To_Int<4, false, true>(OpcodeArgs); +template +void OpDispatchBuilder::AVXVector_CVT_Float_To_Int<8, true, true>(OpcodeArgs); + template void OpDispatchBuilder::Scalar_CVT_Float_To_Float(OpcodeArgs) { const auto DstSize = GetDstSize(Op); diff --git a/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp b/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp index e07edd2dc..410350444 100644 --- a/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp +++ b/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp @@ -228,7 +228,7 @@ void InitializeVEXTables() { {OPD(1, 0b01, 0xE6), 1, X86InstInfo{"VCVTTPD2DQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, {OPD(1, 0b10, 0xE6), 1, X86InstInfo{"VCVTDQ2PD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, - {OPD(1, 0b11, 0xE6), 1, X86InstInfo{"VCVTPD2DQ", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b11, 0xE6), 1, X86InstInfo{"VCVTPD2DQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, {OPD(1, 0b01, 0xE7), 1, X86InstInfo{"VMOVNTDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, diff --git a/unittests/ASM/Disabled_Tests_Simulator b/unittests/ASM/Disabled_Tests_Simulator index cd4d35dd4..e802ebc5c 100644 --- a/unittests/ASM/Disabled_Tests_Simulator +++ b/unittests/ASM/Disabled_Tests_Simulator @@ -76,6 +76,7 @@ Test_H0F3A/66_09.asm Test_H0F3A/66_0A.asm Test_H0F3A/66_0B.asm Test_OpSize/66_5B.asm +Test_VEX/vcvtpd2dq_inexact.asm Test_VEX/vcvtps2dq_inexact.asm Test_VEX/vldmxcsr.asm Test_VEX/vroundpd.asm diff --git a/unittests/ASM/VEX/vcvtpd2dq.asm b/unittests/ASM/VEX/vcvtpd2dq.asm new file mode 100644 index 000000000..241afe856 --- /dev/null +++ b/unittests/ASM/VEX/vcvtpd2dq.asm @@ -0,0 +1,142 @@ +%ifdef CONFIG +{ + "HostFeatures": ["AVX"], + "RegData": { + "XMM0": ["0x0000004600000053", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"], + "XMM1": ["0x0000000D00000029", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"], + "XMM2": ["0x0000001600000005", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"], + "XMM3": ["0x000000050000000A", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"], + "XMM4": ["0x000000430000001D", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"], + "XMM5": ["0x0000005B00000013", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"], + "XMM6": ["0x0000003300000028", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"], + "XMM7": ["0x0000001800000021", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"], + "XMM8": ["0x000000180000005B", "0x000000180000005B", "0x0000000000000000", "0x0000000000000000"], + "XMM9": ["0x0000005B00000063", "0x0000005B00000063", "0x0000000000000000", "0x0000000000000000"], + "XMM10": ["0x000000630000005B", "0x000000630000005B", "0x0000000000000000", "0x0000000000000000"], + "XMM11": ["0x0000004A00000041", "0x0000004A00000041", "0x0000000000000000", "0x0000000000000000"], + "XMM12": ["0x0000001900000023", "0x0000001900000023", "0x0000000000000000", "0x0000000000000000"], + "XMM13": ["0x0000005A00000006", "0x0000005A00000006", "0x0000000000000000", "0x0000000000000000"], + "XMM14": ["0x0000003400000021", "0x0000003400000021", "0x0000000000000000", "0x0000000000000000"], + "XMM15": ["0x0000000A0000003A", "0x0000000A0000003A", "0x0000000000000000", "0x0000000000000000"] + } +} +%endif + +lea rdx, [rel .data] + +vcvtpd2dq xmm0, oword [rdx + 32 * 0] +vcvtpd2dq xmm1, oword [rdx + 32 * 1] +vcvtpd2dq xmm2, oword [rdx + 32 * 2] +vcvtpd2dq xmm3, oword [rdx + 32 * 3] +vcvtpd2dq xmm4, oword [rdx + 32 * 4] +vcvtpd2dq xmm5, oword [rdx + 32 * 5] +vcvtpd2dq xmm6, oword [rdx + 32 * 6] +vcvtpd2dq xmm7, oword [rdx + 32 * 7] +vcvtpd2dq xmm8, yword [rdx + 32 * 8] +vcvtpd2dq xmm9, yword [rdx + 32 * 9] +vcvtpd2dq xmm10, yword [rdx + 32 * 10] +vcvtpd2dq xmm11, yword [rdx + 32 * 11] +vcvtpd2dq xmm12, yword [rdx + 32 * 12] +vcvtpd2dq xmm13, yword [rdx + 32 * 13] +vcvtpd2dq xmm14, yword [rdx + 32 * 14] +vcvtpd2dq xmm15, yword [rdx + 32 * 15] + +hlt + +align 32 +.data: +dq 83.0999 , 69.50512 +dq 83.0999 , 69.50512 + +dq 41.02678, 13.05881 +dq 41.02678, 13.05881 + +dq 5.35242 , 21.9932 +dq 5.35242 , 21.9932 + +dq 9.67383 , 5.32372 +dq 9.67383 , 5.32372 + +dq 29.02872, 66.50151 +dq 29.02872, 66.50151 + +dq 19.30764, 91.3633 +dq 19.30764, 91.3633 + +dq 40.45086, 50.96153 +dq 40.45086, 50.96153 + +dq 32.64489, 23.97574 +dq 32.64489, 23.97574 + +dq 90.64316, 24.22547 +dq 90.64316, 24.22547 + +dq 98.9394 , 91.21715 +dq 98.9394 , 91.21715 + +dq 90.80143, 99.48407 +dq 90.80143, 99.48407 + +dq 64.97245, 74.39838 +dq 64.97245, 74.39838 + +dq 35.22761, 25.35321 +dq 35.22761, 25.35321 + +dq 5.8732 , 90.19956 +dq 5.8732 , 90.19956 + +dq 33.03133, 52.02952 +dq 33.03133, 52.02952 + +dq 58.38554, 10.17531 +dq 58.38554, 10.17531 + +dq 47.84703, 84.04831 +dq 47.84703, 84.04831 + +dq 90.02965, 65.81329 +dq 90.02965, 65.81329 + +dq 96.27991, 6.64479 +dq 96.27991, 6.64479 + +dq 25.58971, 95.00694 +dq 25.58971, 95.00694 + +dq 88.1929 , 37.16964 +dq 88.1929 , 37.16964 + +dq 49.52602, 10.27223 +dq 49.52602, 10.27223 + +dq 77.70605, 20.21439 +dq 77.70605, 20.21439 + +dq 9.8056 , 41.29389 +dq 9.8056 , 41.29389 + +dq 15.4071 , 57.54286 +dq 15.4071 , 57.54286 + +dq 9.61117 , 55.54302 +dq 9.61117 , 55.54302 + +dq 52.90745, 4.88086 +dq 52.90745, 4.88086 + +dq 72.52882, 3.0201 +dq 72.52882, 3.0201 + +dq 56.55091, 71.22749 +dq 56.55091, 71.22749 + +dq 61.84736, 88.74295 +dq 61.84736, 88.74295 + +dq 47.72641, 24.17404 +dq 47.72641, 24.17404 + +dq 33.70564, 96.71303 +dq 33.70564, 96.71303 diff --git a/unittests/ASM/VEX/vcvtpd2dq_inexact.asm b/unittests/ASM/VEX/vcvtpd2dq_inexact.asm new file mode 100644 index 000000000..e5d4574a8 --- /dev/null +++ b/unittests/ASM/VEX/vcvtpd2dq_inexact.asm @@ -0,0 +1,45 @@ +%ifdef CONFIG +{ + "HostFeatures": ["AVX"], + "RegData": { + "XMM0": ["0x0000000200000001", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"], + "XMM1": ["0xFFFFFFFEFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"], + "XMM3": ["0x0000000200000001", "0x0000000200000001", "0x0000000000000000", "0x0000000000000000"], + "XMM4": ["0xFFFFFFFEFFFFFFFF", "0xFFFFFFFEFFFFFFFF", "0x0000000000000000", "0x0000000000000000"] + }, + "MemoryRegions": { + "0x100000000": "4096" + } +} +%endif + +lea rdx, [rel .data] + +vmovapd ymm0, [rdx + 32 * 2] +vmovapd ymm1, [rdx + 32 * 2] +vmovapd ymm2, [rdx] + +vcvtpd2dq xmm0, xmm2 +vcvtpd2dq xmm1, oword [rdx + 32 * 1] + +vcvtpd2dq xmm3, ymm2 +vcvtpd2dq xmm4, yword [rdx + 32 * 1] + +hlt + +align 32 +.data: +dq 0x3FF0000000000000 +dq 0x4000000000000000 +dq 0x3FF0000000000000 +dq 0x4000000000000000 + +dq 0xBFF0000000000000 +dq 0xC000000000000000 +dq 0xBFF0000000000000 +dq 0xC000000000000000 + +dq 0x4142434445464748 +dq 0x5152535455565758 +dq 0x4142434445464748 +dq 0x5152535455565758