From bcb6726b22d3bcf62fea4df67167520db0af4e92 Mon Sep 17 00:00:00 2001 From: lioncash Date: Tue, 20 Dec 2022 16:03:17 +0000 Subject: [PATCH] OpcodeDispatcher: Handle VAESKEYGENASSIST This does the exact same thing as AESKEYGENASSIST, except that the upper lane gets cleared. --- .../Interface/Core/OpcodeDispatcher.cpp | 2 + .../Source/Interface/Core/OpcodeDispatcher.h | 2 + .../Core/OpcodeDispatcher/Crypto.cpp | 6 ++ .../Interface/Core/X86Tables/VEXTables.cpp | 2 +- unittests/ASM/Disabled_Tests_Simulator | 1 + unittests/ASM/VEX/vaeskeygenassist.asm | 64 +++++++++++++++++++ 6 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 unittests/ASM/VEX/vaeskeygenassist.asm diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 5d03c596c..4c1cb0c9e 100644 --- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -6032,6 +6032,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(3, 0b01, 0x38), 1, &OpDispatchBuilder::VINSERTOp}, {OPD(3, 0b01, 0x46), 1, &OpDispatchBuilder::VPERM2Op}, + + {OPD(3, 0b01, 0xDF), 1, &OpDispatchBuilder::VAESKeyGenAssistOp}, }; #undef OPD diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 3069e9bfc..a364b7adb 100644 --- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -419,6 +419,8 @@ public: template void VADDSUBPOp(OpcodeArgs); + void VAESKeyGenAssistOp(OpcodeArgs); + void VANDNOp(OpcodeArgs); template diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp index 2a9abff8f..4e71c4308 100644 --- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp +++ 
b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp @@ -311,6 +311,12 @@ void OpDispatchBuilder::AESKeyGenAssist(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } +void OpDispatchBuilder::VAESKeyGenAssistOp(OpcodeArgs) { /* Dispatch handler for the VEX-encoded VAESKEYGENASSIST instruction (registered for OPD(3, 0b01, 0xDF)). */ + OrderedNode *Assist = AESKeyGenAssistImpl(Op); /* Delegates the key-generation-assist computation to AESKeyGenAssistImpl; per the commit message this is the same operation as the non-VEX AESKEYGENASSIST. */ + OrderedNode *Result = _VMov(16, Assist); /* 16-byte vector move of the assist value; per the commit message, the difference from AESKEYGENASSIST is that the upper lane gets cleared. NOTE(review): presumably _VMov zero-extends past 16 bytes - confirm against the IR definition. */ + StoreResult(FPRClass, Op, Result, -1); /* Stores Result as an FPR-class value for this op, with the same arguments AESKeyGenAssist passes above. */ +} + void OpDispatchBuilder::PCLMULQDQOp(OpcodeArgs) { LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Selector needs to be literal here"); diff --git a/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp b/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp index 80f2aded2..a79b4da5d 100644 --- a/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp +++ b/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp @@ -478,7 +478,7 @@ void InitializeVEXTables() { {OPD(3, 0b01, 0x7E), 1, X86InstInfo{"VFNMSUBSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, {OPD(3, 0b01, 0x7F), 1, X86InstInfo{"VFNMSUBSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, - {OPD(3, 0b01, 0xDF), 1, X86InstInfo{"VAESKEYGENASSIST", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(3, 0b01, 0xDF), 1, X86InstInfo{"VAESKEYGENASSIST", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}}, {OPD(3, 0b11, 0xF0), 1, X86InstInfo{"RORX", TYPE_INST, FLAGS_MODRM, 1, nullptr}}, diff --git a/unittests/ASM/Disabled_Tests_Simulator b/unittests/ASM/Disabled_Tests_Simulator index cd809ca7d..01819e3b9 100644 --- a/unittests/ASM/Disabled_Tests_Simulator +++ b/unittests/ASM/Disabled_Tests_Simulator @@ -5,6 +5,7 @@ Test_H0F38/66_DD.asm Test_H0F38/66_DE.asm Test_H0F38/66_DF.asm Test_H0F3A/0_66_DF.asm +Test_VEX/vaeskeygenassist.asm # PCMUL considered to be part of crypto operations. Simulator doesn't support this. 
Test_H0F3A/pclmulqdq.asm diff --git a/unittests/ASM/VEX/vaeskeygenassist.asm b/unittests/ASM/VEX/vaeskeygenassist.asm new file mode 100644 index 000000000..58bd76f53 --- /dev/null +++ b/unittests/ASM/VEX/vaeskeygenassist.asm @@ -0,0 +1,64 @@ +%ifdef CONFIG +{ + "HostFeatures": ["AVX"], + "RegData": { + "XMM0": ["0x6363636363636363", "0x6363636363636363", "0x0000000000000000", "0x0000000000000000"], + "XMM1": ["0x1616161616161616", "0x1616161616161616", "0x0000000000000000", "0x0000000000000000"], + "XMM2": ["0x7C6363636363637C", "0x7C6363636363637C", "0x0000000000000000", "0x0000000000000000"], + "XMM3": ["0x1616161616161616", "0x7C6363636363637C", "0x0000000000000000", "0x0000000000000000"], + "XMM4": ["0x6363636263636363", "0x6363636263636363", "0x0000000000000000", "0x0000000000000000"], + "XMM5": ["0x1616161416161616", "0x1616161416161616", "0x0000000000000000", "0x0000000000000000"], + "XMM6": ["0x7C6363606363637C", "0x7C6363606363637C", "0x0000000000000000", "0x0000000000000000"], + "XMM7": ["0x1616161216161616", "0x7C6363676363637C", "0x0000000000000000", "0x0000000000000000"], + "XMM8": ["0x6363636663636363", "0x6363636663636363", "0x0000000000000000", "0x0000000000000000"], + "XMM9": ["0x1616161016161616", "0x1616161016161616", "0x0000000000000000", "0x0000000000000000"], + "XMM10": ["0x7C6363646363637C", "0x7C6363646363637C", "0x0000000000000000", "0x0000000000000000"], + "XMM11": ["0x1616161E16161616", "0x7C63636B6363637C", "0x0000000000000000", "0x0000000000000000"], + "XMM12": ["0x6363636A63636363", "0x6363636A63636363", "0x0000000000000000", "0x0000000000000000"], + "XMM13": ["0x1616161C16161616", "0x1616161C16161616", "0x0000000000000000", "0x0000000000000000"], + "XMM14": ["0x7C6363686363637C", "0x7C6363686363637C", "0x0000000000000000", "0x0000000000000000"], + "XMM15": ["0x1616161A16161616", "0x7C63636F6363637C", "0x0000000000000000", "0x0000000000000000"] + } +} +%endif + +lea rdx, [rel .data] + +vaeskeygenassist xmm0, [rdx + 16 * 0], 0 
+vaeskeygenassist xmm1, [rdx + 16 * 1], 0 +vaeskeygenassist xmm2, [rdx + 16 * 2], 0 +vaeskeygenassist xmm3, [rdx + 16 * 3], 0 + +vaeskeygenassist xmm4, [rdx + 16 * 0], 1 +vaeskeygenassist xmm5, [rdx + 16 * 1], 2 +vaeskeygenassist xmm6, [rdx + 16 * 2], 3 +vaeskeygenassist xmm7, [rdx + 16 * 3], 4 + +vaeskeygenassist xmm8, [rdx + 16 * 0], 5 +vaeskeygenassist xmm9, [rdx + 16 * 1], 6 +vaeskeygenassist xmm10, [rdx + 16 * 2], 7 +vaeskeygenassist xmm11, [rdx + 16 * 3], 8 + +vaeskeygenassist xmm12, [rdx + 16 * 0], 9 +vaeskeygenassist xmm13, [rdx + 16 * 1], 10 +vaeskeygenassist xmm14, [rdx + 16 * 2], 11 +vaeskeygenassist xmm15, [rdx + 16 * 3], 12 + +hlt + +align 16 +.data: +dq 0x0000000000000000 +dq 0x0000000000000000 + +dq 0xFFFFFFFFFFFFFFFF +dq 0xFFFFFFFFFFFFFFFF + +dq 0x0000000100000001 +dq 0x0000000100000001 + +dq 0xFFFFFFFF00000000 +dq 0x00000001FFFFFFFF + +dq 0x0202020202020202 +dq 0x0303030303030303