diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 4b709a60b..de6c9c669 100644 --- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -5698,7 +5698,10 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b01, 0x12), 1, &OpDispatchBuilder::VMOVLPOp}, {OPD(1, 0b00, 0x13), 1, &OpDispatchBuilder::VMOVLPOp}, {OPD(1, 0b01, 0x13), 1, &OpDispatchBuilder::VMOVLPOp}, - + + {OPD(1, 0b00, 0x16), 1, &OpDispatchBuilder::VMOVHPOp}, + {OPD(1, 0b00, 0x17), 1, &OpDispatchBuilder::VMOVHPOp}, + {OPD(1, 0b00, 0x28), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPD_Op}, {OPD(1, 0b01, 0x28), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPD_Op}, {OPD(1, 0b00, 0x29), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPD_Op}, diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 56bea8cef..1e6a72aa0 100644 --- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -408,6 +408,7 @@ public: void VMOVAPS_VMOVAPD_Op(OpcodeArgs); void VMOVUPS_VMOVUPD_Op(OpcodeArgs); + void VMOVHPOp(OpcodeArgs); void VMOVLPOp(OpcodeArgs); // X87 Ops diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index 8a0a8b877..873fbd15f 100644 --- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -92,6 +92,19 @@ void OpDispatchBuilder::MOVHPDOp(OpcodeArgs) { } } +void OpDispatchBuilder::VMOVHPOp(OpcodeArgs) { /* Handler registered in this patch for OPD(1, 0b00, 0x16) and OPD(1, 0b00, 0x17), both named "VMOVHPS" in the VEX table. */ + if (Op->Dest.IsGPR()) { /* Taken for the xmm-destination form in the accompanying test (NOTE(review): confirm IsGPR() also covers vector registers). Loads 16 bytes of Src[0] and 8 bytes of Src[1], presumably inserts the latter as the upper 64-bit element of the former (TODO confirm _VInsElement argument order), then stores with an op size of 32; the test expects the destination's upper 128 bits to read as zero. */ + OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, 16); + OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, 8); + OrderedNode *Result = _VInsElement(16, 8, 1, 0, 
Src1, Src2); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, 32, -1); + } else { /* Taken for the memory-destination form in the accompanying test: presumably moves the upper 64-bit element of the 16-byte source down into element 0 (TODO confirm _VInsElement argument order) and stores only 8 bytes; the test expects the source's upper quadword to land in memory. */ + OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, 16); + OrderedNode *Result = _VInsElement(16, 8, 0, 1, Src, Src); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, 8, 8); + } +} + void OpDispatchBuilder::MOVLPOp(OpcodeArgs) { OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, 8); if (Op->Dest.IsGPR()) { diff --git a/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp b/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp index a88ca1ad1..a0ecdb729 100644 --- a/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp +++ b/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp @@ -41,11 +41,11 @@ void InitializeVEXTables() { {OPD(1, 0b00, 0x15), 1, X86InstInfo{"VUNPCKHPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, {OPD(1, 0b01, 0x15), 1, X86InstInfo{"VUNPCKHPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, - {OPD(1, 0b00, 0x16), 1, X86InstInfo{"VMOVHPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b00, 0x16), 1, X86InstInfo{"VMOVHPS", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS | FLAGS_VEX_1ST_SRC, 0, nullptr}}, {OPD(1, 0b01, 0x16), 1, X86InstInfo{"VMOVHPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, {OPD(1, 0b10, 0x16), 1, X86InstInfo{"VMOVSHDUP", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, - {OPD(1, 0b00, 0x17), 1, X86InstInfo{"VMOVHPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, + {OPD(1, 0b00, 0x17), 1, X86InstInfo{"VMOVHPS", TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}}, {OPD(1, 0b01, 0x17), 1, X86InstInfo{"VMOVHPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, {OPD(1, 0b00, 0x50), 1, X86InstInfo{"VMOVMSKPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}}, diff --git a/unittests/ASM/VEX/vmovhps.asm b/unittests/ASM/VEX/vmovhps.asm new file mode 100644 index 000000000..d1d1d8300 --- 
/dev/null +++ b/unittests/ASM/VEX/vmovhps.asm @@ -0,0 +1,36 @@ +%ifdef CONFIG +{ + "HostFeatures": ["AVX"], + "RegData": { + "XMM1": ["0xCCCCCCCCCCCCCCCC", "0xEEEEEEEEEEEEEEEE", "0x0000000000000000", "0x0000000000000000"], + "XMM2": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF"], + "XMM3": ["0xCCCCCCCCCCCCCCCC", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"], + "XMM4": ["0xDDDDDDDDDDDDDDDD", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"] + } +} +%endif + +lea rdx, [rel .data] + +;; Register as DST tests +; Load inputs +vmovapd ymm1, [rdx] +vmovapd ymm2, [rdx + 32] + +vmovhps xmm1, xmm2, [rdx + 48] +vmovhps xmm3, xmm1, [rdx + 56] + +;; Store to memory test +; Overwrite beginning of data, then yank it back into a vector +; Nothing in memory should be modified except the first 64 bits. +vmovhps [rdx], xmm2 +vmovapd ymm4, [rdx] + +hlt + +align 32 +.data: +db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD +db 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF