diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
index f466a5b02..6b2b9e0bc 100644
--- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
+++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
@@ -5882,7 +5882,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
       {OPD(1, 0b01, 0xFD), 1, &OpDispatchBuilder::AVXVectorALUOp},
       {OPD(1, 0b01, 0xFE), 1, &OpDispatchBuilder::AVXVectorALUOp},
 
-      {OPD(2, 0b01, 0x18), 1, &OpDispatchBuilder::VBROADCASTOp},
+      {OPD(2, 0b01, 0x18), 1, &OpDispatchBuilder::VBROADCASTOp<4>}, // VBROADCASTSS: 4-byte element
+      {OPD(2, 0b01, 0x19), 1, &OpDispatchBuilder::VBROADCASTOp<8>}, // VBROADCASTSD: 8-byte element
       {OPD(2, 0b01, 0x2A), 1, &OpDispatchBuilder::VMOVVectorNTOp},
 
       {OPD(2, 0b01, 0x3B), 1, &OpDispatchBuilder::UnimplementedOp},
diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
index 7a6fb2208..971440336 100644
--- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
+++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
@@ -409,6 +409,7 @@ public:
 
   void VANDNOp(OpcodeArgs);
 
+  template <size_t ElementSize> // ElementSize: size of the broadcast element (4 or 8 in the dispatch table)
   void VBROADCASTOp(OpcodeArgs);
 
   void VMOVAPS_VMOVAPD_Op(OpcodeArgs);
diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
index f89c829f3..8ac09454e 100644
--- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
+++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
@@ -868,6 +868,7 @@ void OpDispatchBuilder::VANDNOp(OpcodeArgs) {
   StoreResult(FPRClass, Op, Dest, -1);
 }
 
+template <size_t ElementSize> // replaces the previously hard-coded element size of 4
 void OpDispatchBuilder::VBROADCASTOp(OpcodeArgs) {
   const auto DstSize = GetDstSize(Op);
   const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
@@ -876,10 +877,10 @@ void OpDispatchBuilder::VBROADCASTOp(OpcodeArgs) {
 
   if (Op->Src[0].IsGPR()) {
     OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
-    Result = _VDupElement(DstSize, 4, Src, 0);
+    Result = _VDupElement(DstSize, ElementSize, Src, 0); // duplicate element 0 of the source register
   } else {
-    OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], 4, Op->Flags, -1);
-    Result = _VDupElement(DstSize, 4, Src, 0);
+    OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], ElementSize, Op->Flags, -1); // load with the element's size as the operand size
+    Result = _VDupElement(DstSize, ElementSize, Src, 0);
   }
 
   if (Is128Bit) {
@@ -889,6 +890,11 @@ void OpDispatchBuilder::VBROADCASTOp(OpcodeArgs) {
   StoreResult(FPRClass, Op, Result, -1);
 }
 
+template
+void OpDispatchBuilder::VBROADCASTOp<4>(OpcodeArgs); // explicit instantiation used by the 0x18 table entry in OpcodeDispatcher.cpp
+template
+void OpDispatchBuilder::VBROADCASTOp<8>(OpcodeArgs); // explicit instantiation used by the 0x19 table entry in OpcodeDispatcher.cpp
+
 template
 void OpDispatchBuilder::PINSROp(OpcodeArgs) {
   auto Size = GetDstSize(Op);
diff --git a/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp b/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp
index 9fa797f36..9c20ba951 100644
--- a/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp
+++ b/External/FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp
@@ -283,7 +283,7 @@ void InitializeVEXTables() {
     {OPD(2, 0b01, 0x17), 1, X86InstInfo{"VPTEST", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
     {OPD(2, 0b01, 0x18), 1, X86InstInfo{"VBROADCASTSS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}},
-    {OPD(2, 0b01, 0x19), 1, X86InstInfo{"VBROADCASTSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
+    {OPD(2, 0b01, 0x19), 1, X86InstInfo{"VBROADCASTSD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}}, // same flags as the VBROADCASTSS entry above
     {OPD(2, 0b01, 0x1A), 1, X86InstInfo{"VBROADCASTF128", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
     {OPD(2, 0b01, 0x1C), 1, X86InstInfo{"VPABSB", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
     {OPD(2, 0b01, 0x1D), 1, X86InstInfo{"VPABSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
diff --git a/unittests/ASM/VEX/vbroadcastsd.asm b/unittests/ASM/VEX/vbroadcastsd.asm
new file mode 100644
index 000000000..f9c9b9723
--- /dev/null
+++ b/unittests/ASM/VEX/vbroadcastsd.asm
@@ -0,0 +1,40 @@
+%ifdef CONFIG
+{
+  "HostFeatures": ["AVX"],
+  "RegData": {
+    "XMM0": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
+    "XMM1": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE", "0xDDDDDDDDDDDDDDDD", "0xCCCCCCCCCCCCCCCC"],
+    "XMM2": ["0xA76C4F06A12BFCE0", "0xA76C4F06A12BFCE0", "0xA76C4F06A12BFCE0", "0xA76C4F06A12BFCE0"],
+    "XMM3": ["0x6868C3F3AAED56E0", "0x6868C3F3AAED56E0", "0x6868C3F3AAED56E0", "0x6868C3F3AAED56E0"],
+    "XMM4": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
+    "XMM5": ["0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE"]
+  }
+}
+%endif
+
+lea rdx, [rel .data] ; rdx = base of the eight-qword test data below
+
+vmovaps ymm0, [rdx + 32 * 0] ; first four qwords of .data
+vmovaps ymm1, [rdx + 32 * 1] ; last four qwords of .data
+
+; Register broadcasting
+vbroadcastsd ymm2, xmm0 ; expected: low qword of xmm0 in all four lanes
+vbroadcastsd ymm3, xmm1 ; expected: low qword of xmm1 in all four lanes
+
+; Memory broadcasting
+vbroadcastsd ymm4, [rdx + 16] ; third qword of .data (0xFFFFFFFFFFFFFFFF)
+vbroadcastsd ymm5, [rdx + 24] ; fourth qword of .data (0xEEEEEEEEEEEEEEEE)
+
+hlt
+
+align 32
+.data:
+dq 0xA76C4F06A12BFCE0
+dq 0x9B80767F1E6A060F
+dq 0xFFFFFFFFFFFFFFFF
+dq 0xEEEEEEEEEEEEEEEE
+
+dq 0x6868C3F3AAED56E0
+dq 0xF0FCE9E294E6E6DE
+dq 0xDDDDDDDDDDDDDDDD
+dq 0xCCCCCCCCCCCCCCCC