From 8291e600fa2887bfec9767e49439f1e7c2853998 Mon Sep 17 00:00:00 2001
From: lioncash
Date: Fri, 16 Dec 2022 23:44:49 +0000
Subject: [PATCH] OpcodeDispatcher: Simplify ADDSUBPOp

Rather than looping vectors, we can interleave them together directly with IR ops.
---
 .../Core/OpcodeDispatcher/Vector.cpp | 20 +++++++++----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
index 102647183..b2b3c747a 100644
--- a/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
+++ b/External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
@@ -2165,24 +2165,22 @@ void OpDispatchBuilder::MOVQ2DQ(OpcodeArgs);
 
 template
 void OpDispatchBuilder::ADDSUBPOp(OpcodeArgs) {
-  auto Size = GetSrcSize(Op);
+  const auto Size = GetSrcSize(Op);
 
   OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
   OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
 
-  OrderedNode *ResAdd{};
-  OrderedNode *ResSub{};
-  ResAdd = _VFAdd(Size, ElementSize, Dest, Src);
-  ResSub = _VFSub(Size, ElementSize, Dest, Src);
+  OrderedNode *ResAdd = _VFAdd(Size, ElementSize, Dest, Src);
+  OrderedNode *ResSub = _VFSub(Size, ElementSize, Dest, Src);
 
-  // We now need to swizzle results
-  uint8_t NumElements = Size / ElementSize;
   // Even elements are the sub result
   // Odd elements are the add results
-  for (size_t i = 0; i < NumElements; i += 2) {
-    ResAdd = _VInsElement(Size, ElementSize, i, i, ResAdd, ResSub);
-  }
-  StoreResult(FPRClass, Op, ResAdd, -1);
+  OrderedNode *UnzipSub = _VUnZip(Size, ElementSize, ResSub, ResSub);
+  OrderedNode *UnzipAdd = _VUnZip2(Size, ElementSize, ResAdd, ResAdd);
+
+  OrderedNode *Result = _VZip(Size, ElementSize, UnzipSub, UnzipAdd);
+
+  StoreResult(FPRClass, Op, Result, -1);
 }
 
 void OpDispatchBuilder::PFNACCOp(OpcodeArgs) {