diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.arm64.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.arm64.cpp index f5fe95aded..e71dd10faf 100644 --- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.arm64.cpp +++ b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.arm64.cpp @@ -225,17 +225,13 @@ void GSSetupPrimCodeGenerator::Color() // GSVector4 c = dscan.c; armAsm->Ldr(v16, MemOperand(_dscan, offsetof(GSVertexSW, c))); - // constexpr VectorI mask16 = VectorI::cxpr(0xFFFF); - armAsm->Movi(v17.V4S(), 0xFFFF); - - // local.d4.c = (GSVector4i(dscan.c * step_shift) & mask16).xzyw().pu32(); + // GSVector4i tmp = GSVector4i(dscan.c * step_shift).xzyw(); + // local.d4.c = tmp.uzp1_16(tmp); // Not currently in GSVector since that's mainly targeting x86 for now armAsm->Fmul(v2.V4S(), v16.V4S(), v3.V4S()); armAsm->Fcvtzs(v2.V4S(), v2.V4S()); - armAsm->And(v2.V4S(), v17.V4S()); armAsm->Rev64(_vscratch.V4S(), v2.V4S()); armAsm->Uzp1(v2.V4S(), v2.V4S(), _vscratch.V4S()); - armAsm->Uqxtn(v2.V4H(), v2.V4S()); - armAsm->Dup(v2.V2D(), v2.V2D(), 0); + armAsm->Uzp1(v2.V8H(), v2.V8H(), v2.V8H()); armAsm->Str(v2, MemOperand(_locals, offsetof(GSScanlineLocalData, d4.c))); // GSVector4 dr = c.xxxx(); @@ -246,25 +242,18 @@ void GSSetupPrimCodeGenerator::Color() for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { - // VectorI r = (VectorI(dr * shift[1 + i]) & mask16).pu32(); + // VectorI r = VectorI(dr * shift[1 + i]); armAsm->Fmul(v2.V4S(), v0.V4S(), VRegister(4 + i, kFormat4S)); armAsm->Fcvtzs(v2.V4S(), v2.V4S()); - armAsm->And(v2.V4S(), v17.V4S()); - armAsm->Uqxtn(v2.V4H(), v2.V4S()); - armAsm->Dup(v2.V2D(), v2.V2D(), 0); - // VectorI b = (VectorI(db * shift[1 + i]) & mask16).pu32(); + // VectorI b = VectorI(db * shift[1 + i]); armAsm->Fmul(v3.V4S(), v1.V4S(), VRegister(4 + i, kFormat4S)); armAsm->Fcvtzs(v3.V4S(), v3.V4S()); - armAsm->And(v3.V4S(), v17.V4S()); - armAsm->Uqxtn(v3.V4H(), v3.V4S()); - armAsm->Dup(v3.V2D(), v3.V2D(), 0); - // m_local.d[i].rb = r.upl16(b); - - armAsm->Zip1(v2.V8H(), v2.V8H(), v3.V8H()); + // m_local.d[i].rb = r.trn1_16(b); // Not currently in GSVector since that's mainly targeting x86 for now + armAsm->Trn1(v2.V8H(), v2.V8H(), v3.V8H()); armAsm->Str(v2, _local(d[i].rb)); } @@ -278,25 +267,19 @@ void GSSetupPrimCodeGenerator::Color() for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { - // VectorI g = (VectorI(dg * shift[1 + i]) & mask16).pu32(); + // VectorI g = VectorI(dg * shift[1 + i]); armAsm->Fmul(v2.V4S(), v0.V4S(), VRegister(4 + i, kFormat4S)); armAsm->Fcvtzs(v2.V4S(), v2.V4S()); - armAsm->And(v2.V4S(), v17.V4S()); - armAsm->Uqxtn(v2.V4H(), v2.V4S()); - armAsm->Dup(v2.V2D(), v2.V2D(), 0); - // VectorI a = (VectorI(da * shift[1 + i]) & mask16).pu32(); + // VectorI a = VectorI(da * shift[1 + i]); armAsm->Fmul(v3.V4S(), v1.V4S(), VRegister(4 + i, kFormat4S)); armAsm->Fcvtzs(v3.V4S(), v3.V4S()); - armAsm->And(v3.V4S(), v17.V4S()); - armAsm->Uqxtn(v3.V4H(), v3.V4S()); - armAsm->Dup(v3.V2D(), v3.V2D(), 0); - // m_local.d[i].ga = g.upl16(a); + // m_local.d[i].ga = g.trn1_16(a); // Not currently in GSVector since that's mainly targeting x86 for now - armAsm->Zip1(v2.V8H(), v2.V8H(), v3.V8H()); + armAsm->Trn1(v2.V8H(), v2.V8H(), v3.V8H()); armAsm->Str(v2, _local(d[i].ga)); } }