From d5d77b41f9fd4755075c221865ad310e75850812 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 14 May 2016 14:23:18 -0700 Subject: [PATCH] jit-ir: Properly flip vtfm simd cases. Disable the broken one and add clearer comments. --- Core/MIPS/IR/IRCompVFPU.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index a469a9cf8..40cd16067 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -1203,12 +1203,13 @@ namespace MIPSComp { GetVectorRegs(tregs, sz, _VT); GetVectorRegs(dregs, sz, _VD); - // SIMD-optimized implementations - if sregs[0..3] is consecutive, the rest are too. - if (msz == M_4x4 && IsConsecutive4(sregs)) { + // SIMD-optimized implementations - if sregs[0..3] is non-consecutive, it's transposed. + if (msz == M_4x4 && !IsConsecutive4(sregs)) { int s0 = IRVTEMP_0; - int s1 = IRVTEMP_PFX_T; + int s1 = IRVTEMP_PFX_S; // For this algorithm, we don't care if tregs are consecutive or not, // they are accessed one at a time. This handles homogenous transforms correctly, as well. + // We take advantage of sregs[0] + 1 being sregs[4] here. ir.Write(IROp::Vec4Scale, s0, sregs[0], tregs[0]); for (int i = 1; i < 4; i++) { if (!homogenous || (i != n - 1)) { @@ -1226,10 +1227,12 @@ namespace MIPSComp { } } return; - } else if (msz == M_4x4 && !IsConsecutive4(sregs)) { + } else if (msz == M_4x4 && IsConsecutive4(sregs)) { + // Consecutive, which is harder. + DISABLE; int s0 = IRVTEMP_0; int s1 = IRVTEMP_PFX_S; - // Doesn't make complete sense to me why this works.... + // Doesn't make complete sense to me why this works.... (because it doesn't.) ir.Write(IROp::Vec4Scale, s0, sregs[0], tregs[0]); for (int i = 1; i < 4; i++) { if (!homogenous || (i != n - 1)) {