From d5d77b41f9fd4755075c221865ad310e75850812 Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets" <checkins@unknownbrackets.org>
Date: Sat, 14 May 2016 14:23:18 -0700
Subject: [PATCH] jit-ir: Properly flip vtfm simd cases.

Disable the broken one and add clearer comments.
---
 Core/MIPS/IR/IRCompVFPU.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp
index a469a9cf8..40cd16067 100644
--- a/Core/MIPS/IR/IRCompVFPU.cpp
+++ b/Core/MIPS/IR/IRCompVFPU.cpp
@@ -1203,12 +1203,13 @@ namespace MIPSComp {
 		GetVectorRegs(tregs, sz, _VT);
 		GetVectorRegs(dregs, sz, _VD);
 
-		// SIMD-optimized implementations - if sregs[0..3] is consecutive, the rest are too.
-		if (msz == M_4x4 && IsConsecutive4(sregs)) {
+		// SIMD-optimized implementations - if sregs[0..3] is non-consecutive, it's transposed.
+		if (msz == M_4x4 && !IsConsecutive4(sregs)) {
 			int s0 = IRVTEMP_0;
-			int s1 = IRVTEMP_PFX_T;
+			int s1 = IRVTEMP_PFX_S;
 			// For this algorithm, we don't care if tregs are consecutive or not,
 			// they are accessed one at a time. This handles homogenous transforms correctly, as well.
+			// We take advantage of sregs[0] + 1 being sregs[4] here.
 			ir.Write(IROp::Vec4Scale, s0, sregs[0], tregs[0]);
 			for (int i = 1; i < 4; i++) {
 				if (!homogenous || (i != n - 1)) {
@@ -1226,10 +1227,12 @@ namespace MIPSComp {
 				}
 			}
 			return;
-		} else if (msz == M_4x4 && !IsConsecutive4(sregs)) {
+		} else if (msz == M_4x4 && IsConsecutive4(sregs)) {
+			// Consecutive, which is harder.
+			DISABLE;
 			int s0 = IRVTEMP_0;
 			int s1 = IRVTEMP_PFX_S;
-			// Doesn't make complete sense to me why this works....
+			// This path does not actually work for consecutive sregs, hence the DISABLE above; kept for reference.
 			ir.Write(IROp::Vec4Scale, s0, sregs[0], tregs[0]);
 			for (int i = 1; i < 4; i++) {
 				if (!homogenous || (i != n - 1)) {