interp: Handle prefixes for Vmmov/Vmmul/Vmscl.

I doubt any actual code uses this, but we have some tricky VFPU bugs left, so just trying for maximum accuracy in the interpreter.
2025-02-19 21:52:45 +00:00 · 2019-03-10 16:39:08 -07:00 · 2019-03-10 16:39:08 -07:00 · d40ac043d4
commit d40ac043d4
parent 26b1368f7b
2 changed files with 56 additions and 30 deletions
--- a/Core/MIPS/IR/IRCompVFPU.cpp
+++ b/Core/MIPS/IR/IRCompVFPU.cpp
@ -1043,8 +1043,11 @@ namespace MIPSComp {

 	void IRFrontend::Comp_Vmmov(MIPSOpcode op) {
 		CONDITIONAL_DISABLE(VFPU_MTX);
+		if (!js.HasNoPrefix()) {
+			DISABLE;
+		}

-		// Matrix move (no prefixes)
+		// Matrix move (weird prefixes)
 		// D[N,M] = S[N,M]

 		int vs = _VS;
@ -1100,9 +1103,13 @@ namespace MIPSComp {

 	void IRFrontend::Comp_Vmscl(MIPSOpcode op) {
 		CONDITIONAL_DISABLE(VFPU_MTX);
+		if (!js.HasNoPrefix()) {
+			DISABLE;
+		}

-		// Matrix scale, matrix by scalar (no prefixes)
+		// Matrix scale, matrix by scalar (weird prefixes)
 		// d[N,M] = s[N,M] * t[0]
+		// Note: behaves just slightly differently than a series of vscls.

 		int vs = _VS;
 		int vd = _VD;
@ -1216,7 +1223,7 @@ namespace MIPSComp {
 			DISABLE;
 		}

-		// Matrix multiply (wierd prefixes)
+		// Matrix multiply (weird prefixes)
 		// D[0 .. N, 0 .. M] = S[0 .. N, 0 .. M]' * T[0 .. N, 0 .. M]
 		// Note: Behaves as if it's implemented through a series of vdots.
 		// Important: this is a matrix multiply with a pre-transposed S.
--- a/Core/MIPS/MIPSIntVFPU.cpp
+++ b/Core/MIPS/MIPSIntVFPU.cpp
@ -419,11 +419,8 @@ namespace MIPSInt
 	}

 	// The test really needs some work.
-	void Int_Vmmul(MIPSOpcode op)
-	{
-		float s[16];
-		float t[16];
-		float d[16];
+	void Int_Vmmul(MIPSOpcode op) {
+		float s[16]{}, t[16]{}, d[16];

 		int vd = _VD;
 		int vs = _VS;
@ -434,29 +431,37 @@ namespace MIPSInt
 		ReadMatrix(s, sz, vs);
 		ReadMatrix(t, sz, vt);

-		for (int a = 0; a < n; a++)
-		{
-			for (int b = 0; b < n; b++)
-			{
+		for (int a = 0; a < n; a++) {
+			for (int b = 0; b < n; b++) {
 				float sum = 0.0f;
-				for (int c = 0; c < n; c++)
-				{
-					sum += s[b*4 + c] * t[a*4 + c];
+				if (a == n - 1 && b == n - 1) {
+					// S and T prefixes work on the final (or maybe first, in reverse?) dot.
+					ApplySwizzleS(&s[b * 4], V_Quad);
+					ApplySwizzleT(&t[a * 4], V_Quad);
+					for (int c = 0; c < 4; c++) {
+						sum += s[b * 4 + c] * t[a * 4 + c];
+					}
+				} else {
+					for (int c = 0; c < n; c++) {
+						sum += s[b * 4 + c] * t[a * 4 + c];
+					}
 				}
-				d[a*4 + b] = sum;
+				d[a * 4 + b] = sum;
 			}
 		}

+		// The D prefix applies ONLY to the final element, but sat does work.
+		u32 lastmask = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & (1 << 8)) << (n - 1);
+		u32 lastsat = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & 3) << (n + n - 2);
+		currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;
+		ApplyPrefixD(&d[4 * (n - 1)], V_Quad, false);
 		WriteMatrix(d, sz, vd);
 		PC += 4;
 		EatPrefixes();
 	}

-	void Int_Vmscl(MIPSOpcode op)
-	{
-		float d[16];
-		float s[16];
-		float t[1];
+	void Int_Vmscl(MIPSOpcode op) {
+		float s[16]{}, t[4]{}, d[16];

 		int vd = _VD;
 		int vs = _VS;
@ -467,27 +472,41 @@ namespace MIPSInt
 		ReadMatrix(s, sz, vs);
 		ReadVector(t, V_Single, vt);

-		for (int a = 0; a < n; a++)
-		{
-			for (int b = 0; b < n; b++)
-			{
-				d[a*4 + b] = s[a*4 + b] * t[0];
+		for (int a = 0; a < n - 1; a++) {
+			for (int b = 0; b < n; b++) {
+				d[a * 4 + b] = s[a * 4 + b] * t[0];
 			}
 		}

+		// S prefix applies to the last row.
+		ApplySwizzleS(&s[(n - 1) * 4], V_Quad);
+		// T prefix applies only for the last row, and is used per element.
+		// This is like vscl, but instead of zzzz it uses xxxx.
+		u32 tprefixRemove = VFPU_ANY_SWIZZLE();
+		u32 tprefixAdd = VFPU_SWIZZLE(0, 0, 0, 0);
+		ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);
+
+		for (int b = 0; b < n; b++) {
+			d[(n - 1) * 4 + b] = s[(n - 1) * 4 + b] * t[b];
+		}
+
+		// The D prefix is applied to the last row.
+		ApplyPrefixD(&d[(n - 1) * 4], V_Quad);
 		WriteMatrix(d, sz, vd);
 		PC += 4;
 		EatPrefixes();
 	}

-	void Int_Vmmov(MIPSOpcode op)
-	{
-		float s[16];
+	void Int_Vmmov(MIPSOpcode op) {
+		float s[16]{};
 		int vd = _VD;
 		int vs = _VS;
 		MatrixSize sz = GetMtxSize(op);
 		ReadMatrix(s, sz, vs);
-		// This is just for matrices. No prefixes.
+		// S and D prefixes are applied to the last row.
+		int off = GetMatrixSide(sz) - 1;
+		ApplySwizzleS(&s[off * 4], V_Quad);
+		ApplyPrefixD(&s[off * 4], V_Quad);
 		WriteMatrix(s, sz, vd);
 		PC += 4;
 		EatPrefixes();