x86 jit: SIMD-ify VFPU register file writebacks where possible

2024-11-23 13:30:02 +00:00 · 2014-11-26 01:33:05 +01:00 · 2014-11-26 01:33:05 +01:00 · 804de50711
commit 804de50711
parent a6eb4c7e73
3 changed files with 80 additions and 16 deletions
--- a/Common/x64Emitter.h
+++ b/Common/x64Emitter.h
@ -631,6 +631,25 @@ public:
 	// SSE/SSE2: Useful alternative to shuffle in some cases.
 	void MOVDDUP(X64Reg regOp, OpArg arg);

+	// TODO: Actually implement these in the emitter; the declarations below stay compiled out (#if 0) until then.
+#if 0
+	// SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products...
+	void ADDSUBPS(X64Reg dest, OpArg src);
+	void ADDSUBPD(X64Reg dest, OpArg src);
+	void HADDPS(X64Reg dest, OpArg src);
+	void HADDPD(X64Reg dest, OpArg src);
+	void HSUBPS(X64Reg dest, OpArg src);
+	void HSUBPD(X64Reg dest, OpArg src);
+
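+	// SSE4.1: Further horizontal operations - dot products. These are weirdly flexible: the immediate arg contains both a read mask (high nibble: which input lanes get multiplied) and a write "mask" (low nibble: which output lanes receive the sum).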
+	void DPPS(X64Reg dest, OpArg src, u8 arg);
+	void DPPD(X64Reg dest, OpArg src, u8 arg);
+
+	// SSE4.1: Insert/extract a single float lane. These are probably useful for VFPU emulation.
+	void INSERTPS(X64Reg dest, OpArg src, u8 arg);
+	void EXTRACTPS(OpArg dest, X64Reg src, u8 arg);
+#endif
+
 	void UNPCKLPS(X64Reg dest, OpArg src);
 	void UNPCKHPS(X64Reg dest, OpArg src);
 	void UNPCKLPD(X64Reg dest, OpArg src);
--- a/Core/MIPS/x86/CompVFPU.cpp
+++ b/Core/MIPS/x86/CompVFPU.cpp
@ -2292,6 +2292,19 @@ void Jit::Comp_VScl(MIPSOpcode op) {
 	GetVectorRegsPrefixT(&scale, V_Single, _VT);
 	GetVectorRegsPrefixD(dregs, sz, _VD);

+	if (fpr.TryMapDirtyInInVS(dregs, sz, sregs, sz, &scale, V_Single, true)) {
+		MOVSS(XMM0, fpr.VS(scale));
+		if (sz != V_Single)
+			SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
+		if (dregs[0] != sregs[0]) {
+			MOVAPS(fpr.VSX(dregs[0]), fpr.VS(sregs[0]));
+		}
+		MULPS(fpr.VSX(dregs[0]), R(XMM0));
+		ApplyPrefixD(dregs, sz);
+		fpr.ReleaseSpillLocks();
+		return;
+	}
+
 	// Flush SIMD.
 	fpr.SimpleRegsV(sregs, sz, 0);
 	fpr.SimpleRegsV(&scale, V_Single, 0);
--- a/Core/MIPS/x86/RegCacheFPU.cpp
+++ b/Core/MIPS/x86/RegCacheFPU.cpp
@ -507,29 +507,61 @@ void FPURegCache::StoreFromRegister(int i) {
 		X64Reg xr = regs[i].location.GetSimpleReg();
 		_assert_msg_(JIT, xr >= 0 && xr < NUM_X_FPREGS, "WTF - store - invalid reg");
 		if (regs[i].lane != 0) {
-			// Store all of them.
-			// TODO: This could be more optimal.  Check if we can MOVUPS/MOVAPS, etc.
-			for (int j = 0; j < 4; ++j) {
-				int mr = xregs[xr].mipsRegs[j];
-				if (mr == -1) {
-					continue;
+			const int *mri = xregs[xr].mipsRegs;
+			int seq = 1;
+			for (int i = 1; i < 4; ++i) {
+				if (mri[i] == -1) {
+					break;
 				}
-				if (j != 0 && xregs[xr].dirty) {
-					emit->SHUFPS(xr, Gen::R(xr), MMShuffleSwapTo0(j));
+				if (voffset[mri[i] - 32] == voffset[mri[i - 1] - 32] + 1) {
+					seq++;
+				} else {
+					break;
 				}
+			}

-				OpArg newLoc = GetDefaultLocation(mr);
-				if (xregs[xr].dirty) {
-					emit->MOVSS(newLoc, xr);
+			if (seq == 2 || seq == 4) {
+				OpArg newLoc = GetDefaultLocation(mri[0]);
+				if (seq == 4)
+					emit->MOVAPS(newLoc, xr);
+				else
+					emit->MOVQ_xmm(newLoc, xr);
+				for (int j = 0; j < 4; ++j) {
+					int mr = xregs[xr].mipsRegs[j];
+					if (mr == -1) {
+						continue;
+					}
+					OpArg newLoc = GetDefaultLocation(mr);
+					regs[mr].location = newLoc;
+					regs[mr].away = false;
+					regs[mr].lane = 0;
+					xregs[xr].mipsRegs[j] = -1;
+				}
+			} else {
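+				// The leading run of consecutive voffset slots is not exactly 2 or 4 lanes: store each of them individually.
+				// TODO: This could still be more optimal.  Check if we can MOVUPS/MOVAPS in more cases, etc.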
+				for (int j = 0; j < 4; ++j) {
+					int mr = xregs[xr].mipsRegs[j];
+					if (mr == -1) {
+						continue;
+					}
+					if (j != 0 && xregs[xr].dirty) {
+						emit->SHUFPS(xr, Gen::R(xr), MMShuffleSwapTo0(j));
+					}
+
+					OpArg newLoc = GetDefaultLocation(mr);
+					if (xregs[xr].dirty) {
+						emit->MOVSS(newLoc, xr);
+					}
+					regs[mr].location = newLoc;
+					regs[mr].away = false;
+					regs[mr].lane = 0;
+					xregs[xr].mipsRegs[j] = -1;
 				}
-				regs[mr].location = newLoc;
-				regs[mr].away = false;
-				regs[mr].lane = 0;
-				xregs[xr].mipsRegs[j] = -1;
 			}
 		} else {
-			xregs[xr].mipsReg = -1;
 			OpArg newLoc = GetDefaultLocation(i);
+			xregs[xr].mipsReg = -1;
 			emit->MOVSS(newLoc, xr);
 			regs[i].location = newLoc;
 		}