x86 jit: Minor optimization in vmmul

2024-11-27 07:20:49 +00:00 · 2014-12-06 00:27:31 +01:00 · 2014-12-06 00:27:31 +01:00 · d46c9c2f74
commit d46c9c2f74
parent fb251c9a5c
1 changed file with 5 additions and 1 deletion
--- a/Core/MIPS/x86/CompVFPU.cpp
+++ b/Core/MIPS/x86/CompVFPU.cpp
@@ -2661,9 +2661,13 @@ void Jit::Comp_Vmmul(MIPSOpcode op) {
 		// TODO: With more temp registers, can generate much more efficient code.
 		for (int i = 0; i < n; i++) {
 			MOVSS(XMM1, fpr.V(tregs[4 * i]));  // TODO: AVX broadcastss to replace this and the SHUFPS
+			MOVSS(XMM0, fpr.V(tregs[4 * i + 1]));
 			SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(0, 0, 0, 0));
+			SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
 			MULPS(XMM1, fpr.VS(scol[0]));
-			for (int j = 1; j < n; j++) {
+			MULPS(XMM0, fpr.VS(scol[1]));
+			ADDPS(XMM1, R(XMM0));
+			for (int j = 2; j < n; j++) {
 				MOVSS(XMM0, fpr.V(tregs[4 * i + j]));
 				SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
 				MULPS(XMM0, fpr.VS(scol[j]));