Initial, simple implementation of vmscl for the x86 JIT.

2024-12-02 22:26:25 +00:00 · 2013-04-20 01:34:16 -07:00 · 2013-04-20 01:34:16 -07:00 · 9245490b53
commit 9245490b53
parent 29109d25af
5 changed files with 55 additions and 4 deletions
--- a/Core/MIPS/ARM/ArmCompVFPU.cpp
+++ b/Core/MIPS/ARM/ArmCompVFPU.cpp
@ -722,4 +722,8 @@ namespace MIPSComp
 		DISABLE;
 	}

+	void Jit::Comp_Vmscl(u32 op) {
+		DISABLE;  // vmscl is not compiled by the ARM JIT yet: the whole body is the DISABLE macro (only the x86 JIT gets a real implementation).
+	}
+
 }
--- a/Core/MIPS/ARM/ArmJit.h
+++ b/Core/MIPS/ARM/ArmJit.h
@ -195,6 +195,7 @@ public:
 	void Comp_Vmmov(u32 op);
 	void Comp_VScl(u32 op);
 	void Comp_Vmmul(u32 op);
+	void Comp_Vmscl(u32 op);

 	ArmJitBlockCache *GetBlockCache() { return &blocks; }

--- a/Core/MIPS/MIPSTables.cpp
+++ b/Core/MIPS/MIPSTables.cpp
@ -646,10 +646,10 @@ const MIPSInstruction tableVFPU6[32] =  //111100 xxx
 	INSTR("v(h)tfm4",&Jit::Comp_Generic, Dis_Vtfm, Int_Vtfm, IS_VFPU|OUT_EAT_PREFIX),
 	INSTR("v(h)tfm4",&Jit::Comp_Generic, Dis_Vtfm, Int_Vtfm, IS_VFPU|OUT_EAT_PREFIX),
 	//16
-	INSTR("vmscl",&Jit::Comp_Generic, Dis_Generic, Int_Vmscl, IS_VFPU|OUT_EAT_PREFIX),
-	INSTR("vmscl",&Jit::Comp_Generic, Dis_Generic, Int_Vmscl, IS_VFPU|OUT_EAT_PREFIX),
-	INSTR("vmscl",&Jit::Comp_Generic, Dis_Generic, Int_Vmscl, IS_VFPU|OUT_EAT_PREFIX),
-	INSTR("vmscl",&Jit::Comp_Generic, Dis_Generic, Int_Vmscl, IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vmscl",&Jit::Comp_Vmscl, Dis_Generic, Int_Vmscl, IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vmscl",&Jit::Comp_Vmscl, Dis_Generic, Int_Vmscl, IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vmscl",&Jit::Comp_Vmscl, Dis_Generic, Int_Vmscl, IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vmscl",&Jit::Comp_Vmscl, Dis_Generic, Int_Vmscl, IS_VFPU|OUT_EAT_PREFIX),

 	INSTR("vcrsp.t/vqmul.q",&Jit::Comp_Generic, Dis_CrossQuat, Int_CrossQuat, IS_VFPU|OUT_EAT_PREFIX),
 	INSTR("vcrsp.t/vqmul.q",&Jit::Comp_Generic, Dis_CrossQuat, Int_CrossQuat, IS_VFPU|OUT_EAT_PREFIX),
--- a/Core/MIPS/x86/CompVFPU.cpp
+++ b/Core/MIPS/x86/CompVFPU.cpp
@ -831,4 +831,49 @@ void Jit::Comp_Vmmul(u32 op) {
 	fpr.ReleaseSpillLocks();
 }

+void Jit::Comp_Vmscl(u32 op) {
+	CONDITIONAL_DISABLE;
+	// Emits x86 code for vmscl: each element of the source matrix (_VS) is multiplied by the scalar in _VT and stored to the destination matrix (_VD).
+	// TODO: This probably ignores prefixes?  Until that is confirmed, take the DISABLE path whenever a prefix may be active.
+	if (js.MayHavePrefix())
+		DISABLE;
+
+	MatrixSize sz = GetMtxSize(op);
+	int n = GetMatrixSide(sz);
+
+	u8 sregs[16], dregs[16], scale;
+	GetMatrixRegs(sregs, sz, _VS);
+	GetVectorRegs(&scale, V_Single, _VT);
+	GetMatrixRegs(dregs, sz, _VD);
+
+	// Load the scalar into XMM0 early, before any destination is written, so we don't have to worry about overlap with scale.
+	MOVSS(XMM0, fpr.V(scale));
+	// Two passes: first compute every product into its own temp V register, then copy the temps to dregs, so no destination is written until all sources have been read.
+	// TODO: test overlap, optimize.  Note the register arrays are indexed with a fixed stride of 4 (a * 4 + b) regardless of n.
+	u8 tempregs[16];
+	for (int a = 0; a < n; a++)
+	{
+		for (int b = 0; b < n; b++)
+		{
+			u8 temp = (u8) fpr.GetTempV();
+			fpr.MapRegV(temp, MAP_NOINIT | MAP_DIRTY);
+			MOVSS(fpr.VX(temp), fpr.V(sregs[a * 4 + b]));
+			MULSS(fpr.VX(temp), R(XMM0));
+			fpr.StoreFromRegisterV(temp);
+			tempregs[a * 4 + b] = temp;
+		}
+	}
+	for (int a = 0; a < n; a++)
+	{
+		for (int b = 0; b < n; b++)
+		{
+			u8 temp = tempregs[a * 4 + b];
+			fpr.MapRegV(temp, 0);
+			MOVSS(fpr.V(dregs[a * 4 + b]), fpr.VX(temp));
+		}
+	}
+
+	fpr.ReleaseSpillLocks();
+}
+
 }
--- a/Core/MIPS/x86/Jit.h
+++ b/Core/MIPS/x86/Jit.h
@ -205,6 +205,7 @@ public:
 	void Comp_Vmmov(u32 op);
 	void Comp_VScl(u32 op);
 	void Comp_Vmmul(u32 op);
+	void Comp_Vmscl(u32 op);

 	void Comp_DoNothing(u32 op);