Assorted FPU/VFPU fixes, after adding more tests.

2024-11-26 01:40:24 +00:00 · 2012-11-22 18:12:56 +01:00 · 2012-11-22 18:12:56 +01:00 · 232b84cd10
commit 232b84cd10
parent 15251fe4e9
9 changed files with 136 additions and 56 deletions
--- a/Core/MIPS/MIPSDisVFPU.cpp
+++ b/Core/MIPS/MIPSDisVFPU.cpp
@ -459,6 +459,15 @@ namespace MIPSDis
 		sprintf(out, "%s%s\t%s, %s, %i",name,VSuff(op),VN(vd, sz),VN(vs, sz),imm);
 	}

+	// Disassembles a two-register VFPU op into `out` as
+	// "<name><suffix>\t<vd>, <vs>", with both registers named at the op's vector size.
+	void Dis_Vs2i(u32 op, char *out)
+	{
+		const char *mnemonic = MIPSGetName(op);
+		const VectorSize vecSize = GetVecSize(op);
+		const int dstReg = _VD;
+		const int srcReg = _VS;
+		sprintf(out, "%s%s\t%s, %s", mnemonic, VSuff(op), VN(dstReg, vecSize), VN(srcReg, vecSize));
+	}
+
 	void Dis_Vi2x(u32 op, char *out)
 	{
 		VectorSize sz = GetVecSize(op);
--- a/Core/MIPS/MIPSDisVFPU.h
+++ b/Core/MIPS/MIPSDisVFPU.h
@ -55,5 +55,6 @@ namespace MIPSDis
 	void Dis_Vbfy(u32 op, char *out);
 	void Dis_Vf2i(u32 op, char *out);
 	void Dis_Vi2x(u32 op, char *out);
+	void Dis_Vs2i(u32 op, char *out);
 	void Dis_VBranch(u32 op, char *out);
 }
--- a/Core/MIPS/MIPSInt.cpp
+++ b/Core/MIPS/MIPSInt.cpp
@ -46,6 +46,26 @@
 #define HI currentMIPS->hi
 #define LO currentMIPS->lo

+
+// Returns 1 when d is an even integral value, 0 otherwise.
+inline int is_even(float d) {
+	float halfWhole = 0.0f;
+	modff(d / 2.0f, &halfWhole);  // keep only the integer part of d/2
+	const bool even = (2.0f * halfWhole == d);
+	return even ? 1 : 0;
+}
+
+// Rounds to the nearest integral value. Exact .5 ties go to the even
+// neighbour (0.5 -> 0, 1.5 -> 2, 2.5 -> 2).
+float round_ieee_754(float d) {
+	const float lower = floorf(d);
+	const float frac = d - lower;
+	// Exact tie: pick whichever of lower / lower + 1 is even.
+	if (frac == 0.5f)
+		return is_even(lower) ? lower : lower + 1.0f;
+	// Otherwise round down below the midpoint, up above it.
+	return frac < 0.5f ? lower : lower + 1.0f;
+}
+
 void DelayBranchTo(u32 where)
 {
 	PC += 4;
@ -738,16 +758,6 @@ namespace MIPSInt
 		PC += 4;
 	}

-	#ifdef _MSC_VER
-	static float roundf(float num)
-	{
-		float integer = ceilf(num);
-		if (num > 0)
-			return integer - num > 0.5f ? integer - 1.0f : integer;
-		return integer - num >= 0.5f ? integer - 1.0f : integer;
-	}
-	#endif
-
 	void Int_FPU2op(u32 op)
 	{
 		int fs = _FS;
@ -768,7 +778,7 @@ namespace MIPSInt
 		case 36:
 			switch (currentMIPS->fcr31 & 3)
 			{
-			case 0: FsI(fd) = (int)roundf(F(fs)); break;  // RINT_0    // TODO: rintf or roundf?
+			case 0: FsI(fd) = (int)round_ieee_754(F(fs)); break;  // RINT_0
 			case 1: FsI(fd) = (int)F(fs); break;  // CAST_1
 			case 2: FsI(fd) = (int)ceilf(F(fs)); break;  // CEIL_2
 			case 3: FsI(fd) = (int)floorf(F(fs)); break;  // FLOOR_3
@ -788,31 +798,34 @@ namespace MIPSInt
 		bool cond;
 		switch (op & 0xf)
 		{
+		case 0: //f
+		case 1: //un
+		case 8: //sf
+		case 9: //ngle
+			cond = false;
+			break;
+
 		case 2: //eq
+		case 10: //seq
+		case 3: //ueq
+		case 11: //ngl
 			cond = (F(fs) == F(ft));
 			break;
+
+		case 4: //olt
+		case 5: //ult
 		case 12: //lt
 		case 13: //nge
 			cond = (F(fs) < F(ft));
 			break;

+		case 6: //ole
+		case 7: //ule
 		case 14: //le
 		case 15: //ngt
 			cond = (F(fs) <= F(ft));
 			break;

-		case 0: //f
-		case 1: //un
-		case 3: //ueq
-		case 4: //olt
-		case 5: //ult
-		case 6: //ole
-		case 7: //ule
-		case 8: //sf
-		case 9: //ngle
-		case 10: //seq
-		case 11: //ngl
-
 		default:
 			_dbg_assert_msg_(CPU,0,"Trying to interpret FPUComp instruction that can't be interpreted");
 			cond = false;
--- a/Core/MIPS/MIPSInt.h
+++ b/Core/MIPS/MIPSInt.h
@ -22,6 +22,7 @@
 u32 MIPS_GetNextPC();
 void MIPS_ClearDelaySlot();
 int MIPS_SingleStep();
+float round_ieee_754(float num);

 namespace MIPSInt
 {
--- a/Core/MIPS/MIPSIntVFPU.cpp
+++ b/Core/MIPS/MIPSIntVFPU.cpp
@ -365,7 +365,7 @@ namespace MIPSInt
 		PC += 4;
 		EatPrefixes();
 	}
-
+	// The test really needs some work.
 	void Int_Vmmul(u32 op)
 	{
 		float s[16];
@ -390,7 +390,7 @@ namespace MIPSInt
 				{
 					sum += s[b*4 + c] * t[a*4 + c];
 				}
-				d[a*4 + b]=sum;
+				d[a*4 + b] = sum;
 			}
 		}

@ -434,6 +434,7 @@ namespace MIPSInt
 		int vs = _VS;
 		MatrixSize sz = GetMtxSize(op);
 		ReadMatrix(s, sz, vs);
+		// This is just for matrices. No prefixes.
 		WriteMatrix(s, sz, vd);
 		PC += 4;
 		EatPrefixes();
@ -543,8 +544,8 @@ namespace MIPSInt
 		{
 			switch ((op >> 21) & 0x1f)
 			{
-			case 16: d[i] = (int)floor(s[i] * mult + 0.5f); break; //n
-			case 17: d[i] = s[i]>=0?(int)floor(s[i] * mult) : (int)ceil(s[i] * mult); break; //z
+			case 16: d[i] = (int)round_ieee_754(s[i] * mult); break; //n
+			case 17: d[i] = s[i]>=0 ? (int)floor(s[i] * mult) : (int)ceil(s[i] * mult); break; //z
 			case 18: d[i] = (int)ceil(s[i] * mult); break; //u
 			case 19: d[i] = (int)floor(s[i] * mult); break; //d
 			}
@ -727,9 +728,8 @@ namespace MIPSInt
 			{
 				for (int i = 0; i < 4; i++)
 				{
-					int v = s[i];
-					v >>= 24;
-					d[0] |= ((u32)v & 0xFF) << (i * 8);
+					u32 v = s[i];
+					d[0] |= (v >> 24) << (i * 8);
 				}
 				oz = V_Single;
 			}
@ -1061,7 +1061,7 @@ namespace MIPSInt
 				}
 			}
 		}
-		else if (n == ins+1)
+		else if (n == ins + 1)
 		{
 			for (int i = 0; i < n; i++)
 			{
@ -1130,7 +1130,7 @@ namespace MIPSInt
 		static const float constants[32] =
 		{
 			0,
-			std::numeric_limits<float>::max(),  // or max() ??   pspautotests seem to indicate inf
+			std::numeric_limits<float>::max(),  // all these are verified on real PSP
 			sqrtf(2.0f),
 			sqrtf(0.5f),
 			2.0f/sqrtf((float)M_PI),
@ -1147,8 +1147,8 @@ namespace MIPSInt
 			2*(float)M_PI,
 			(float)M_PI/6,
 			log10f(2.0f),
-			logf(10.0f)/logf(2.0f), //"Log2(10)",
-			sqrtf(3.0f)/2.0f, //"Sqrt(3)/2"
+			logf(10.0f)/logf(2.0f),
+			sqrtf(3.0f)/2.0f,
 		};

 		int conNum = (op >> 16) & 0x1f;
@ -1261,7 +1261,7 @@ namespace MIPSInt
 			break;
 		case 3: // vmax
 			for (int i = 0; i < GetNumVectorElements(sz); i++)
-				d[i] = isnan(t[i]) ? s[i] : (isnan(s[i]) ? t[i] : std::max(s[i], t[i]));
+				d[i] = isnan(t[i]) ? t[i] : (isnan(s[i]) ? s[i] : std::max(s[i], t[i]));
 			break;
 		default:
 			_dbg_assert_msg_(CPU,0,"unknown min/max op %d", cond);
@ -1273,6 +1273,57 @@ namespace MIPSInt
 		EatPrefixes();
 	}

+	// vsge: per-lane "set if greater or equal".
+	// Reads vs and vt, passes them through ApplySwizzleS / ApplySwizzleT, and
+	// writes 1.0f to each lane of vd where s >= t and 0.0f otherwise. The result
+	// goes through ApplyPrefixD before being written; PC then advances by 4 and
+	// EatPrefixes() is called.
+	void Int_Vsge(u32 op) {
+		int vt = _VT;
+		int vs = _VS;
+		int vd = _VD;
+		VectorSize sz = GetVecSize(op);
+		int n = GetNumVectorElements(sz);
+		float s[4];
+		float t[4];
+		float d[4];
+		ReadVector(s, sz, vs);
+		ApplySwizzleS(s, sz);
+		ReadVector(t, sz, vt);
+		ApplySwizzleT(t, sz);
+		// A C++ comparison with NaN on either side is false, so such lanes get 0.0f.
+		// TODO: NaN handling (positive vs. negative NaN) has not been verified
+		// for this instruction.
+		for (int i = 0; i < n; i++)
+			d[i] = s[i] >= t[i] ? 1.0f : 0.0f;
+
+		ApplyPrefixD(d, sz);
+		WriteVector(d, sz, vd);
+		PC += 4;
+		EatPrefixes();
+	}
+
+	// vslt: per-lane "set if less than".
+	// Reads vs and vt, passes them through ApplySwizzleS / ApplySwizzleT, and
+	// writes 1.0f to each lane of vd where s < t and 0.0f otherwise. The result
+	// goes through ApplyPrefixD before being written; PC then advances by 4 and
+	// EatPrefixes() is called.
+	void Int_Vslt(u32 op) {
+		int vt = _VT;
+		int vs = _VS;
+		int vd = _VD;
+		VectorSize sz = GetVecSize(op);
+		int n = GetNumVectorElements(sz);
+		float s[4];
+		float t[4];
+		float d[4];
+		ReadVector(s, sz, vs);
+		ApplySwizzleS(s, sz);
+		ReadVector(t, sz, vt);
+		ApplySwizzleT(t, sz);
+		// A C++ comparison with NaN on either side is false, so such lanes get 0.0f.
+		// TODO: NaN handling (positive vs. negative NaN) has not been verified
+		// for this instruction.
+		for (int i = 0; i < n; i++)
+			d[i] = s[i] < t[i] ? 1.0f : 0.0f;
+
+		ApplyPrefixD(d, sz);
+		WriteVector(d, sz, vd);
+		PC += 4;
+		EatPrefixes();
+	}
+
+
 	void Int_Vcmov(u32 op)
 	{
 		int vs = _VS;
@ -1374,20 +1425,23 @@ bad:
 		ReadVector(t, sz, vt);
 		switch (sz)
 		{
-		case V_Triple:  // vcrsp
+		case V_Triple:  // vcrsp.t
 			d[0] = s[1]*t[2] - s[2]*t[1];
 			d[1] = s[2]*t[0] - s[0]*t[2];
 			d[2] = s[0]*t[1] - s[1]*t[0];
-			//cross
 			break;
-		//case V_Quad:
-			//quat
-		//	break;
+
+		case V_Quad:   // vqmul.q
+			d[0] = s[0]*t[3] + s[1]*t[2] - s[2]*t[1] + s[3]*t[0];
+			d[1] = -s[0]*t[2] + s[1]*t[3] + s[2]*t[0] + s[3]*t[1];
+			d[2] = s[0]*t[1] - s[1]*t[0] + s[2]*t[3] + s[3]*t[2];
+			d[3] = -s[0]*t[0] - s[1]*t[1] - s[2]*t[2] + s[3]*t[3];
+			break;
+
 		default:
 			_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
 			break;
 		}
-		ApplyPrefixD(d, sz);
 		WriteVector(d, sz, vd);
 		PC += 4;
 		EatPrefixes();
--- a/Core/MIPS/MIPSIntVFPU.h
+++ b/Core/MIPS/MIPSIntVFPU.h
@ -65,5 +65,7 @@ namespace MIPSInt
 	void Int_ColorConv(u32 op);
 	void Int_Vh2f(u32 op);
 	void Int_Vf2h(u32 op);
+	void Int_Vsge(u32 op);
+	void Int_Vslt(u32 op);
 }

--- a/Core/MIPS/MIPSTables.cpp
+++ b/Core/MIPS/MIPSTables.cpp
@ -506,12 +506,12 @@ MIPSInstruction tableVFPU3[8] = //011011 xxx
 {
 	INSTR("vcmp",&Jit::Comp_Generic, Dis_Vcmp, Int_Vcmp, IS_VFPU),
 	{-2},
-	INSTR("vmin",&Jit::Comp_Generic, Dis_Generic, Int_Vminmax, IS_VFPU),
-	INSTR("vmax",&Jit::Comp_Generic, Dis_Generic, Int_Vminmax, IS_VFPU), 
+	INSTR("vmin",&Jit::Comp_Generic, Dis_VectorSet3, Int_Vminmax, IS_VFPU),
+	INSTR("vmax",&Jit::Comp_Generic, Dis_VectorSet3, Int_Vminmax, IS_VFPU), 
 	{-2}, 
 	INSTR("vscmp",&Jit::Comp_Generic, Dis_Generic, 0, IS_VFPU), 
-	INSTR("vsge",&Jit::Comp_Generic, Dis_Generic, 0, IS_VFPU), 
-	INSTR("vslt",&Jit::Comp_Generic, Dis_Generic, 0, IS_VFPU),
+	INSTR("vsge",&Jit::Comp_Generic, Dis_VectorSet3, Int_Vsge, IS_VFPU), 
+	INSTR("vslt",&Jit::Comp_Generic, Dis_VectorSet3, Int_Vslt, IS_VFPU),
 };


@ -570,10 +570,10 @@ MIPSInstruction tableVFPU7[32] =
 	{-2},
 	INSTR("vlgb", &Jit::Comp_Generic, Dis_Generic, 0, IS_VFPU),
 	//24
-	INSTR("vuc2i", &Jit::Comp_Generic, Dis_Generic, Int_Vx2i, IS_VFPU),  // Seen in BraveStory, initialization  110100 00001110000 000 0001 0000 0000
-	INSTR("vc2i", &Jit::Comp_Generic, Dis_Generic, Int_Vx2i, IS_VFPU),
-	INSTR("vus2i", &Jit::Comp_Generic, Dis_Generic, Int_Vx2i, IS_VFPU),
-	INSTR("vs2i", &Jit::Comp_Generic, Dis_Generic, Int_Vx2i, IS_VFPU),
+	INSTR("vuc2i", &Jit::Comp_Generic, Dis_Vs2i, Int_Vx2i, IS_VFPU),  // Seen in BraveStory, initialization  110100 00001110000 000 0001 0000 0000
+	INSTR("vc2i", &Jit::Comp_Generic, Dis_Vs2i, Int_Vx2i, IS_VFPU),
+	INSTR("vus2i", &Jit::Comp_Generic, Dis_Vs2i, Int_Vx2i, IS_VFPU),
+	INSTR("vs2i", &Jit::Comp_Generic, Dis_Vs2i, Int_Vx2i, IS_VFPU),

 	INSTR("vi2uc", &Jit::Comp_Generic, Dis_Vi2x, Int_Vi2x, IS_VFPU),
 	INSTR("vi2c",  &Jit::Comp_Generic, Dis_Vi2x, Int_Vi2x, IS_VFPU),
@ -654,10 +654,10 @@ MIPSInstruction tableVFPU6[32] =  //111100 xxx
 	INSTR("vmscl",&Jit::Comp_Generic, Dis_Generic, Int_Vmscl, IS_VFPU),
 	INSTR("vmscl",&Jit::Comp_Generic, Dis_Generic, Int_Vmscl, IS_VFPU),

-	INSTR("vcrsp/vqm",&Jit::Comp_Generic, Dis_CrossQuat, Int_CrossQuat, IS_VFPU),
-	INSTR("vcrsp/vqm",&Jit::Comp_Generic, Dis_CrossQuat, Int_CrossQuat, IS_VFPU),
-	INSTR("vcrsp/vqm",&Jit::Comp_Generic, Dis_CrossQuat, Int_CrossQuat, IS_VFPU),
-	INSTR("vcrsp/vqm",&Jit::Comp_Generic, Dis_CrossQuat, Int_CrossQuat, IS_VFPU),
+	INSTR("vcrsp.t/vqmul.q",&Jit::Comp_Generic, Dis_CrossQuat, Int_CrossQuat, IS_VFPU),
+	INSTR("vcrsp.t/vqmul.q",&Jit::Comp_Generic, Dis_CrossQuat, Int_CrossQuat, IS_VFPU),
+	INSTR("vcrsp.t/vqmul.q",&Jit::Comp_Generic, Dis_CrossQuat, Int_CrossQuat, IS_VFPU),
+	INSTR("vcrsp.t/vqmul.q",&Jit::Comp_Generic, Dis_CrossQuat, Int_CrossQuat, IS_VFPU),
 //24
 	{-2},
 	{-2},
@ -788,7 +788,7 @@ const int encodingBits[NumEncodings][2] =
 	{16, 5}, //VFPU4
 	{23, 3}, //VFPU5
 	{21, 5}, //VFPU6
-	{16, 3}, //VFPUMatrix1
+	{16, 4}, //VFPUMatrix1
 	{16, 5}, //VFPU9
 	{6,  5}, //ALLEGREX0
 	{24, 2}, //EMUHACK
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 188c0c1a2f9e4d93bc2c7ebda5a73a204dc432f5
+Subproject commit 42fcb79e874ebc9ea2ee06793f1e28ab339bab20
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit e05c2b94756c37ecedbea5ee1af538d66785fa30
+Subproject commit 45a43b253257b2bd4d6b4c84499e39ab73cc1022