Fix vf2i properly on x86.

2025-02-25 11:20:55 +00:00 · 2013-08-07 21:30:57 +02:00 · 2013-08-07 21:30:57 +02:00 · 8714240519
commit 8714240519
parent 6e5b4ca082
4 changed files with 38 additions and 35 deletions
--- a/Common/x64Emitter.cpp
+++ b/Common/x64Emitter.cpp
@ -1298,6 +1298,7 @@ void XEmitter::CVTSI2SS(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2A, false,
 void XEmitter::CVTSS2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2D, false, xregdest, arg);}
 void XEmitter::CVTTSS2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2C, false, xregdest, arg);}
 void XEmitter::CVTTPS2DQ(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x5B, false, xregdest, arg);}
+void XEmitter::CVTTSD2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(64, 0x2C, false, xregdest, arg);}

 void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src)  {WriteSSEOp(64, sseMASKMOVDQU, true, dest, R(src));}

--- a/Common/x64Emitter.h
+++ b/Common/x64Emitter.h
@ -581,6 +581,7 @@ public:
 	void CVTSI2SS(X64Reg xregdest, OpArg arg);  // Yeah, destination really is a GPR like EAX!
 	void CVTSS2SI(X64Reg xregdest, OpArg arg);  // Yeah, destination really is a GPR like EAX!
 	void CVTTSS2SI(X64Reg xregdest, OpArg arg);  // Yeah, destination really is a GPR like EAX!
+	void CVTTSD2SI(X64Reg xregdest, OpArg arg);  // Yeah, destination really is a GPR like EAX!
 	void CVTTPS2DQ(X64Reg regOp, OpArg arg);

 	// SSE2: Packed integer instructions
--- a/Core/MIPS/MIPSIntVFPU.cpp
+++ b/Core/MIPS/MIPSIntVFPU.cpp
@ -600,13 +600,12 @@ namespace MIPSInt
 		EatPrefixes();
 	}

-	inline int round_vfpu_n(float param) {
+	inline int round_vfpu_n(double param) {
 		// return floorf(param);
 		return (int)round_ieee_754(param);
 	}

-	void Int_Vf2i(u32 op)
-	{
+	void Int_Vf2i(u32 op) {
 		float s[4];
 		int d[4];
 		int vd = _VD;
@ -616,25 +615,28 @@ namespace MIPSInt
 		VectorSize sz = GetVecSize(op);
 		ReadVector(s, sz, vs);
 		ApplySwizzleS(s, sz); //TODO: and the mask to kill everything but swizzle
-		for (int i = 0; i < GetNumVectorElements(sz); i++)
-		{
+		for (int i = 0; i < GetNumVectorElements(sz); i++) {
 			if (my_isnan(s[i])) {
 				d[i] = 0x7FFFFFFF;
 				continue;
 			}
 			double sv = s[i] * mult; // (float)0x7fffffff == (float)0x80000000
 			// Cap/floor it to 0x7fffffff / 0x80000000
-			if (sv > 0x7fffffff) sv = 0x7fffffff;
-			if (sv < (int)0x80000000) sv = (int)0x80000000;
+			if (sv > (double)0x7fffffff) {
+				d[i] = 0x7fffffff;
+			} else if (sv <= (double)(int)0x80000000) {
+				d[i] = 0x80000000;
+			} else {
 				switch ((op >> 21) & 0x1f)
 				{
-			case 16: d[i] = (int)(floor(sv + 0.5f)); break; //n  (round_vfpu_n causes issue #3011 but seems right according to tests...)
+				case 16: d[i] = (int)round_vfpu_n(sv); break; //(floor(sv + 0.5f)); break; //n  (round_vfpu_n causes issue #3011 but seems right according to tests...)
 				case 17: d[i] = s[i]>=0 ? (int)floor(sv) : (int)ceil(sv); break; //z
 				case 18: d[i] = (int)ceil(sv); break; //u
 				case 19: d[i] = (int)floor(sv); break; //d
 				default: d[i] = 0x7FFFFFFF; break;
 				}
 			}
+		}
 		ApplyPrefixD((float*)d, sz, true);
 		WriteVector((float*)d, sz, vd);
 		PC += 4;
--- a/Core/MIPS/x86/CompVFPU.cpp
+++ b/Core/MIPS/x86/CompVFPU.cpp
@ -975,26 +975,24 @@ void Jit::Comp_Vi2f(u32 op) {
 	fpr.ReleaseSpillLocks();
 }

-extern const float mulTableVf2i[32] = {
-	(float)(1ULL<<0),(float)(1ULL<<1),(float)(1ULL<<2),(float)(1ULL<<3),
-	(float)(1ULL<<4),(float)(1ULL<<5),(float)(1ULL<<6),(float)(1ULL<<7),
-	(float)(1ULL<<8),(float)(1ULL<<9),(float)(1ULL<<10),(float)(1ULL<<11),
-	(float)(1ULL<<12),(float)(1ULL<<13),(float)(1ULL<<14),(float)(1ULL<<15),
-	(float)(1ULL<<16),(float)(1ULL<<17),(float)(1ULL<<18),(float)(1ULL<<19),
-	(float)(1ULL<<20),(float)(1ULL<<21),(float)(1ULL<<22),(float)(1ULL<<23),
-	(float)(1ULL<<24),(float)(1ULL<<25),(float)(1ULL<<26),(float)(1ULL<<27),
-	(float)(1ULL<<28),(float)(1ULL<<29),(float)(1ULL<<30),(float)(1ULL<<31),
+extern const double mulTableVf2i[32] = {
+	(1ULL<<0),(1ULL<<1),(1ULL<<2),(1ULL<<3),
+	(1ULL<<4),(1ULL<<5),(1ULL<<6),(1ULL<<7),
+	(1ULL<<8),(1ULL<<9),(1ULL<<10),(1ULL<<11),
+	(1ULL<<12),(1ULL<<13),(1ULL<<14),(1ULL<<15),
+	(1ULL<<16),(1ULL<<17),(1ULL<<18),(1ULL<<19),
+	(1ULL<<20),(1ULL<<21),(1ULL<<22),(1ULL<<23),
+	(1ULL<<24),(1ULL<<25),(1ULL<<26),(1ULL<<27),
+	(1ULL<<28),(1ULL<<29),(1ULL<<30),(1ULL<<31),
 };

 static const float half = 0.5f;

-static const float maxIntAsFloat = (float)(int)0x7FFFFFFF;
-static const float minIntAsFloat = (float)(int)0x80000000;
+static double maxIntAsDouble = (double)0x7fffffff;  // that's not equal to 0x80000000
+static double minIntAsDouble = (double)(int)0x80000000;

 void Jit::Comp_Vf2i(u32 op) {
 	CONDITIONAL_DISABLE;
-	DISABLE;  // Broken :(   (KH)
-
 	if (js.HasUnknownPrefix())
 		DISABLE;

@ -1002,7 +1000,7 @@ void Jit::Comp_Vf2i(u32 op) {
 	int n = GetNumVectorElements(sz);

 	int imm = (op >> 16) & 0x1f;
-	const float *mult = &mulTableVf2i[imm];
+	const double *mult = &mulTableVf2i[imm];

 	switch ((op >> 21) & 0x1f)
 	{
@ -1029,21 +1027,22 @@ void Jit::Comp_Vf2i(u32 op) {
 	}

 	if (*mult != 1.0f)
-		MOVSS(XMM1, M((void *)mult));
+		MOVSD(XMM1, M((void *)mult));

 	fpr.MapRegsV(tempregs, sz, MAP_DIRTY | MAP_NOINIT);
 	for (int i = 0; i < n; i++) {
+		// Need to do this in double precision to clamp correctly as float
+		// doesn't have enough precision to represent 0x7fffffff for example exactly.
 		MOVSS(XMM0, fpr.V(sregs[i]));
+		CVTSS2SD(XMM0, R(XMM0)); // convert to double precision
 		if (*mult != 1.0f) {
-			if (*mult != 1.0f)
-				MULSS(XMM0, R(XMM1));
+			MULSD(XMM0, R(XMM1));
 		}
-		// Clamp to max and min
-		MINSS(XMM0, M((void *)&maxIntAsFloat));
-		MAXSS(XMM0, M((void *)&minIntAsFloat));
+		MINSD(XMM0, M((void *)&maxIntAsDouble));
+		MAXSD(XMM0, M((void *)&minIntAsDouble));
 		switch ((op >> 21) & 0x1f) {
 		case 16: /* TODO */ break; //n  (round_vfpu_n causes issue #3011 but seems right according to tests...)
-		case 17: CVTTSS2SI(EAX, R(XMM0)); break; //z - truncate
+		case 17: CVTTSD2SI(EAX, R(XMM0)); break; //z - truncate
 		case 18: /* TODO */ break; //u
 		case 19: /* TODO */ break; //d
 		}