Merge pull request #6092 from unknownbrackets/armjit-minor

Clean up some NaN handling in some instructions
2025-01-23 22:36:21 +00:00 · 2014-05-16 10:20:25 +02:00 · 2014-05-16 10:20:25 +02:00 · 965cdb3832
commit 965cdb3832
parent ad177b3333 5b24e0107f
3 changed files with 66 additions and 20 deletions
--- a/Core/MIPS/ARM/ArmCompVFPU.cpp
+++ b/Core/MIPS/ARM/ArmCompVFPU.cpp
@ -194,7 +194,7 @@ namespace MIPSComp
 				MOVI2F(S1, 1.0f, SCRATCHREG1);
 				VCMP(fpr.V(vregs[i]), S0);
 				VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
-				SetCC(CC_LE);
+				SetCC(CC_LS);
 				VMOV(fpr.V(vregs[i]), S0);
 				SetCC(CC_AL);
 				VCMP(fpr.V(vregs[i]), S1);
@ -209,7 +209,7 @@ namespace MIPSComp
 				MOVI2F(S1, 1.0f, SCRATCHREG1);
 				VCMP(fpr.V(vregs[i]), S0);
 				VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
-				SetCC(CC_LT);
+				SetCC(CC_LO);
 				VMOV(fpr.V(vregs[i]), S0);
 				SetCC(CC_AL);
 				VCMP(fpr.V(vregs[i]), S1);
@ -744,6 +744,7 @@ namespace MIPSComp
 				case 2:  // vmin
 					VCMP(fpr.V(sregs[i]), fpr.V(tregs[i]));
 					VMRS_APSR();
+					// TODO: Technically should use NaN sign bit.
 					SetCC(CC_LT);
 					VMOV(fpr.V(tempregs[i]), fpr.V(sregs[i]));
 					SetCC(CC_GE);
@ -753,6 +754,7 @@ namespace MIPSComp
 				case 3:  // vmax
 					VCMP(fpr.V(tregs[i]), fpr.V(sregs[i]));
 					VMRS_APSR();
+					// TODO: Technically should use NaN sign bit.
 					SetCC(CC_LT);
 					VMOV(fpr.V(tempregs[i]), fpr.V(sregs[i]));
 					SetCC(CC_GE);
@ -763,6 +765,7 @@ namespace MIPSComp
 					DISABLE;  // pending testing
 					VCMP(fpr.V(tregs[i]), fpr.V(sregs[i]));
 					VMRS_APSR();
+					// Unordered is always 0.
 					SetCC(CC_GE);
 					MOVI2F(fpr.V(tempregs[i]), 1.0f, SCRATCHREG1);
 					SetCC(CC_LT);
@ -773,9 +776,10 @@ namespace MIPSComp
 					DISABLE;  // pending testing
 					VCMP(fpr.V(tregs[i]), fpr.V(sregs[i]));
 					VMRS_APSR();
-					SetCC(CC_LT);
+					// Unordered is always 0.
+					SetCC(CC_LO);
 					MOVI2F(fpr.V(tempregs[i]), 1.0f, SCRATCHREG1);
-					SetCC(CC_GE);
+					SetCC(CC_HS);
 					MOVI2F(fpr.V(tempregs[i]), 0.0f, SCRATCHREG1);
 					SetCC(CC_AL);
 					break;
--- a/Core/MIPS/MIPSIntVFPU.cpp
+++ b/Core/MIPS/MIPSIntVFPU.cpp
@ -512,7 +512,7 @@ namespace MIPSInt
 			case 0: d[i] = s[i]; break; //vmov
 			case 1: d[i] = fabsf(s[i]); break; //vabs
 			case 2: d[i] = -s[i]; break; //vneg
-			// vsat0 changes -0.0 to +0.0.
+			// vsat0 changes -0.0 to +0.0, both retain NAN.
 			case 4: if (s[i] <= 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;    // vsat0
 			case 5: if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;  // vsat1
 			case 16: d[i] = 1.0f / s[i]; break; //vrcp
@ -1537,16 +1537,19 @@ namespace MIPSInt
 		ApplySwizzleS(s, sz);
 		ReadVector(t, sz, vt);
 		ApplySwizzleT(t, sz);
-		// positive NAN always loses, unlike SSE
-		// negative NAN seems different? TODO
+
+		// If both are zero, take t's sign.
+		// TODO: Otherwise: -NAN < -INF < real < INF < NAN
+
 		switch ((op >> 23) & 3) {
 		case 2: // vmin
-			for (int i = 0; i < numElements; i++)
-				d[i] = my_isnan(t[i]) ? s[i] : (my_isnan(s[i]) ? t[i] : std::min(s[i], t[i]));
+			for (int i = 0; i < numElements; i++) {
+				d[i] = my_isnan(t[i]) ? s[i] : (my_isnan(s[i]) ? t[i] : std::min(t[i], s[i]));
+			}
 			break;
 		case 3: // vmax
 			for (int i = 0; i < numElements; i++)
-				d[i] = my_isnan(t[i]) ? t[i] : (my_isnan(s[i]) ? s[i] : std::max(s[i], t[i]));
+				d[i] = my_isnan(t[i]) ? t[i] : (my_isnan(s[i]) ? s[i] : std::max(t[i], s[i]));
 			break;
 		default:
 			_dbg_assert_msg_(CPU,0,"unknown min/max op %d", cond);
--- a/Core/MIPS/x86/CompVFPU.cpp
+++ b/Core/MIPS/x86/CompVFPU.cpp
@ -165,14 +165,33 @@ void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
 		if (sat == 1)
 		{
 			fpr.MapRegV(vregs[i], MAP_DIRTY);
-			MAXSS(fpr.VX(vregs[i]), M(&zero));
-			MINSS(fpr.VX(vregs[i]), M(&one));
+
+			// Zero out XMM0 if it was <= +0.0f (but skip NAN.)
+			MOVSS(R(XMM0), fpr.VX(vregs[i]));
+			CMPLESS(XMM0, M(&zero));
+			ANDNPS(XMM0, fpr.V(vregs[i]));
+
+			// Retain a NAN in XMM0 (must be second operand.)
+			MOVSS(fpr.VX(vregs[i]), M(&one));
+			MINSS(fpr.VX(vregs[i]), R(XMM0));
 		}
 		else if (sat == 3)
 		{
 			fpr.MapRegV(vregs[i], MAP_DIRTY);
-			MAXSS(fpr.VX(vregs[i]), M(&minus_one));
-			MINSS(fpr.VX(vregs[i]), M(&one));
+
+			// Check for < -1.0f, but careful of NANs.
+			MOVSS(XMM1, M(&minus_one));
+			MOVSS(R(XMM0), fpr.VX(vregs[i]));
+			CMPLESS(XMM0, R(XMM1));
+			// If it was NOT less, the three ops below do nothing.
+			// Otherwise, they replace the value with -1.0f.
+			ANDPS(XMM1, R(XMM0));
+			ANDNPS(XMM0, fpr.V(vregs[i]));
+			ORPS(XMM0, R(XMM1));
+
+			// Retain a NAN in XMM0 (must be second operand.)
+			MOVSS(fpr.VX(vregs[i]), M(&one));
+			MINSS(fpr.VX(vregs[i]), R(XMM0));
 		}
 	}
 }
@ -835,12 +854,15 @@ void Jit::Comp_VecDo3(MIPSOpcode op) {
 			switch ((op >> 23) & 7)
 			{
 			case 2:  // vmin
+				// TODO: Mishandles NaN.
 				MINSS(tempxregs[i], fpr.V(tregs[i]));
 				break;
 			case 3:  // vmax
+				// TODO: Mishandles NaN.
 				MAXSS(tempxregs[i], fpr.V(tregs[i]));
 				break;
 			case 6:  // vsge
+				// TODO: Mishandles NaN.
 				CMPNLTSS(tempxregs[i], fpr.V(tregs[i]));
 				ANDPS(tempxregs[i], M(&oneOneOneOne));
 				break;
@ -1560,16 +1582,33 @@ void Jit::Comp_VV2Op(MIPSOpcode op) {
 		case 4: // if (s[i] < 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;    // vsat0
 			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
 				MOVSS(tempxregs[i], fpr.V(sregs[i]));
-			// TODO: Doesn't handle NaN correctly.
-			MAXSS(tempxregs[i], M(&zero));
-			MINSS(tempxregs[i], M(&one));
+
+			// Zero out XMM0 if it was <= +0.0f (but skip NAN.)
+			MOVSS(R(XMM0), tempxregs[i]);
+			CMPLESS(XMM0, M(&zero));
+			ANDNPS(XMM0, R(tempxregs[i]));
+
+			// Retain a NAN in XMM0 (must be second operand.)
+			MOVSS(tempxregs[i], M(&one));
+			MINSS(tempxregs[i], R(XMM0));
 			break;
 		case 5: // if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;  // vsat1
 			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
 				MOVSS(tempxregs[i], fpr.V(sregs[i]));
-			// TODO: Doesn't handle NaN correctly.
-			MAXSS(tempxregs[i], M(&minus_one));
-			MINSS(tempxregs[i], M(&one));
+
+			// Check for < -1.0f, but careful of NANs.
+			MOVSS(XMM1, M(&minus_one));
+			MOVSS(R(XMM0), tempxregs[i]);
+			CMPLESS(XMM0, R(XMM1));
+			// If it was NOT less, the three ops below do nothing.
+			// Otherwise, they replace the value with -1.0f.
+			ANDPS(XMM1, R(XMM0));
+			ANDNPS(XMM0, R(tempxregs[i]));
+			ORPS(XMM0, R(XMM1));
+
+			// Retain a NAN in XMM0 (must be second operand.)
+			MOVSS(tempxregs[i], M(&one));
+			MINSS(tempxregs[i], R(XMM0));
 			break;
 		case 16: // d[i] = 1.0f / s[i]; break; //vrcp
 			MOVSS(XMM0, M(&one));