Merge pull request #6092 from unknownbrackets/armjit-minor

Clean up some NaN handling in some instructions
This commit is contained in:
Henrik Rydgård 2014-05-16 10:20:25 +02:00
commit 965cdb3832
3 changed files with 66 additions and 20 deletions

View File

@ -194,7 +194,7 @@ namespace MIPSComp
MOVI2F(S1, 1.0f, SCRATCHREG1);
VCMP(fpr.V(vregs[i]), S0);
VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
SetCC(CC_LE);
SetCC(CC_LS);
VMOV(fpr.V(vregs[i]), S0);
SetCC(CC_AL);
VCMP(fpr.V(vregs[i]), S1);
@ -209,7 +209,7 @@ namespace MIPSComp
MOVI2F(S1, 1.0f, SCRATCHREG1);
VCMP(fpr.V(vregs[i]), S0);
VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
SetCC(CC_LT);
SetCC(CC_LO);
VMOV(fpr.V(vregs[i]), S0);
SetCC(CC_AL);
VCMP(fpr.V(vregs[i]), S1);
@ -744,6 +744,7 @@ namespace MIPSComp
case 2: // vmin
VCMP(fpr.V(sregs[i]), fpr.V(tregs[i]));
VMRS_APSR();
// TODO: Technically should use NaN sign bit.
SetCC(CC_LT);
VMOV(fpr.V(tempregs[i]), fpr.V(sregs[i]));
SetCC(CC_GE);
@ -753,6 +754,7 @@ namespace MIPSComp
case 3: // vmax
VCMP(fpr.V(tregs[i]), fpr.V(sregs[i]));
VMRS_APSR();
// TODO: Technically should use NaN sign bit.
SetCC(CC_LT);
VMOV(fpr.V(tempregs[i]), fpr.V(sregs[i]));
SetCC(CC_GE);
@ -763,6 +765,7 @@ namespace MIPSComp
DISABLE; // pending testing
VCMP(fpr.V(tregs[i]), fpr.V(sregs[i]));
VMRS_APSR();
// Unordered is always 0.
SetCC(CC_GE);
MOVI2F(fpr.V(tempregs[i]), 1.0f, SCRATCHREG1);
SetCC(CC_LT);
@ -773,9 +776,10 @@ namespace MIPSComp
DISABLE; // pending testing
VCMP(fpr.V(tregs[i]), fpr.V(sregs[i]));
VMRS_APSR();
SetCC(CC_LT);
// Unordered is always 0.
SetCC(CC_LO);
MOVI2F(fpr.V(tempregs[i]), 1.0f, SCRATCHREG1);
SetCC(CC_GE);
SetCC(CC_HS);
MOVI2F(fpr.V(tempregs[i]), 0.0f, SCRATCHREG1);
SetCC(CC_AL);
break;

View File

@ -512,7 +512,7 @@ namespace MIPSInt
case 0: d[i] = s[i]; break; //vmov
case 1: d[i] = fabsf(s[i]); break; //vabs
case 2: d[i] = -s[i]; break; //vneg
// vsat0 changes -0.0 to +0.0.
// vsat0 changes -0.0 to +0.0, both retain NAN.
case 4: if (s[i] <= 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat0
case 5: if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat1
case 16: d[i] = 1.0f / s[i]; break; //vrcp
@ -1537,16 +1537,19 @@ namespace MIPSInt
ApplySwizzleS(s, sz);
ReadVector(t, sz, vt);
ApplySwizzleT(t, sz);
// positive NAN always loses, unlike SSE
// negative NAN seems different? TODO
// If both are zero, take t's sign.
// TODO: Otherwise: -NAN < -INF < real < INF < NAN
switch ((op >> 23) & 3) {
case 2: // vmin
for (int i = 0; i < numElements; i++)
d[i] = my_isnan(t[i]) ? s[i] : (my_isnan(s[i]) ? t[i] : std::min(s[i], t[i]));
for (int i = 0; i < numElements; i++) {
d[i] = my_isnan(t[i]) ? s[i] : (my_isnan(s[i]) ? t[i] : std::min(t[i], s[i]));
}
break;
case 3: // vmax
for (int i = 0; i < numElements; i++)
d[i] = my_isnan(t[i]) ? t[i] : (my_isnan(s[i]) ? s[i] : std::max(s[i], t[i]));
d[i] = my_isnan(t[i]) ? t[i] : (my_isnan(s[i]) ? s[i] : std::max(t[i], s[i]));
break;
default:
_dbg_assert_msg_(CPU,0,"unknown min/max op %d", cond);

View File

@ -165,14 +165,33 @@ void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
if (sat == 1)
{
fpr.MapRegV(vregs[i], MAP_DIRTY);
MAXSS(fpr.VX(vregs[i]), M(&zero));
MINSS(fpr.VX(vregs[i]), M(&one));
// Zero out XMM0 if it was <= +0.0f (but skip NAN.)
MOVSS(R(XMM0), fpr.VX(vregs[i]));
CMPLESS(XMM0, M(&zero));
ANDNPS(XMM0, fpr.V(vregs[i]));
// Retain a NAN in XMM0 (must be second operand.)
MOVSS(fpr.VX(vregs[i]), M(&one));
MINSS(fpr.VX(vregs[i]), R(XMM0));
}
else if (sat == 3)
{
fpr.MapRegV(vregs[i], MAP_DIRTY);
MAXSS(fpr.VX(vregs[i]), M(&minus_one));
MINSS(fpr.VX(vregs[i]), M(&one));
// Check for < -1.0f, but careful of NANs.
MOVSS(XMM1, M(&minus_one));
MOVSS(R(XMM0), fpr.VX(vregs[i]));
CMPLESS(XMM0, R(XMM1));
// If it was NOT less, the three ops below do nothing.
// Otherwise, they replace the value with -1.0f.
ANDPS(XMM1, R(XMM0));
ANDNPS(XMM0, fpr.V(vregs[i]));
ORPS(XMM0, R(XMM1));
// Retain a NAN in XMM0 (must be second operand.)
MOVSS(fpr.VX(vregs[i]), M(&one));
MINSS(fpr.VX(vregs[i]), R(XMM0));
}
}
}
@ -835,12 +854,15 @@ void Jit::Comp_VecDo3(MIPSOpcode op) {
switch ((op >> 23) & 7)
{
case 2: // vmin
// TODO: Mishandles NaN.
MINSS(tempxregs[i], fpr.V(tregs[i]));
break;
case 3: // vmax
// TODO: Mishandles NaN.
MAXSS(tempxregs[i], fpr.V(tregs[i]));
break;
case 6: // vsge
// TODO: Mishandles NaN.
CMPNLTSS(tempxregs[i], fpr.V(tregs[i]));
ANDPS(tempxregs[i], M(&oneOneOneOne));
break;
@ -1560,16 +1582,33 @@ void Jit::Comp_VV2Op(MIPSOpcode op) {
case 4: // if (s[i] < 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat0
if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
MOVSS(tempxregs[i], fpr.V(sregs[i]));
// TODO: Doesn't handle NaN correctly.
MAXSS(tempxregs[i], M(&zero));
MINSS(tempxregs[i], M(&one));
// Zero out XMM0 if it was <= +0.0f (but skip NAN.)
MOVSS(R(XMM0), tempxregs[i]);
CMPLESS(XMM0, M(&zero));
ANDNPS(XMM0, R(tempxregs[i]));
// Retain a NAN in XMM0 (must be second operand.)
MOVSS(tempxregs[i], M(&one));
MINSS(tempxregs[i], R(XMM0));
break;
case 5: // if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat1
if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
MOVSS(tempxregs[i], fpr.V(sregs[i]));
// TODO: Doesn't handle NaN correctly.
MAXSS(tempxregs[i], M(&minus_one));
MINSS(tempxregs[i], M(&one));
// Check for < -1.0f, but careful of NANs.
MOVSS(XMM1, M(&minus_one));
MOVSS(R(XMM0), tempxregs[i]);
CMPLESS(XMM0, R(XMM1));
// If it was NOT less, the three ops below do nothing.
// Otherwise, they replace the value with -1.0f.
ANDPS(XMM1, R(XMM0));
ANDNPS(XMM0, R(tempxregs[i]));
ORPS(XMM0, R(XMM1));
// Retain a NAN in XMM0 (must be second operand.)
MOVSS(tempxregs[i], M(&one));
MINSS(tempxregs[i], R(XMM0));
break;
case 16: // d[i] = 1.0f / s[i]; break; //vrcp
MOVSS(XMM0, M(&one));