mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-01-23 22:36:21 +00:00
Merge pull request #6092 from unknownbrackets/armjit-minor
Clean up some NaN handling in some instructions
This commit is contained in:
commit
965cdb3832
@ -194,7 +194,7 @@ namespace MIPSComp
|
||||
MOVI2F(S1, 1.0f, SCRATCHREG1);
|
||||
VCMP(fpr.V(vregs[i]), S0);
|
||||
VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
|
||||
SetCC(CC_LE);
|
||||
SetCC(CC_LS);
|
||||
VMOV(fpr.V(vregs[i]), S0);
|
||||
SetCC(CC_AL);
|
||||
VCMP(fpr.V(vregs[i]), S1);
|
||||
@ -209,7 +209,7 @@ namespace MIPSComp
|
||||
MOVI2F(S1, 1.0f, SCRATCHREG1);
|
||||
VCMP(fpr.V(vregs[i]), S0);
|
||||
VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
|
||||
SetCC(CC_LT);
|
||||
SetCC(CC_LO);
|
||||
VMOV(fpr.V(vregs[i]), S0);
|
||||
SetCC(CC_AL);
|
||||
VCMP(fpr.V(vregs[i]), S1);
|
||||
@ -744,6 +744,7 @@ namespace MIPSComp
|
||||
case 2: // vmin
|
||||
VCMP(fpr.V(sregs[i]), fpr.V(tregs[i]));
|
||||
VMRS_APSR();
|
||||
// TODO: Technically should use NaN sign bit.
|
||||
SetCC(CC_LT);
|
||||
VMOV(fpr.V(tempregs[i]), fpr.V(sregs[i]));
|
||||
SetCC(CC_GE);
|
||||
@ -753,6 +754,7 @@ namespace MIPSComp
|
||||
case 3: // vmax
|
||||
VCMP(fpr.V(tregs[i]), fpr.V(sregs[i]));
|
||||
VMRS_APSR();
|
||||
// TODO: Technically should use NaN sign bit.
|
||||
SetCC(CC_LT);
|
||||
VMOV(fpr.V(tempregs[i]), fpr.V(sregs[i]));
|
||||
SetCC(CC_GE);
|
||||
@ -763,6 +765,7 @@ namespace MIPSComp
|
||||
DISABLE; // pending testing
|
||||
VCMP(fpr.V(tregs[i]), fpr.V(sregs[i]));
|
||||
VMRS_APSR();
|
||||
// Unordered is always 0.
|
||||
SetCC(CC_GE);
|
||||
MOVI2F(fpr.V(tempregs[i]), 1.0f, SCRATCHREG1);
|
||||
SetCC(CC_LT);
|
||||
@ -773,9 +776,10 @@ namespace MIPSComp
|
||||
DISABLE; // pending testing
|
||||
VCMP(fpr.V(tregs[i]), fpr.V(sregs[i]));
|
||||
VMRS_APSR();
|
||||
SetCC(CC_LT);
|
||||
// Unordered is always 0.
|
||||
SetCC(CC_LO);
|
||||
MOVI2F(fpr.V(tempregs[i]), 1.0f, SCRATCHREG1);
|
||||
SetCC(CC_GE);
|
||||
SetCC(CC_HS);
|
||||
MOVI2F(fpr.V(tempregs[i]), 0.0f, SCRATCHREG1);
|
||||
SetCC(CC_AL);
|
||||
break;
|
||||
|
@ -512,7 +512,7 @@ namespace MIPSInt
|
||||
case 0: d[i] = s[i]; break; //vmov
|
||||
case 1: d[i] = fabsf(s[i]); break; //vabs
|
||||
case 2: d[i] = -s[i]; break; //vneg
|
||||
// vsat0 changes -0.0 to +0.0.
|
||||
// vsat0 changes -0.0 to +0.0, both retain NAN.
|
||||
case 4: if (s[i] <= 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat0
|
||||
case 5: if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat1
|
||||
case 16: d[i] = 1.0f / s[i]; break; //vrcp
|
||||
@ -1537,16 +1537,19 @@ namespace MIPSInt
|
||||
ApplySwizzleS(s, sz);
|
||||
ReadVector(t, sz, vt);
|
||||
ApplySwizzleT(t, sz);
|
||||
// positive NAN always loses, unlike SSE
|
||||
// negative NAN seems different? TODO
|
||||
|
||||
// If both are zero, take t's sign.
|
||||
// TODO: Otherwise: -NAN < -INF < real < INF < NAN
|
||||
|
||||
switch ((op >> 23) & 3) {
|
||||
case 2: // vmin
|
||||
for (int i = 0; i < numElements; i++)
|
||||
d[i] = my_isnan(t[i]) ? s[i] : (my_isnan(s[i]) ? t[i] : std::min(s[i], t[i]));
|
||||
for (int i = 0; i < numElements; i++) {
|
||||
d[i] = my_isnan(t[i]) ? s[i] : (my_isnan(s[i]) ? t[i] : std::min(t[i], s[i]));
|
||||
}
|
||||
break;
|
||||
case 3: // vmax
|
||||
for (int i = 0; i < numElements; i++)
|
||||
d[i] = my_isnan(t[i]) ? t[i] : (my_isnan(s[i]) ? s[i] : std::max(s[i], t[i]));
|
||||
d[i] = my_isnan(t[i]) ? t[i] : (my_isnan(s[i]) ? s[i] : std::max(t[i], s[i]));
|
||||
break;
|
||||
default:
|
||||
_dbg_assert_msg_(CPU,0,"unknown min/max op %d", cond);
|
||||
|
@ -165,14 +165,33 @@ void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
|
||||
if (sat == 1)
|
||||
{
|
||||
fpr.MapRegV(vregs[i], MAP_DIRTY);
|
||||
MAXSS(fpr.VX(vregs[i]), M(&zero));
|
||||
MINSS(fpr.VX(vregs[i]), M(&one));
|
||||
|
||||
// Zero out XMM0 if it was <= +0.0f (but skip NAN.)
|
||||
MOVSS(R(XMM0), fpr.VX(vregs[i]));
|
||||
CMPLESS(XMM0, M(&zero));
|
||||
ANDNPS(XMM0, fpr.V(vregs[i]));
|
||||
|
||||
// Retain a NAN in XMM0 (must be second operand.)
|
||||
MOVSS(fpr.VX(vregs[i]), M(&one));
|
||||
MINSS(fpr.VX(vregs[i]), R(XMM0));
|
||||
}
|
||||
else if (sat == 3)
|
||||
{
|
||||
fpr.MapRegV(vregs[i], MAP_DIRTY);
|
||||
MAXSS(fpr.VX(vregs[i]), M(&minus_one));
|
||||
MINSS(fpr.VX(vregs[i]), M(&one));
|
||||
|
||||
// Check for < -1.0f, but careful of NANs.
|
||||
MOVSS(XMM1, M(&minus_one));
|
||||
MOVSS(R(XMM0), fpr.VX(vregs[i]));
|
||||
CMPLESS(XMM0, R(XMM1));
|
||||
// If it was NOT less, the three ops below do nothing.
|
||||
// Otherwise, they replace the value with -1.0f.
|
||||
ANDPS(XMM1, R(XMM0));
|
||||
ANDNPS(XMM0, fpr.V(vregs[i]));
|
||||
ORPS(XMM0, R(XMM1));
|
||||
|
||||
// Retain a NAN in XMM0 (must be second operand.)
|
||||
MOVSS(fpr.VX(vregs[i]), M(&one));
|
||||
MINSS(fpr.VX(vregs[i]), R(XMM0));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -835,12 +854,15 @@ void Jit::Comp_VecDo3(MIPSOpcode op) {
|
||||
switch ((op >> 23) & 7)
|
||||
{
|
||||
case 2: // vmin
|
||||
// TODO: Mishandles NaN.
|
||||
MINSS(tempxregs[i], fpr.V(tregs[i]));
|
||||
break;
|
||||
case 3: // vmax
|
||||
// TODO: Mishandles NaN.
|
||||
MAXSS(tempxregs[i], fpr.V(tregs[i]));
|
||||
break;
|
||||
case 6: // vsge
|
||||
// TODO: Mishandles NaN.
|
||||
CMPNLTSS(tempxregs[i], fpr.V(tregs[i]));
|
||||
ANDPS(tempxregs[i], M(&oneOneOneOne));
|
||||
break;
|
||||
@ -1560,16 +1582,33 @@ void Jit::Comp_VV2Op(MIPSOpcode op) {
|
||||
case 4: // if (s[i] <= 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat0
|
||||
if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
|
||||
MOVSS(tempxregs[i], fpr.V(sregs[i]));
|
||||
// TODO: Doesn't handle NaN correctly.
|
||||
MAXSS(tempxregs[i], M(&zero));
|
||||
MINSS(tempxregs[i], M(&one));
|
||||
|
||||
// Zero out XMM0 if it was <= +0.0f (but skip NAN.)
|
||||
MOVSS(R(XMM0), tempxregs[i]);
|
||||
CMPLESS(XMM0, M(&zero));
|
||||
ANDNPS(XMM0, R(tempxregs[i]));
|
||||
|
||||
// Retain a NAN in XMM0 (must be second operand.)
|
||||
MOVSS(tempxregs[i], M(&one));
|
||||
MINSS(tempxregs[i], R(XMM0));
|
||||
break;
|
||||
case 5: // if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat1
|
||||
if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
|
||||
MOVSS(tempxregs[i], fpr.V(sregs[i]));
|
||||
// TODO: Doesn't handle NaN correctly.
|
||||
MAXSS(tempxregs[i], M(&minus_one));
|
||||
MINSS(tempxregs[i], M(&one));
|
||||
|
||||
// Check for < -1.0f, but careful of NANs.
|
||||
MOVSS(XMM1, M(&minus_one));
|
||||
MOVSS(R(XMM0), tempxregs[i]);
|
||||
CMPLESS(XMM0, R(XMM1));
|
||||
// If it was NOT less, the three ops below do nothing.
|
||||
// Otherwise, they replace the value with -1.0f.
|
||||
ANDPS(XMM1, R(XMM0));
|
||||
ANDNPS(XMM0, R(tempxregs[i]));
|
||||
ORPS(XMM0, R(XMM1));
|
||||
|
||||
// Retain a NAN in XMM0 (must be second operand.)
|
||||
MOVSS(tempxregs[i], M(&one));
|
||||
MINSS(tempxregs[i], R(XMM0));
|
||||
break;
|
||||
case 16: // d[i] = 1.0f / s[i]; break; //vrcp
|
||||
MOVSS(XMM0, M(&one));
|
||||
|
Loading…
x
Reference in New Issue
Block a user