Merge pull request #9811 from JosJuice/fprf-denormal-singles

Fix FPRF handling of denormal singles
This commit is contained in:
Tilka 2021-06-29 03:18:18 +01:00 committed by GitHub
commit 901a4fb5f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 224 additions and 194 deletions

View File

@ -87,7 +87,6 @@ enum PPCFpClass
// Uses PowerPC conventions for the return value, so it can be easily // Uses PowerPC conventions for the return value, so it can be easily
// used directly in CPU emulation. // used directly in CPU emulation.
u32 ClassifyDouble(double dvalue); u32 ClassifyDouble(double dvalue);
// More efficient float version.
u32 ClassifyFloat(float fvalue); u32 ClassifyFloat(float fvalue);
struct BaseAndDec struct BaseAndDec

View File

@ -36,15 +36,13 @@ inline void SetFPException(UReg_FPSCR* fpscr, u32 mask)
fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0; fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0;
} }
inline double ForceSingle(const UReg_FPSCR& fpscr, double value) inline float ForceSingle(const UReg_FPSCR& fpscr, double value)
{ {
// convert to float... float x = static_cast<float>(value);
float x = (float)value;
if (!cpu_info.bFlushToZero && fpscr.NI) if (!cpu_info.bFlushToZero && fpscr.NI)
{ {
x = Common::FlushToZero(x); x = Common::FlushToZero(x);
} }
// ...and back to double:
return x; return x;
} }

View File

@ -290,7 +290,7 @@ void Interpreter::fselx(UGeckoInstruction inst)
void Interpreter::frspx(UGeckoInstruction inst) // round to single void Interpreter::frspx(UGeckoInstruction inst) // round to single
{ {
const double b = rPS(inst.FB).PS0AsDouble(); const double b = rPS(inst.FB).PS0AsDouble();
const double rounded = ForceSingle(FPSCR, b); const float rounded = ForceSingle(FPSCR, b);
if (std::isnan(b)) if (std::isnan(b))
{ {
@ -302,7 +302,7 @@ void Interpreter::frspx(UGeckoInstruction inst) // round to single
if (!is_snan || FPSCR.VE == 0) if (!is_snan || FPSCR.VE == 0)
{ {
rPS(inst.FD).Fill(rounded); rPS(inst.FD).Fill(rounded);
PowerPC::UpdateFPRF(b); PowerPC::UpdateFPRFSingle(rounded);
} }
FPSCR.ClearFIFR(); FPSCR.ClearFIFR();
@ -311,7 +311,7 @@ void Interpreter::frspx(UGeckoInstruction inst) // round to single
{ {
SetFI(&FPSCR, b != rounded); SetFI(&FPSCR, b != rounded);
FPSCR.FR = fabs(rounded) > fabs(b); FPSCR.FR = fabs(rounded) > fabs(b);
PowerPC::UpdateFPRF(rounded); PowerPC::UpdateFPRFSingle(rounded);
rPS(inst.FD).Fill(rounded); rPS(inst.FD).Fill(rounded);
} }
@ -333,7 +333,7 @@ void Interpreter::fmulx(UGeckoInstruction inst)
rPS(inst.FD).SetPS0(result); rPS(inst.FD).SetPS0(result);
FPSCR.FI = 0; // are these flags important? FPSCR.FI = 0; // are these flags important?
FPSCR.FR = 0; FPSCR.FR = 0;
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFDouble(result);
} }
if (inst.Rc) if (inst.Rc)
@ -349,12 +349,12 @@ void Interpreter::fmulsx(UGeckoInstruction inst)
if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions()) if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions())
{ {
const double result = ForceSingle(FPSCR, d_value.value); const float result = ForceSingle(FPSCR, d_value.value);
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
FPSCR.FI = 0; FPSCR.FI = 0;
FPSCR.FR = 0; FPSCR.FR = 0;
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFSingle(result);
} }
if (inst.Rc) if (inst.Rc)
@ -372,7 +372,7 @@ void Interpreter::fmaddx(UGeckoInstruction inst)
{ {
const double result = ForceDouble(FPSCR, product.value); const double result = ForceDouble(FPSCR, product.value);
rPS(inst.FD).SetPS0(result); rPS(inst.FD).SetPS0(result);
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFDouble(result);
} }
if (inst.Rc) if (inst.Rc)
@ -390,12 +390,12 @@ void Interpreter::fmaddsx(UGeckoInstruction inst)
if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions()) if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions())
{ {
const double result = ForceSingle(FPSCR, d_value.value); const float result = ForceSingle(FPSCR, d_value.value);
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
FPSCR.FI = d_value.value != result; FPSCR.FI = d_value.value != result;
FPSCR.FR = 0; FPSCR.FR = 0;
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFSingle(result);
} }
if (inst.Rc) if (inst.Rc)
@ -413,7 +413,7 @@ void Interpreter::faddx(UGeckoInstruction inst)
{ {
const double result = ForceDouble(FPSCR, sum.value); const double result = ForceDouble(FPSCR, sum.value);
rPS(inst.FD).SetPS0(result); rPS(inst.FD).SetPS0(result);
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFDouble(result);
} }
if (inst.Rc) if (inst.Rc)
@ -428,9 +428,9 @@ void Interpreter::faddsx(UGeckoInstruction inst)
if (FPSCR.VE == 0 || sum.HasNoInvalidExceptions()) if (FPSCR.VE == 0 || sum.HasNoInvalidExceptions())
{ {
const double result = ForceSingle(FPSCR, sum.value); const float result = ForceSingle(FPSCR, sum.value);
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFSingle(result);
} }
if (inst.Rc) if (inst.Rc)
@ -450,7 +450,7 @@ void Interpreter::fdivx(UGeckoInstruction inst)
{ {
const double result = ForceDouble(FPSCR, quotient.value); const double result = ForceDouble(FPSCR, quotient.value);
rPS(inst.FD).SetPS0(result); rPS(inst.FD).SetPS0(result);
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFDouble(result);
} }
// FR,FI,OX,UX??? // FR,FI,OX,UX???
@ -468,9 +468,9 @@ void Interpreter::fdivsx(UGeckoInstruction inst)
if (not_divide_by_zero && not_invalid) if (not_divide_by_zero && not_invalid)
{ {
const double result = ForceSingle(FPSCR, quotient.value); const float result = ForceSingle(FPSCR, quotient.value);
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFSingle(result);
} }
if (inst.Rc) if (inst.Rc)
@ -485,7 +485,7 @@ void Interpreter::fresx(UGeckoInstruction inst)
const auto compute_result = [inst](double value) { const auto compute_result = [inst](double value) {
const double result = Common::ApproximateReciprocal(value); const double result = Common::ApproximateReciprocal(value);
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFSingle(result);
}; };
if (b == 0.0) if (b == 0.0)
@ -523,7 +523,7 @@ void Interpreter::frsqrtex(UGeckoInstruction inst)
const auto compute_result = [inst](double value) { const auto compute_result = [inst](double value) {
const double result = Common::ApproximateReciprocalSquareRoot(value); const double result = Common::ApproximateReciprocalSquareRoot(value);
rPS(inst.FD).SetPS0(result); rPS(inst.FD).SetPS0(result);
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFDouble(result);
}; };
if (b < 0.0) if (b < 0.0)
@ -574,7 +574,7 @@ void Interpreter::fmsubx(UGeckoInstruction inst)
{ {
const double result = ForceDouble(FPSCR, product.value); const double result = ForceDouble(FPSCR, product.value);
rPS(inst.FD).SetPS0(result); rPS(inst.FD).SetPS0(result);
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFDouble(result);
} }
if (inst.Rc) if (inst.Rc)
@ -592,9 +592,9 @@ void Interpreter::fmsubsx(UGeckoInstruction inst)
if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
{ {
const double result = ForceSingle(FPSCR, product.value); const float result = ForceSingle(FPSCR, product.value);
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFSingle(result);
} }
if (inst.Rc) if (inst.Rc)
@ -615,7 +615,7 @@ void Interpreter::fnmaddx(UGeckoInstruction inst)
const double result = std::isnan(tmp) ? tmp : -tmp; const double result = std::isnan(tmp) ? tmp : -tmp;
rPS(inst.FD).SetPS0(result); rPS(inst.FD).SetPS0(result);
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFDouble(result);
} }
if (inst.Rc) if (inst.Rc)
@ -633,11 +633,11 @@ void Interpreter::fnmaddsx(UGeckoInstruction inst)
if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
{ {
const double tmp = ForceSingle(FPSCR, product.value); const float tmp = ForceSingle(FPSCR, product.value);
const double result = std::isnan(tmp) ? tmp : -tmp; const float result = std::isnan(tmp) ? tmp : -tmp;
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFSingle(result);
} }
if (inst.Rc) if (inst.Rc)
@ -658,7 +658,7 @@ void Interpreter::fnmsubx(UGeckoInstruction inst)
const double result = std::isnan(tmp) ? tmp : -tmp; const double result = std::isnan(tmp) ? tmp : -tmp;
rPS(inst.FD).SetPS0(result); rPS(inst.FD).SetPS0(result);
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFDouble(result);
} }
if (inst.Rc) if (inst.Rc)
@ -676,11 +676,11 @@ void Interpreter::fnmsubsx(UGeckoInstruction inst)
if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
{ {
const double tmp = ForceSingle(FPSCR, product.value); const float tmp = ForceSingle(FPSCR, product.value);
const double result = std::isnan(tmp) ? tmp : -tmp; const float result = std::isnan(tmp) ? tmp : -tmp;
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFSingle(result);
} }
if (inst.Rc) if (inst.Rc)
@ -698,7 +698,7 @@ void Interpreter::fsubx(UGeckoInstruction inst)
{ {
const double result = ForceDouble(FPSCR, difference.value); const double result = ForceDouble(FPSCR, difference.value);
rPS(inst.FD).SetPS0(result); rPS(inst.FD).SetPS0(result);
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFDouble(result);
} }
if (inst.Rc) if (inst.Rc)
@ -714,9 +714,9 @@ void Interpreter::fsubsx(UGeckoInstruction inst)
if (FPSCR.VE == 0 || difference.HasNoInvalidExceptions()) if (FPSCR.VE == 0 || difference.HasNoInvalidExceptions())
{ {
const double result = ForceSingle(FPSCR, difference.value); const float result = ForceSingle(FPSCR, difference.value);
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRF(result); PowerPC::UpdateFPRFSingle(result);
} }
if (inst.Rc) if (inst.Rc)

View File

@ -113,11 +113,11 @@ void Interpreter::ps_div(UGeckoInstruction inst)
const auto& a = rPS(inst.FA); const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB); const auto& b = rPS(inst.FB);
const double ps0 = ForceSingle(FPSCR, NI_div(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value); const float ps0 = ForceSingle(FPSCR, NI_div(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value);
const double ps1 = ForceSingle(FPSCR, NI_div(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value); const float ps1 = ForceSingle(FPSCR, NI_div(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0); PowerPC::UpdateFPRFSingle(ps0);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -145,7 +145,7 @@ void Interpreter::ps_res(UGeckoInstruction inst)
const double ps1 = Common::ApproximateReciprocal(b); const double ps1 = Common::ApproximateReciprocal(b);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0); PowerPC::UpdateFPRFSingle(ps0);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -174,11 +174,11 @@ void Interpreter::ps_rsqrte(UGeckoInstruction inst)
if (Common::IsSNAN(ps0) || Common::IsSNAN(ps1)) if (Common::IsSNAN(ps0) || Common::IsSNAN(ps1))
SetFPException(&FPSCR, FPSCR_VXSNAN); SetFPException(&FPSCR, FPSCR_VXSNAN);
const double dst_ps0 = ForceSingle(FPSCR, Common::ApproximateReciprocalSquareRoot(ps0)); const float dst_ps0 = ForceSingle(FPSCR, Common::ApproximateReciprocalSquareRoot(ps0));
const double dst_ps1 = ForceSingle(FPSCR, Common::ApproximateReciprocalSquareRoot(ps1)); const float dst_ps1 = ForceSingle(FPSCR, Common::ApproximateReciprocalSquareRoot(ps1));
rPS(inst.FD).SetBoth(dst_ps0, dst_ps1); rPS(inst.FD).SetBoth(dst_ps0, dst_ps1);
PowerPC::UpdateFPRF(dst_ps0); PowerPC::UpdateFPRFSingle(dst_ps0);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -189,11 +189,11 @@ void Interpreter::ps_sub(UGeckoInstruction inst)
const auto& a = rPS(inst.FA); const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB); const auto& b = rPS(inst.FB);
const double ps0 = ForceSingle(FPSCR, NI_sub(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value); const float ps0 = ForceSingle(FPSCR, NI_sub(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value);
const double ps1 = ForceSingle(FPSCR, NI_sub(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value); const float ps1 = ForceSingle(FPSCR, NI_sub(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0); PowerPC::UpdateFPRFSingle(ps0);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -204,11 +204,11 @@ void Interpreter::ps_add(UGeckoInstruction inst)
const auto& a = rPS(inst.FA); const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB); const auto& b = rPS(inst.FB);
const double ps0 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value); const float ps0 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value);
const double ps1 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value); const float ps1 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0); PowerPC::UpdateFPRFSingle(ps0);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -222,11 +222,11 @@ void Interpreter::ps_mul(UGeckoInstruction inst)
const double c0 = Force25Bit(c.PS0AsDouble()); const double c0 = Force25Bit(c.PS0AsDouble());
const double c1 = Force25Bit(c.PS1AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble());
const double ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c0).value); const float ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c0).value);
const double ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c1).value); const float ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c1).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0); PowerPC::UpdateFPRFSingle(ps0);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -241,13 +241,11 @@ void Interpreter::ps_msub(UGeckoInstruction inst)
const double c0 = Force25Bit(c.PS0AsDouble()); const double c0 = Force25Bit(c.PS0AsDouble());
const double c1 = Force25Bit(c.PS1AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble());
const double ps0 = const float ps0 = ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); const float ps1 = ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
const double ps1 =
ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0); PowerPC::UpdateFPRFSingle(ps0);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -262,13 +260,11 @@ void Interpreter::ps_madd(UGeckoInstruction inst)
const double c0 = Force25Bit(c.PS0AsDouble()); const double c0 = Force25Bit(c.PS0AsDouble());
const double c1 = Force25Bit(c.PS1AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble());
const double ps0 = const float ps0 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); const float ps1 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
const double ps1 =
ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0); PowerPC::UpdateFPRFSingle(ps0);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -283,16 +279,16 @@ void Interpreter::ps_nmsub(UGeckoInstruction inst)
const double c0 = Force25Bit(c.PS0AsDouble()); const double c0 = Force25Bit(c.PS0AsDouble());
const double c1 = Force25Bit(c.PS1AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble());
const double tmp0 = const float tmp0 =
ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
const double tmp1 = const float tmp1 =
ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value); ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
const double ps0 = std::isnan(tmp0) ? tmp0 : -tmp0; const float ps0 = std::isnan(tmp0) ? tmp0 : -tmp0;
const double ps1 = std::isnan(tmp1) ? tmp1 : -tmp1; const float ps1 = std::isnan(tmp1) ? tmp1 : -tmp1;
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0); PowerPC::UpdateFPRFSingle(ps0);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -307,16 +303,16 @@ void Interpreter::ps_nmadd(UGeckoInstruction inst)
const double c0 = Force25Bit(c.PS0AsDouble()); const double c0 = Force25Bit(c.PS0AsDouble());
const double c1 = Force25Bit(c.PS1AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble());
const double tmp0 = const float tmp0 =
ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
const double tmp1 = const float tmp1 =
ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value); ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
const double ps0 = std::isnan(tmp0) ? tmp0 : -tmp0; const float ps0 = std::isnan(tmp0) ? tmp0 : -tmp0;
const double ps1 = std::isnan(tmp1) ? tmp1 : -tmp1; const float ps1 = std::isnan(tmp1) ? tmp1 : -tmp1;
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0); PowerPC::UpdateFPRFSingle(ps0);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -328,11 +324,11 @@ void Interpreter::ps_sum0(UGeckoInstruction inst)
const auto& b = rPS(inst.FB); const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC); const auto& c = rPS(inst.FC);
const double ps0 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS1AsDouble()).value); const float ps0 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS1AsDouble()).value);
const double ps1 = ForceSingle(FPSCR, c.PS1AsDouble()); const float ps1 = ForceSingle(FPSCR, c.PS1AsDouble());
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0); PowerPC::UpdateFPRFSingle(ps0);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -344,11 +340,11 @@ void Interpreter::ps_sum1(UGeckoInstruction inst)
const auto& b = rPS(inst.FB); const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC); const auto& c = rPS(inst.FC);
const double ps0 = ForceSingle(FPSCR, c.PS0AsDouble()); const float ps0 = ForceSingle(FPSCR, c.PS0AsDouble());
const double ps1 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS1AsDouble()).value); const float ps1 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps1); PowerPC::UpdateFPRFSingle(ps1);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -360,11 +356,11 @@ void Interpreter::ps_muls0(UGeckoInstruction inst)
const auto& c = rPS(inst.FC); const auto& c = rPS(inst.FC);
const double c0 = Force25Bit(c.PS0AsDouble()); const double c0 = Force25Bit(c.PS0AsDouble());
const double ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c0).value); const float ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c0).value);
const double ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c0).value); const float ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c0).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0); PowerPC::UpdateFPRFSingle(ps0);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -376,11 +372,11 @@ void Interpreter::ps_muls1(UGeckoInstruction inst)
const auto& c = rPS(inst.FC); const auto& c = rPS(inst.FC);
const double c1 = Force25Bit(c.PS1AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble());
const double ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c1).value); const float ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c1).value);
const double ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c1).value); const float ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c1).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0); PowerPC::UpdateFPRFSingle(ps0);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -393,13 +389,11 @@ void Interpreter::ps_madds0(UGeckoInstruction inst)
const auto& c = rPS(inst.FC); const auto& c = rPS(inst.FC);
const double c0 = Force25Bit(c.PS0AsDouble()); const double c0 = Force25Bit(c.PS0AsDouble());
const double ps0 = const float ps0 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); const float ps1 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c0, b.PS1AsDouble()).value);
const double ps1 =
ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c0, b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0); PowerPC::UpdateFPRFSingle(ps0);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -412,13 +406,11 @@ void Interpreter::ps_madds1(UGeckoInstruction inst)
const auto& c = rPS(inst.FC); const auto& c = rPS(inst.FC);
const double c1 = Force25Bit(c.PS1AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble());
const double ps0 = const float ps0 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c1, b.PS0AsDouble()).value);
ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c1, b.PS0AsDouble()).value); const float ps1 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
const double ps1 =
ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0); PowerPC::UpdateFPRFSingle(ps0);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();

View File

@ -121,8 +121,11 @@ public:
// Generates a branch that will check if a given bit of a CR register part // Generates a branch that will check if a given bit of a CR register part
// is set or not. // is set or not.
Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true); Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true);
void SetFPRFIfNeeded(Gen::X64Reg xmm);
void SetFPRFIfNeeded(const Gen::OpArg& xmm, bool single);
void FinalizeSingleResult(Gen::X64Reg output, const Gen::OpArg& input, bool packed = true,
bool duplicate = false);
void FinalizeDoubleResult(Gen::X64Reg output, const Gen::OpArg& input);
void HandleNaNs(UGeckoInstruction inst, Gen::X64Reg xmm_out, Gen::X64Reg xmm_in, void HandleNaNs(UGeckoInstruction inst, Gen::X64Reg xmm_out, Gen::X64Reg xmm_in,
Gen::X64Reg clobber = Gen::XMM0); Gen::X64Reg clobber = Gen::XMM0);

View File

@ -33,13 +33,63 @@ alignas(16) static const double half_qnan_and_s32_max[2] = {0x7FFFFFFF, -0x80000
// We can avoid calculating FPRF if it's not needed; every float operation resets it, so // We can avoid calculating FPRF if it's not needed; every float operation resets it, so
// if it's going to be clobbered in a future instruction before being read, we can just // if it's going to be clobbered in a future instruction before being read, we can just
// not calculate it. // not calculate it.
void Jit64::SetFPRFIfNeeded(X64Reg xmm) void Jit64::SetFPRFIfNeeded(const OpArg& input, bool single)
{ {
// As far as we know, the games that use this flag only need FPRF for fmul and fmadd, but // As far as we know, the games that use this flag only need FPRF for fmul and fmadd, but
// FPRF is fast enough in JIT that we might as well just enable it for every float instruction // FPRF is fast enough in JIT that we might as well just enable it for every float instruction
// if the FPRF flag is set. // if the FPRF flag is set.
if (SConfig::GetInstance().bFPRF && js.op->wantsFPRF) if (!SConfig::GetInstance().bFPRF || !js.op->wantsFPRF)
SetFPRF(xmm); return;
X64Reg xmm = XMM0;
if (input.IsSimpleReg())
xmm = input.GetSimpleReg();
else
MOVSD(xmm, input);
SetFPRF(xmm, single);
}
// Finishes the result of a single-precision instruction: places the value from `input` into
// `output` and emits the FPRF update (via SetFPRFIfNeeded) for it.
//
// output:    XMM register that receives the final value.
// input:     operand holding the freshly computed value in double format.
// packed:    with accurate single precision enabled, round with the packed conversions
//            (CVTPD2PS/CVTPS2PD) instead of the scalar ones.
// duplicate: with accurate single precision enabled, only used on the scalar path, where the
//            rounded low lane is copied to the high lane with MOVDDUP. Without accurate single
//            precision, selects MOVDDUP over MOVAPD when `input` has to be copied into `output`.
void Jit64::FinalizeSingleResult(X64Reg output, const OpArg& input, bool packed, bool duplicate)
{
  // Most games don't need these. Zelda requires it though - some platforms get stuck without them.
  if (jo.accurateSinglePrecision)
  {
    if (packed)
    {
      CVTPD2PS(output, input);
      // Between the two conversions `output` holds the value in single format, so FPRF is
      // computed from the 32-bit representation.
      SetFPRFIfNeeded(R(output), true);
      CVTPS2PD(output, R(output));
    }
    else
    {
      CVTSD2SS(output, input);
      // Same as above: classify while the value is still in single format.
      SetFPRFIfNeeded(R(output), true);
      CVTSS2SD(output, R(output));
      if (duplicate)
        MOVDDUP(output, R(output));
    }
  }
  else
  {
    if (!input.IsSimpleReg(output))
    {
      if (duplicate)
        MOVDDUP(output, input);
      else
        MOVAPD(output, input);
    }

    // No conversion to single happened on this path, so `input` still holds a 64-bit double.
    // It must be classified with the double masks; requesting the single classification here
    // would make SetFPRF look at only the low 32 bits of the double and produce a bogus FPRF.
    SetFPRFIfNeeded(input, false);
  }
}
// Finishes the result of a double-precision instruction: copies `input` into `output` with MOVSD
// unless `input` already is the `output` register, then requests the FPRF update for the value
// using the double (non-single) classification.
void Jit64::FinalizeDoubleResult(X64Reg output, const OpArg& input)
{
if (!input.IsSimpleReg(output))
MOVSD(output, input);
SetFPRFIfNeeded(input, false);
} }
void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Reg clobber) void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Reg clobber)
@ -210,8 +260,9 @@ void Jit64::fp_arith(UGeckoInstruction inst)
HandleNaNs(inst, Rd, dest); HandleNaNs(inst, Rd, dest);
if (single) if (single)
ForceSinglePrecision(Rd, Rd, packed, true); FinalizeSingleResult(Rd, Rd, packed, true);
SetFPRFIfNeeded(Rd); else
FinalizeDoubleResult(Rd, Rd);
}; };
switch (inst.SUBOP5) switch (inst.SUBOP5)
@ -452,14 +503,13 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
if (single) if (single)
{ {
HandleNaNs(inst, result_reg, result_reg, result_reg == XMM1 ? XMM0 : XMM1); HandleNaNs(inst, result_reg, result_reg, result_reg == XMM1 ? XMM0 : XMM1);
ForceSinglePrecision(Rd, R(result_reg), packed, true); FinalizeSingleResult(Rd, R(result_reg), packed, true);
} }
else else
{ {
HandleNaNs(inst, result_reg, result_reg, XMM1); HandleNaNs(inst, result_reg, result_reg, XMM1);
MOVSD(Rd, R(result_reg)); FinalizeDoubleResult(Rd, R(result_reg));
} }
SetFPRFIfNeeded(Rd);
} }
void Jit64::fsign(UGeckoInstruction inst) void Jit64::fsign(UGeckoInstruction inst)
@ -763,12 +813,11 @@ void Jit64::frspx(UGeckoInstruction inst)
int d = inst.FD; int d = inst.FD;
bool packed = js.op->fprIsDuplicated[b] && !cpu_info.bAtom; bool packed = js.op->fprIsDuplicated[b] && !cpu_info.bAtom;
RCOpArg Rb = fpr.Use(b, RCMode::Read); RCOpArg Rb = fpr.Bind(b, RCMode::Read);
RCX64Reg Rd = fpr.Bind(d, RCMode::Write); RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(Rb, Rd); RegCache::Realize(Rb, Rd);
ForceSinglePrecision(Rd, Rb, packed, true); FinalizeSingleResult(Rd, Rb, packed, true);
SetFPRFIfNeeded(Rd);
} }
void Jit64::frsqrtex(UGeckoInstruction inst) void Jit64::frsqrtex(UGeckoInstruction inst)
@ -786,8 +835,7 @@ void Jit64::frsqrtex(UGeckoInstruction inst)
MOVAPD(XMM0, Rb); MOVAPD(XMM0, Rb);
CALL(asm_routines.frsqrte); CALL(asm_routines.frsqrte);
MOVSD(Rd, XMM0); FinalizeDoubleResult(Rd, R(XMM0));
SetFPRFIfNeeded(Rd);
} }
void Jit64::fresx(UGeckoInstruction inst) void Jit64::fresx(UGeckoInstruction inst)
@ -806,5 +854,5 @@ void Jit64::fresx(UGeckoInstruction inst)
MOVAPD(XMM0, Rb); MOVAPD(XMM0, Rb);
CALL(asm_routines.fres); CALL(asm_routines.fres);
MOVDDUP(Rd, R(XMM0)); MOVDDUP(Rd, R(XMM0));
SetFPRFIfNeeded(Rd); SetFPRFIfNeeded(R(XMM0), true);
} }

View File

@ -77,8 +77,7 @@ void Jit64::ps_sum(UGeckoInstruction inst)
PanicAlertFmt("ps_sum WTF!!!"); PanicAlertFmt("ps_sum WTF!!!");
} }
HandleNaNs(inst, Rd, tmp, tmp == XMM1 ? XMM0 : XMM1); HandleNaNs(inst, Rd, tmp, tmp == XMM1 ? XMM0 : XMM1);
ForceSinglePrecision(Rd, Rd); FinalizeSingleResult(Rd, Rd);
SetFPRFIfNeeded(Rd);
} }
void Jit64::ps_muls(UGeckoInstruction inst) void Jit64::ps_muls(UGeckoInstruction inst)
@ -112,8 +111,7 @@ void Jit64::ps_muls(UGeckoInstruction inst)
Force25BitPrecision(XMM1, R(XMM1), XMM0); Force25BitPrecision(XMM1, R(XMM1), XMM0);
MULPD(XMM1, Ra); MULPD(XMM1, Ra);
HandleNaNs(inst, Rd, XMM1); HandleNaNs(inst, Rd, XMM1);
ForceSinglePrecision(Rd, Rd); FinalizeSingleResult(Rd, Rd);
SetFPRFIfNeeded(Rd);
} }
void Jit64::ps_mergeXX(UGeckoInstruction inst) void Jit64::ps_mergeXX(UGeckoInstruction inst)
@ -171,8 +169,7 @@ void Jit64::ps_rsqrte(UGeckoInstruction inst)
CALL(asm_routines.frsqrte); CALL(asm_routines.frsqrte);
MOVLHPS(Rd, XMM0); MOVLHPS(Rd, XMM0);
ForceSinglePrecision(Rd, Rd); FinalizeSingleResult(Rd, Rd);
SetFPRFIfNeeded(Rd);
} }
void Jit64::ps_res(UGeckoInstruction inst) void Jit64::ps_res(UGeckoInstruction inst)
@ -196,8 +193,7 @@ void Jit64::ps_res(UGeckoInstruction inst)
CALL(asm_routines.fres); CALL(asm_routines.fres);
MOVLHPS(Rd, XMM0); MOVLHPS(Rd, XMM0);
ForceSinglePrecision(Rd, Rd); FinalizeSingleResult(Rd, Rd);
SetFPRFIfNeeded(Rd);
} }
void Jit64::ps_cmpXX(UGeckoInstruction inst) void Jit64::ps_cmpXX(UGeckoInstruction inst)

View File

@ -727,34 +727,6 @@ void EmuCodeBlock::JitClearCA()
MOV(8, PPCSTATE(xer_ca), Imm8(0)); MOV(8, PPCSTATE(xer_ca), Imm8(0));
} }
// Emits code that places `input` into `output`, rounding it to single precision when the JIT
// option accurateSinglePrecision is enabled.
//
// packed:    round with the packed conversions instead of the scalar ones.
// duplicate: on the scalar rounding path, copy the low lane to the high lane afterwards; on the
//            non-rounding path, use MOVDDUP rather than MOVAPD for the copy.
void EmuCodeBlock::ForceSinglePrecision(X64Reg output, const OpArg& input, bool packed,
                                        bool duplicate)
{
  // Most games don't need the rounding. Zelda requires it though - some platforms get stuck
  // without it.
  if (!m_jit.jo.accurateSinglePrecision)
  {
    // Nothing to emit when the value already lives in the destination register.
    if (input.IsSimpleReg(output))
      return;

    if (duplicate)
      MOVDDUP(output, input);
    else
      MOVAPD(output, input);
    return;
  }

  if (packed)
  {
    // Round both lanes: double -> single -> double.
    CVTPD2PS(output, input);
    CVTPS2PD(output, R(output));
    return;
  }

  // Round only the low lane: double -> single -> double.
  CVTSD2SS(output, input);
  CVTSS2SD(output, R(output));
  if (duplicate)
    MOVDDUP(output, R(output));
}
// Abstract between AVX and SSE: automatically handle 3-operand instructions // Abstract between AVX and SSE: automatically handle 3-operand instructions
void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&), void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&),
void (XEmitter::*sseOp)(X64Reg, const OpArg&), X64Reg regOp, void (XEmitter::*sseOp)(X64Reg, const OpArg&), X64Reg regOp,
@ -907,30 +879,35 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
MOVDDUP(dst, R(dst)); MOVDDUP(dst, R(dst));
} }
alignas(16) static const u64 psDoubleExp[2] = {0x7FF0000000000000ULL, 0}; alignas(16) static const u64 psDoubleExp[2] = {Common::DOUBLE_EXP, 0};
alignas(16) static const u64 psDoubleFrac[2] = {0x000FFFFFFFFFFFFFULL, 0}; alignas(16) static const u64 psDoubleFrac[2] = {Common::DOUBLE_FRAC, 0};
alignas(16) static const u64 psDoubleNoSign[2] = {0x7FFFFFFFFFFFFFFFULL, 0}; alignas(16) static const u64 psDoubleNoSign[2] = {~Common::DOUBLE_SIGN, 0};
alignas(16) static const u32 psFloatExp[4] = {Common::FLOAT_EXP, 0, 0, 0};
alignas(16) static const u32 psFloatFrac[4] = {Common::FLOAT_FRAC, 0, 0, 0};
alignas(16) static const u32 psFloatNoSign[4] = {~Common::FLOAT_SIGN, 0, 0, 0};
// TODO: it might be faster to handle FPRF in the same way as CR is currently handled for integer, // TODO: it might be faster to handle FPRF in the same way as CR is currently handled for integer,
// storing // storing the result of each floating point op and calculating it when needed. This is trickier
// the result of each floating point op and calculating it when needed. This is trickier than for // than for integers though, because there's 32 possible FPRF bit combinations but only 9 categories
// integers // of floating point values. Fortunately, PPCAnalyzer can optimize out a large portion of FPRF
// though, because there's 32 possible FPRF bit combinations but only 9 categories of floating point // calculations, so maybe this isn't quite that necessary.
// values, void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm, bool single)
// which makes the whole thing rather trickier.
// Fortunately, PPCAnalyzer can optimize out a large portion of FPRF calculations, so maybe this
// isn't
// quite that necessary.
void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
{ {
const int input_size = single ? 32 : 64;
AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK)); AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK));
FixupBranch continue1, continue2, continue3, continue4; FixupBranch continue1, continue2, continue3, continue4;
if (cpu_info.bSSE4_1) if (cpu_info.bSSE4_1)
{ {
MOVQ_xmm(R(RSCRATCH), xmm); MOVQ_xmm(R(RSCRATCH), xmm);
SHR(64, R(RSCRATCH), Imm8(63)); // Get the sign bit; almost all the branches need it. // Get the sign bit; almost all the branches need it.
PTEST(xmm, MConst(psDoubleExp)); SHR(input_size, R(RSCRATCH), Imm8(input_size - 1));
if (single)
PTEST(xmm, MConst(psFloatExp));
else
PTEST(xmm, MConst(psDoubleExp));
FixupBranch maxExponent = J_CC(CC_C); FixupBranch maxExponent = J_CC(CC_C);
FixupBranch zeroExponent = J_CC(CC_Z); FixupBranch zeroExponent = J_CC(CC_Z);
@ -940,7 +917,10 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
continue1 = J(); continue1 = J();
SetJumpTarget(maxExponent); SetJumpTarget(maxExponent);
PTEST(xmm, MConst(psDoubleFrac)); if (single)
PTEST(xmm, MConst(psFloatFrac));
else
PTEST(xmm, MConst(psDoubleFrac));
FixupBranch notNAN = J_CC(CC_Z); FixupBranch notNAN = J_CC(CC_Z);
// Max exponent + mantissa: PPC_FPCLASS_QNAN // Max exponent + mantissa: PPC_FPCLASS_QNAN
@ -955,7 +935,10 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
continue3 = J(); continue3 = J();
SetJumpTarget(zeroExponent); SetJumpTarget(zeroExponent);
PTEST(xmm, MConst(psDoubleNoSign)); if (single)
PTEST(xmm, MConst(psFloatNoSign));
else
PTEST(xmm, MConst(psDoubleNoSign));
FixupBranch zero = J_CC(CC_Z); FixupBranch zero = J_CC(CC_Z);
// No exponent + mantissa: sign ? PPC_FPCLASS_ND : PPC_FPCLASS_PD; // No exponent + mantissa: sign ? PPC_FPCLASS_ND : PPC_FPCLASS_PD;
@ -971,37 +954,58 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
else else
{ {
MOVQ_xmm(R(RSCRATCH), xmm); MOVQ_xmm(R(RSCRATCH), xmm);
TEST(64, R(RSCRATCH), MConst(psDoubleExp)); if (single)
TEST(32, R(RSCRATCH), Imm32(Common::FLOAT_EXP));
else
TEST(64, R(RSCRATCH), MConst(psDoubleExp));
FixupBranch zeroExponent = J_CC(CC_Z); FixupBranch zeroExponent = J_CC(CC_Z);
AND(64, R(RSCRATCH), MConst(psDoubleNoSign));
CMP(64, R(RSCRATCH), MConst(psDoubleExp)); if (single)
{
AND(32, R(RSCRATCH), Imm32(~Common::FLOAT_SIGN));
CMP(32, R(RSCRATCH), Imm32(Common::FLOAT_EXP));
}
else
{
AND(64, R(RSCRATCH), MConst(psDoubleNoSign));
CMP(64, R(RSCRATCH), MConst(psDoubleExp));
}
FixupBranch nan = FixupBranch nan =
J_CC(CC_G); // This works because if the sign bit is set, RSCRATCH is negative J_CC(CC_G); // This works because if the sign bit is set, RSCRATCH is negative
FixupBranch infinity = J_CC(CC_E); FixupBranch infinity = J_CC(CC_E);
MOVQ_xmm(R(RSCRATCH), xmm); MOVQ_xmm(R(RSCRATCH), xmm);
SHR(64, R(RSCRATCH), Imm8(63)); SHR(input_size, R(RSCRATCH), Imm8(input_size - 1));
LEA(32, RSCRATCH, LEA(32, RSCRATCH,
MScaled(RSCRATCH, Common::PPC_FPCLASS_NN - Common::PPC_FPCLASS_PN, Common::PPC_FPCLASS_PN)); MScaled(RSCRATCH, Common::PPC_FPCLASS_NN - Common::PPC_FPCLASS_PN, Common::PPC_FPCLASS_PN));
continue1 = J(); continue1 = J();
SetJumpTarget(nan); SetJumpTarget(nan);
MOV(32, R(RSCRATCH), Imm32(Common::PPC_FPCLASS_QNAN)); MOV(32, R(RSCRATCH), Imm32(Common::PPC_FPCLASS_QNAN));
continue2 = J(); continue2 = J();
SetJumpTarget(infinity); SetJumpTarget(infinity);
MOVQ_xmm(R(RSCRATCH), xmm); MOVQ_xmm(R(RSCRATCH), xmm);
SHR(64, R(RSCRATCH), Imm8(63)); SHR(input_size, R(RSCRATCH), Imm8(input_size - 1));
LEA(32, RSCRATCH, LEA(32, RSCRATCH,
MScaled(RSCRATCH, Common::PPC_FPCLASS_NINF - Common::PPC_FPCLASS_PINF, MScaled(RSCRATCH, Common::PPC_FPCLASS_NINF - Common::PPC_FPCLASS_PINF,
Common::PPC_FPCLASS_PINF)); Common::PPC_FPCLASS_PINF));
continue3 = J(); continue3 = J();
SetJumpTarget(zeroExponent); SetJumpTarget(zeroExponent);
TEST(64, R(RSCRATCH), MConst(psDoubleNoSign)); if (single)
TEST(input_size, R(RSCRATCH), Imm32(~Common::FLOAT_SIGN));
else
TEST(input_size, R(RSCRATCH), MConst(psDoubleNoSign));
FixupBranch zero = J_CC(CC_Z); FixupBranch zero = J_CC(CC_Z);
SHR(64, R(RSCRATCH), Imm8(63));
SHR(input_size, R(RSCRATCH), Imm8(input_size - 1));
LEA(32, RSCRATCH, LEA(32, RSCRATCH,
MScaled(RSCRATCH, Common::PPC_FPCLASS_ND - Common::PPC_FPCLASS_PD, Common::PPC_FPCLASS_PD)); MScaled(RSCRATCH, Common::PPC_FPCLASS_ND - Common::PPC_FPCLASS_PD, Common::PPC_FPCLASS_PD));
continue4 = J(); continue4 = J();
SetJumpTarget(zero); SetJumpTarget(zero);
SHR(64, R(RSCRATCH), Imm8(63)); SHR(input_size, R(RSCRATCH), Imm8(input_size - 1));
SHL(32, R(RSCRATCH), Imm8(4)); SHL(32, R(RSCRATCH), Imm8(4));
ADD(32, R(RSCRATCH), Imm8(Common::PPC_FPCLASS_PZ)); ADD(32, R(RSCRATCH), Imm8(Common::PPC_FPCLASS_PZ));
} }

View File

@ -117,14 +117,12 @@ public:
void (Gen::XEmitter::*sseOp)(Gen::X64Reg, const Gen::OpArg&, u8), Gen::X64Reg regOp, void (Gen::XEmitter::*sseOp)(Gen::X64Reg, const Gen::OpArg&, u8), Gen::X64Reg regOp,
const Gen::OpArg& arg1, const Gen::OpArg& arg2, u8 imm); const Gen::OpArg& arg1, const Gen::OpArg& arg2, u8 imm);
void ForceSinglePrecision(Gen::X64Reg output, const Gen::OpArg& input, bool packed = true,
bool duplicate = false);
void Force25BitPrecision(Gen::X64Reg output, const Gen::OpArg& input, Gen::X64Reg tmp); void Force25BitPrecision(Gen::X64Reg output, const Gen::OpArg& input, Gen::X64Reg tmp);
// RSCRATCH might get trashed // RSCRATCH might get trashed
void ConvertSingleToDouble(Gen::X64Reg dst, Gen::X64Reg src, bool src_is_gpr = false); void ConvertSingleToDouble(Gen::X64Reg dst, Gen::X64Reg src, bool src_is_gpr = false);
void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src); void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src);
void SetFPRF(Gen::X64Reg xmm); void SetFPRF(Gen::X64Reg xmm, bool single);
void Clear(); void Clear();
protected: protected:

View File

@ -455,25 +455,12 @@ void JitArm64::GenerateFPRF(bool single)
FixupBranch nan_or_inf = B(CCFlags::CC_EQ); FixupBranch nan_or_inf = B(CCFlags::CC_EQ);
// exp != 0 && exp != EXP_MASK // exp != 0 && exp != EXP_MASK
const u8* normal = GetCodePtr();
emit_write_fprf_and_ret(); emit_write_fprf_and_ret();
// exp == 0 // exp == 0
SetJumpTarget(zero_or_denormal); SetJumpTarget(zero_or_denormal);
TSTI2R(input_reg, INPUT_FRAC_MASK); TSTI2R(input_reg, INPUT_FRAC_MASK);
FixupBranch denormal; FixupBranch denormal = B(CCFlags::CC_NEQ);
if (single)
{
// To match the interpreter, what we output should be based on how the input would be classified
// after conversion to double. Converting a denormal single to a double always results in a
// normal double, so for denormal singles we need to output PPC_FPCLASS_PN/PPC_FPCLASS_NN.
// TODO: Hardware test that the interpreter actually is correct.
B(CCFlags::CC_NEQ, normal);
}
else
{
denormal = B(CCFlags::CC_NEQ);
}
// exp == 0 && frac == 0 // exp == 0 && frac == 0
LSR(ARM64Reg::W1, fprf_reg, 3); LSR(ARM64Reg::W1, fprf_reg, 3);
@ -483,8 +470,7 @@ void JitArm64::GenerateFPRF(bool single)
emit_write_fprf_and_ret(); emit_write_fprf_and_ret();
// exp == 0 && frac != 0 // exp == 0 && frac != 0
if (!single) SetJumpTarget(denormal);
SetJumpTarget(denormal);
ORRI2R(fprf_reg, fprf_reg, Common::PPC_FPCLASS_PD & ~OUTPUT_SIGN_MASK); ORRI2R(fprf_reg, fprf_reg, Common::PPC_FPCLASS_PD & ~OUTPUT_SIGN_MASK);
B(write_fprf_and_ret); B(write_fprf_and_ret);

View File

@ -626,11 +626,16 @@ void PowerPCState::SetSR(u32 index, u32 value)
// FPSCR update functions // FPSCR update functions
void UpdateFPRF(double dvalue) void UpdateFPRFDouble(double dvalue)
{ {
FPSCR.FPRF = Common::ClassifyDouble(dvalue); FPSCR.FPRF = Common::ClassifyDouble(dvalue);
} }
// Single-precision counterpart of UpdateFPRFDouble: stores the result of
// Common::ClassifyFloat(fvalue) into the FPRF field of FPSCR, so the value is
// classified as a float rather than after widening to double.
void UpdateFPRFSingle(float fvalue)
{
FPSCR.FPRF = Common::ClassifyFloat(fvalue);
}
void RoundingModeUpdated() void RoundingModeUpdated()
{ {
// The rounding mode is separate for each thread, so this must run on the CPU thread // The rounding mode is separate for each thread, so this must run on the CPU thread

View File

@ -304,7 +304,8 @@ inline void SetXER_OV(bool value)
SetXER_SO(value); SetXER_SO(value);
} }
void UpdateFPRF(double dvalue); void UpdateFPRFDouble(double dvalue);
void UpdateFPRFSingle(float fvalue);
void RoundingModeUpdated(); void RoundingModeUpdated();

View File

@ -74,14 +74,14 @@ TEST(JitArm64, FPRF)
for (const u64 double_input : double_test_values) for (const u64 double_input : double_test_values)
{ {
const u32 expected_double = const u32 expected_double =
RunUpdateFPRF([&] { PowerPC::UpdateFPRF(Common::BitCast<double>(double_input)); }); RunUpdateFPRF([&] { PowerPC::UpdateFPRFDouble(Common::BitCast<double>(double_input)); });
const u32 actual_double = RunUpdateFPRF([&] { test.fprf_double(double_input); }); const u32 actual_double = RunUpdateFPRF([&] { test.fprf_double(double_input); });
EXPECT_EQ(expected_double, actual_double); EXPECT_EQ(expected_double, actual_double);
const u32 single_input = ConvertToSingle(double_input); const u32 single_input = ConvertToSingle(double_input);
const u32 expected_single = RunUpdateFPRF( const u32 expected_single =
[&] { PowerPC::UpdateFPRF(Common::BitCast<double>(ConvertToDouble(single_input))); }); RunUpdateFPRF([&] { PowerPC::UpdateFPRFSingle(Common::BitCast<float>(single_input)); });
const u32 actual_single = RunUpdateFPRF([&] { test.fprf_single(single_input); }); const u32 actual_single = RunUpdateFPRF([&] { test.fprf_single(single_input); });
EXPECT_EQ(expected_single, actual_single); EXPECT_EQ(expected_single, actual_single);
} }