From 8d2c069c3454fee6180483f3bf29fe496d71c5f8 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 13 Jun 2021 14:45:09 +0200 Subject: [PATCH] Interpreter: Return float from ForceSingle Performance optimization, along with making the code a little neater. Saves us from performing a single -> double -> single conversion when calling UpdateFPRFSingle. --- .../PowerPC/Interpreter/Interpreter_FPUtils.h | 6 +- .../Interpreter/Interpreter_FloatingPoint.cpp | 22 +++--- .../Interpreter/Interpreter_Paired.cpp | 76 +++++++++---------- 3 files changed, 47 insertions(+), 57 deletions(-) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h index bedc3085d3..37a355338e 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h @@ -36,15 +36,13 @@ inline void SetFPException(UReg_FPSCR* fpscr, u32 mask) fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0; } -inline double ForceSingle(const UReg_FPSCR& fpscr, double value) +inline float ForceSingle(const UReg_FPSCR& fpscr, double value) { - // convert to float... 
- float x = (float)value; + float x = static_cast<float>(value); if (!cpu_info.bFlushToZero && fpscr.NI) { x = Common::FlushToZero(x); } - // ...and back to double: return x; } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp index d02e8ae71b..1c0ff8f3fd 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp @@ -290,7 +290,7 @@ void Interpreter::fselx(UGeckoInstruction inst) void Interpreter::frspx(UGeckoInstruction inst) // round to single { const double b = rPS(inst.FB).PS0AsDouble(); - const double rounded = ForceSingle(FPSCR, b); + const float rounded = ForceSingle(FPSCR, b); if (std::isnan(b)) { @@ -349,7 +349,7 @@ void Interpreter::fmulsx(UGeckoInstruction inst) if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions()) { - const double result = ForceSingle(FPSCR, d_value.value); + const float result = ForceSingle(FPSCR, d_value.value); rPS(inst.FD).Fill(result); FPSCR.FI = 0; @@ -390,7 +390,7 @@ void Interpreter::fmaddsx(UGeckoInstruction inst) if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions()) { - const double result = ForceSingle(FPSCR, d_value.value); + const float result = ForceSingle(FPSCR, d_value.value); rPS(inst.FD).Fill(result); FPSCR.FI = d_value.value != result; @@ -428,7 +428,7 @@ void Interpreter::faddsx(UGeckoInstruction inst) if (FPSCR.VE == 0 || sum.HasNoInvalidExceptions()) { - const double result = ForceSingle(FPSCR, sum.value); + const float result = ForceSingle(FPSCR, sum.value); rPS(inst.FD).Fill(result); PowerPC::UpdateFPRFSingle(result); } @@ -468,7 +468,7 @@ void Interpreter::fdivsx(UGeckoInstruction inst) if (not_divide_by_zero && not_invalid) { - const double result = ForceSingle(FPSCR, quotient.value); + const float result = ForceSingle(FPSCR, quotient.value); rPS(inst.FD).Fill(result); PowerPC::UpdateFPRFSingle(result); } @@ -592,7 +592,7 @@ 
void Interpreter::fmsubsx(UGeckoInstruction inst) if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { - const double result = ForceSingle(FPSCR, product.value); + const float result = ForceSingle(FPSCR, product.value); rPS(inst.FD).Fill(result); PowerPC::UpdateFPRFSingle(result); } @@ -633,8 +633,8 @@ void Interpreter::fnmaddsx(UGeckoInstruction inst) if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { - const double tmp = ForceSingle(FPSCR, product.value); - const double result = std::isnan(tmp) ? tmp : -tmp; + const float tmp = ForceSingle(FPSCR, product.value); + const float result = std::isnan(tmp) ? tmp : -tmp; rPS(inst.FD).Fill(result); PowerPC::UpdateFPRFSingle(result); @@ -676,8 +676,8 @@ void Interpreter::fnmsubsx(UGeckoInstruction inst) if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { - const double tmp = ForceSingle(FPSCR, product.value); - const double result = std::isnan(tmp) ? tmp : -tmp; + const float tmp = ForceSingle(FPSCR, product.value); + const float result = std::isnan(tmp) ? 
tmp : -tmp; rPS(inst.FD).Fill(result); PowerPC::UpdateFPRFSingle(result); @@ -714,7 +714,7 @@ void Interpreter::fsubsx(UGeckoInstruction inst) if (FPSCR.VE == 0 || difference.HasNoInvalidExceptions()) { - const double result = ForceSingle(FPSCR, difference.value); + const float result = ForceSingle(FPSCR, difference.value); rPS(inst.FD).Fill(result); PowerPC::UpdateFPRFSingle(result); } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp index 140433892c..20e5405236 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp @@ -113,8 +113,8 @@ void Interpreter::ps_div(UGeckoInstruction inst) const auto& a = rPS(inst.FA); const auto& b = rPS(inst.FB); - const double ps0 = ForceSingle(FPSCR, NI_div(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value); - const double ps1 = ForceSingle(FPSCR, NI_div(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value); + const float ps0 = ForceSingle(FPSCR, NI_div(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value); + const float ps1 = ForceSingle(FPSCR, NI_div(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value); rPS(inst.FD).SetBoth(ps0, ps1); PowerPC::UpdateFPRFSingle(ps0); @@ -174,8 +174,8 @@ void Interpreter::ps_rsqrte(UGeckoInstruction inst) if (Common::IsSNAN(ps0) || Common::IsSNAN(ps1)) SetFPException(&FPSCR, FPSCR_VXSNAN); - const double dst_ps0 = ForceSingle(FPSCR, Common::ApproximateReciprocalSquareRoot(ps0)); - const double dst_ps1 = ForceSingle(FPSCR, Common::ApproximateReciprocalSquareRoot(ps1)); + const float dst_ps0 = ForceSingle(FPSCR, Common::ApproximateReciprocalSquareRoot(ps0)); + const float dst_ps1 = ForceSingle(FPSCR, Common::ApproximateReciprocalSquareRoot(ps1)); rPS(inst.FD).SetBoth(dst_ps0, dst_ps1); PowerPC::UpdateFPRFSingle(dst_ps0); @@ -189,8 +189,8 @@ void Interpreter::ps_sub(UGeckoInstruction inst) const auto& a = rPS(inst.FA); const auto& b = rPS(inst.FB); - 
const double ps0 = ForceSingle(FPSCR, NI_sub(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value); - const double ps1 = ForceSingle(FPSCR, NI_sub(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value); + const float ps0 = ForceSingle(FPSCR, NI_sub(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value); + const float ps1 = ForceSingle(FPSCR, NI_sub(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value); rPS(inst.FD).SetBoth(ps0, ps1); PowerPC::UpdateFPRFSingle(ps0); @@ -204,8 +204,8 @@ void Interpreter::ps_add(UGeckoInstruction inst) const auto& a = rPS(inst.FA); const auto& b = rPS(inst.FB); - const double ps0 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value); - const double ps1 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value); + const float ps0 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value); + const float ps1 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value); rPS(inst.FD).SetBoth(ps0, ps1); PowerPC::UpdateFPRFSingle(ps0); @@ -222,8 +222,8 @@ void Interpreter::ps_mul(UGeckoInstruction inst) const double c0 = Force25Bit(c.PS0AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble()); - const double ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c0).value); - const double ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c1).value); + const float ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c0).value); + const float ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c1).value); rPS(inst.FD).SetBoth(ps0, ps1); PowerPC::UpdateFPRFSingle(ps0); @@ -241,10 +241,8 @@ void Interpreter::ps_msub(UGeckoInstruction inst) const double c0 = Force25Bit(c.PS0AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble()); - const double ps0 = - ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); - const double ps1 = - ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value); + const float ps0 = ForceSingle(FPSCR, 
NI_msub(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); + const float ps1 = ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value); rPS(inst.FD).SetBoth(ps0, ps1); PowerPC::UpdateFPRFSingle(ps0); @@ -262,10 +260,8 @@ void Interpreter::ps_madd(UGeckoInstruction inst) const double c0 = Force25Bit(c.PS0AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble()); - const double ps0 = - ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); - const double ps1 = - ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value); + const float ps0 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); + const float ps1 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value); rPS(inst.FD).SetBoth(ps0, ps1); PowerPC::UpdateFPRFSingle(ps0); @@ -283,13 +279,13 @@ void Interpreter::ps_nmsub(UGeckoInstruction inst) const double c0 = Force25Bit(c.PS0AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble()); - const double tmp0 = + const float tmp0 = ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); - const double tmp1 = + const float tmp1 = ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value); - const double ps0 = std::isnan(tmp0) ? tmp0 : -tmp0; - const double ps1 = std::isnan(tmp1) ? tmp1 : -tmp1; + const float ps0 = std::isnan(tmp0) ? tmp0 : -tmp0; + const float ps1 = std::isnan(tmp1) ? 
tmp1 : -tmp1; rPS(inst.FD).SetBoth(ps0, ps1); PowerPC::UpdateFPRFSingle(ps0); @@ -307,13 +303,13 @@ void Interpreter::ps_nmadd(UGeckoInstruction inst) const double c0 = Force25Bit(c.PS0AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble()); - const double tmp0 = + const float tmp0 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); - const double tmp1 = + const float tmp1 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value); - const double ps0 = std::isnan(tmp0) ? tmp0 : -tmp0; - const double ps1 = std::isnan(tmp1) ? tmp1 : -tmp1; + const float ps0 = std::isnan(tmp0) ? tmp0 : -tmp0; + const float ps1 = std::isnan(tmp1) ? tmp1 : -tmp1; rPS(inst.FD).SetBoth(ps0, ps1); PowerPC::UpdateFPRFSingle(ps0); @@ -328,8 +324,8 @@ void Interpreter::ps_sum0(UGeckoInstruction inst) const auto& b = rPS(inst.FB); const auto& c = rPS(inst.FC); - const double ps0 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS1AsDouble()).value); - const double ps1 = ForceSingle(FPSCR, c.PS1AsDouble()); + const float ps0 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS1AsDouble()).value); + const float ps1 = ForceSingle(FPSCR, c.PS1AsDouble()); rPS(inst.FD).SetBoth(ps0, ps1); PowerPC::UpdateFPRFSingle(ps0); @@ -344,8 +340,8 @@ void Interpreter::ps_sum1(UGeckoInstruction inst) const auto& b = rPS(inst.FB); const auto& c = rPS(inst.FC); - const double ps0 = ForceSingle(FPSCR, c.PS0AsDouble()); - const double ps1 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS1AsDouble()).value); + const float ps0 = ForceSingle(FPSCR, c.PS0AsDouble()); + const float ps1 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS1AsDouble()).value); rPS(inst.FD).SetBoth(ps0, ps1); PowerPC::UpdateFPRFSingle(ps1); @@ -360,8 +356,8 @@ void Interpreter::ps_muls0(UGeckoInstruction inst) const auto& c = rPS(inst.FC); const double c0 = Force25Bit(c.PS0AsDouble()); - const double ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), 
c0).value); - const double ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c0).value); + const float ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c0).value); + const float ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c0).value); rPS(inst.FD).SetBoth(ps0, ps1); PowerPC::UpdateFPRFSingle(ps0); @@ -376,8 +372,8 @@ void Interpreter::ps_muls1(UGeckoInstruction inst) const auto& c = rPS(inst.FC); const double c1 = Force25Bit(c.PS1AsDouble()); - const double ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c1).value); - const double ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c1).value); + const float ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c1).value); + const float ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c1).value); rPS(inst.FD).SetBoth(ps0, ps1); PowerPC::UpdateFPRFSingle(ps0); @@ -393,10 +389,8 @@ void Interpreter::ps_madds0(UGeckoInstruction inst) const auto& c = rPS(inst.FC); const double c0 = Force25Bit(c.PS0AsDouble()); - const double ps0 = - ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); - const double ps1 = - ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c0, b.PS1AsDouble()).value); + const float ps0 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); + const float ps1 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c0, b.PS1AsDouble()).value); rPS(inst.FD).SetBoth(ps0, ps1); PowerPC::UpdateFPRFSingle(ps0); @@ -412,10 +406,8 @@ void Interpreter::ps_madds1(UGeckoInstruction inst) const auto& c = rPS(inst.FC); const double c1 = Force25Bit(c.PS1AsDouble()); - const double ps0 = - ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c1, b.PS0AsDouble()).value); - const double ps1 = - ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value); + const float ps0 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c1, b.PS0AsDouble()).value); + const float ps1 = ForceSingle(FPSCR, 
NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value); rPS(inst.FD).SetBoth(ps0, ps1); PowerPC::UpdateFPRFSingle(ps0);