Merge pull request #14033 from JosJuice/jitarm64-inaccurate-fma-double

JitArm64: Always use double precision for inaccurate FMA
2026-01-31 01:15:17 +01:00 · 2026-01-18 13:52:06 -05:00
parent f8b47c031f addededecf
commit fe668ebc89
2 changed files with 20 additions and 20 deletions
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
@@ -75,26 +75,25 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
  const u32 d = inst.FD;
  const u32 op5 = inst.SUBOP5;

-  const bool use_c = op5 >= 25;  // fmul and all kind of fmaddXX
+  const bool use_c = op5 >= 25;  // fmul and all kinds of fmaddXX
  const bool use_b = op5 != 25;  // fmul uses no B
  const bool fma = use_b && use_c;
  const bool negate_result = (op5 & ~0x1) == 30;

  const bool output_is_single = inst.OPCD == 59;
-  const bool inaccurate_fma = op5 > 25 && !Config::Get(Config::SESSION_USE_FMA);
-  const bool round_c = use_c && output_is_single && !js.op->fprIsSingle[inst.FC];
+  const bool inaccurate_fma = fma && !Config::Get(Config::SESSION_USE_FMA);
+  const bool round_c = use_c && output_is_single && !js.op->fprIsSingle[c];

  const auto inputs_are_singles_func = [&] {
    return fpr.IsSingle(a, true) && (!use_b || fpr.IsSingle(b, true)) &&
           (!use_c || fpr.IsSingle(c, true));
  };
-  const bool inputs_are_singles = inputs_are_singles_func();

-  const bool single = inputs_are_singles && output_is_single;
+  const bool single = inputs_are_singles_func() && output_is_single && !inaccurate_fma;
  const RegType type = single ? RegType::LowerPairSingle : RegType::LowerPair;
-  const RegType type_out =
-      output_is_single ? (inputs_are_singles ? RegType::DuplicatedSingle : RegType::Duplicated) :
-                         RegType::LowerPair;
+  const RegType type_out = output_is_single ?
+                               (single ? RegType::DuplicatedSingle : RegType::Duplicated) :
+                               RegType::LowerPair;
  const auto reg_encoder = single ? EncodeRegToSingle : EncodeRegToDouble;

  const ARM64Reg VA = reg_encoder(fpr.R(a, type));
@@ -109,7 +108,7 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
    ARM64Reg rounded_c_reg = VC;
    if (round_c)
    {
-      ASSERT_MSG(DYNA_REC, !inputs_are_singles, "Tried to apply 25-bit precision to single");
+      ASSERT_MSG(DYNA_REC, !single, "Tried to apply 25-bit precision to single");

      V0Q = fpr.GetScopedReg();
      rounded_c_reg = reg_encoder(V0Q);
@@ -249,7 +248,7 @@ void JitArm64::fp_arith(UGeckoInstruction inst)

  if (output_is_single)
  {
-    ASSERT_MSG(DYNA_REC, inputs_are_singles == inputs_are_singles_func(),
+    ASSERT_MSG(DYNA_REC, single == inputs_are_singles_func(),
               "Register allocation turned singles into doubles in the middle of fp_arith");

    fpr.FixSinglePrecision(d);
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
@@ -94,16 +94,17 @@ void JitArm64::ps_arith(UGeckoInstruction inst)
  const bool negate_result = (op5 & ~0x1) == 30;
  const bool msub = op5 == 28 || op5 == 30;

-  const auto singles_func = [&] {
+  const bool inaccurate_fma = fma && !Config::Get(Config::SESSION_USE_FMA);
+  const bool round_c = use_c && !js.op->fprIsSingle[c];
+
+  const auto inputs_are_singles_func = [&] {
    return fpr.IsSingle(a) && (!use_b || fpr.IsSingle(b)) && (!use_c || fpr.IsSingle(c));
  };
-  const bool singles = singles_func();

-  const bool inaccurate_fma = !Config::Get(Config::SESSION_USE_FMA);
-  const bool round_c = use_c && !js.op->fprIsSingle[inst.FC];
-  const RegType type = singles ? RegType::Single : RegType::Register;
-  const u8 size = singles ? 32 : 64;
-  const auto reg_encoder = singles ? EncodeRegToDouble : EncodeRegToQuad;
+  const bool single = inputs_are_singles_func() && !inaccurate_fma;
+  const RegType type = single ? RegType::Single : RegType::Register;
+  const u8 size = single ? 32 : 64;
+  const auto reg_encoder = single ? EncodeRegToDouble : EncodeRegToQuad;

  const ARM64Reg VA = reg_encoder(fpr.R(a, type));
  const ARM64Reg VB = use_b ? reg_encoder(fpr.R(b, type)) : ARM64Reg::INVALID_REG;
@@ -118,7 +119,7 @@ void JitArm64::ps_arith(UGeckoInstruction inst)
    ARM64Reg rounded_c_reg = VC;
    if (round_c)
    {
-      ASSERT_MSG(DYNA_REC, !singles, "Tried to apply 25-bit precision to single");
+      ASSERT_MSG(DYNA_REC, !single, "Tried to apply 25-bit precision to single");

      V0Q = fpr.GetScopedReg();
      rounded_c_reg = reg_encoder(V0Q);
@@ -234,7 +235,7 @@ void JitArm64::ps_arith(UGeckoInstruction inst)
    FixupBranch nan_fixup;
    if (m_accurate_nans)
    {
-      const ARM64Reg nan_temp_reg = singles ? EncodeRegToSingle(V0Q) : EncodeRegToDouble(V0Q);
+      const ARM64Reg nan_temp_reg = single ? EncodeRegToSingle(V0Q) : EncodeRegToDouble(V0Q);
      const ARM64Reg nan_temp_reg_paired = reg_encoder(V0Q);

      // Check if we need to handle NaNs
@@ -300,7 +301,7 @@ void JitArm64::ps_arith(UGeckoInstruction inst)
      SetJumpTarget(nan_fixup);
  }

-  ASSERT_MSG(DYNA_REC, singles == singles_func(),
+  ASSERT_MSG(DYNA_REC, single == inputs_are_singles_func(),
             "Register allocation turned singles into doubles in the middle of ps_arith");

  fpr.FixSinglePrecision(d);