diff --git a/Common/CommonFloat.h b/Common/CommonFloat.h deleted file mode 100644 index 897fe35..0000000 --- a/Common/CommonFloat.h +++ /dev/null @@ -1,97 +0,0 @@ -#pragma once - -#include "Common/CommonTypes.h" -#include - -enum class RoundingMode -{ - Nearest = 0b00, - TowardsZero = 0b01, - TowardsPositiveInfinity = 0b10, - TowardsNegativeInfinity = 0b11, -}; - -enum : u64 -{ - DOUBLE_SIGN = 0x8000000000000000ULL, - DOUBLE_EXP = 0x7FF0000000000000ULL, - DOUBLE_FRAC = 0x000FFFFFFFFFFFFFULL, - DOUBLE_ZERO = 0x0000000000000000ULL, - DOUBLE_QBIT = 0x0008000000000000ULL, -}; -constexpr u64 DOUBLE_FRAC_WIDTH = 52; -constexpr u64 DOUBLE_SIGN_SHIFT = 63; - -enum : u32 -{ - FLOAT_SIGN = 0x80000000, - FLOAT_EXP = 0x7F800000, - FLOAT_FRAC = 0x007FFFFF, - FLOAT_ZERO = 0x00000000, -}; -constexpr u32 FLOAT_FRAC_WIDTH = 23; - -double frsqrte_expected(double val) -{ - static const int estimate_base[] = { - 0x3ffa000, 0x3c29000, 0x38aa000, 0x3572000, 0x3279000, 0x2fb7000, 0x2d26000, 0x2ac0000, - 0x2881000, 0x2665000, 0x2468000, 0x2287000, 0x20c1000, 0x1f12000, 0x1d79000, 0x1bf4000, - 0x1a7e800, 0x17cb800, 0x1552800, 0x130c000, 0x10f2000, 0x0eff000, 0x0d2e000, 0x0b7c000, - 0x09e5000, 0x0867000, 0x06ff000, 0x05ab800, 0x046a000, 0x0339800, 0x0218800, 0x0105800, - }; - static const int estimate_dec[] = { - 0x7a4, 0x700, 0x670, 0x5f2, 0x584, 0x524, 0x4cc, 0x47e, 0x43a, 0x3fa, 0x3c2, - 0x38e, 0x35e, 0x332, 0x30a, 0x2e6, 0x568, 0x4f3, 0x48d, 0x435, 0x3e7, 0x3a2, - 0x365, 0x32e, 0x2fc, 0x2d0, 0x2a8, 0x283, 0x261, 0x243, 0x226, 0x20b, - }; - - union - { - double valf; - s64 vali; - }; - valf = val; - s64 mantissa = vali & DOUBLE_FRAC; - s64 sign = vali & DOUBLE_SIGN; - s64 exponent = vali & DOUBLE_EXP; - - // Special case 0 - if (mantissa == 0 && exponent == 0) - return sign ? -std::numeric_limits::infinity() : - std::numeric_limits::infinity(); - // Special case NaN-ish numbers - if (exponent == DOUBLE_EXP) - { - if (mantissa == 0) - { - if (sign) - return std::numeric_limits::quiet_NaN(); - return 0.0; - } - return 0.0 + valf; - } - // Negative numbers return NaN - if (sign) - return std::numeric_limits::quiet_NaN(); - - if (!exponent) - { - // "Normalize" denormal values - do - { - exponent -= 1LL << 52; - mantissa <<= 1; - } while (!(mantissa & (1LL << 52))); - mantissa &= DOUBLE_FRAC; - exponent += 1LL << 52; - } - - bool odd_exponent = !(exponent & (1LL << 52)); - exponent = ((0x3FFLL << 52) - ((exponent - (0x3FELL << 52)) / 2)) & (0x7FFLL << 52); - - int i = (int)(mantissa >> 37); - vali = sign | exponent; - int index = i / 2048 + (odd_exponent ? 16 : 0); - vali |= (s64)(estimate_base[index] - estimate_dec[index] * (i % 2048)) << 26; - return valf; -} diff --git a/Common/FloatUtils.h b/Common/FloatUtils.h new file mode 100644 index 0000000..75af58c --- /dev/null +++ b/Common/FloatUtils.h @@ -0,0 +1,272 @@ +#pragma once + +#include "Common/CommonTypes.h" +#include "Common/BitUtils.h" +#include +#include + +#include +#include + +enum class RoundingMode +{ + Nearest = 0b00, + TowardsZero = 0b01, + TowardsPositiveInfinity = 0b10, + TowardsNegativeInfinity = 0b11, +}; + +constexpr u64 DOUBLE_SIGN = 0x8000000000000000ULL; +constexpr u64 DOUBLE_EXP = 0x7FF0000000000000ULL; +constexpr u64 DOUBLE_FRAC = 0x000FFFFFFFFFFFFFULL; +constexpr u64 DOUBLE_ZERO = 0x0000000000000000ULL; +constexpr u64 DOUBLE_QBIT = 0x0008000000000000ULL; +constexpr u64 DOUBLE_FRAC_WIDTH = 52; +constexpr u64 DOUBLE_SIGN_SHIFT = 63; + +constexpr u32 FLOAT_SIGN = 0x80000000; +constexpr u32 FLOAT_EXP = 0x7F800000; +constexpr u32 FLOAT_FRAC = 0x007FFFFF; +constexpr u32 FLOAT_ZERO = 0x00000000; +constexpr u32 FLOAT_FRAC_WIDTH = 23; + + +u64 TruncateMantissaBits(u64 bits) +{ + // Truncate the bits (doesn't depend on rounding mode) + constexpr u64 remove_bits = DOUBLE_FRAC_WIDTH - FLOAT_FRAC_WIDTH; + constexpr u64 remove_mask = (1 << remove_bits) - 1; + return bits & ~remove_mask; +} + +// Only used on its own by ps_sum1, for some reason +inline u64 RoundMantissaBitsAssumeFinite(u64 bits, RoundingMode rounding_mode) +{ + // Round bits in software rather than relying on any hardware float functions + constexpr u64 remove_bits = DOUBLE_FRAC_WIDTH - FLOAT_FRAC_WIDTH; + constexpr u64 remove_mask = (1 << remove_bits) - 1; + + u64 round_down = bits & ~remove_mask; + u64 masked_bits = bits & remove_mask; + + // Only round up if the result wouldn't be exact otherwise! + u64 round_up = round_down + (bits == round_down ? 0 : 1 << remove_bits); + if ((bits & DOUBLE_EXP) == 0) { + round_up &= ~DOUBLE_EXP; + } + + u64 even_split = 1 << (remove_bits - 1); + + switch (rounding_mode) + { + case RoundingMode::Nearest: + // Round to nearest (ties even) + if (masked_bits > even_split || + (masked_bits == even_split && (bits & (1 << remove_bits)) != 0)) + { + return round_up; + } + else + { + return round_down; + } + case RoundingMode::TowardsZero: + return round_down; + case RoundingMode::TowardsPositiveInfinity: + if ((bits & DOUBLE_SIGN) == 0) + { + return round_up; + } + else + { + return round_down; + } + case RoundingMode::TowardsNegativeInfinity: + if ((bits & DOUBLE_SIGN) != 0) + { + return round_up; + } + else + { + return round_down; + } + default: + // Unreachable + return 0; + } +} + +inline u64 RoundMantissaBits(u64 bits, RoundingMode rounding_mode) +{ + if ((bits & DOUBLE_EXP) == DOUBLE_EXP) + { + // For infinite and NaN values, the mantissa is simply truncated + return TruncateMantissaBits(bits); + } + + return RoundMantissaBitsAssumeFinite(bits, rounding_mode); +} + +inline float RoundToFloatWithMode(double input, RoundingMode rounding_mode) +{ + float result; + u64 round_mode_bits = static_cast(rounding_mode); + double scratch; + + asm volatile ( + "mffs %1\n" + "mtfsf 7, %0\n" + "frsp %2, %3\n" + "mtfsf 7, %1\n" + : "+f"(round_mode_bits), "=f"(scratch), "=f"(result) + : "f"(input) + ); + + return result; +} + + +double frsqrte_expected(double val) +{ + static const int estimate_base[] = { + 0x3ffa000, 0x3c29000, 0x38aa000, 0x3572000, 0x3279000, 0x2fb7000, 0x2d26000, 0x2ac0000, + 0x2881000, 0x2665000, 0x2468000, 0x2287000, 0x20c1000, 0x1f12000, 0x1d79000, 0x1bf4000, + 0x1a7e800, 0x17cb800, 0x1552800, 0x130c000, 0x10f2000, 0x0eff000, 0x0d2e000, 0x0b7c000, + 0x09e5000, 0x0867000, 0x06ff000, 0x05ab800, 0x046a000, 0x0339800, 0x0218800, 0x0105800, + }; + static const int estimate_dec[] = { + 0x7a4, 0x700, 0x670, 0x5f2, 0x584, 0x524, 0x4cc, 0x47e, 0x43a, 0x3fa, 0x3c2, + 0x38e, 0x35e, 0x332, 0x30a, 0x2e6, 0x568, 0x4f3, 0x48d, 0x435, 0x3e7, 0x3a2, + 0x365, 0x32e, 0x2fc, 0x2d0, 0x2a8, 0x283, 0x261, 0x243, 0x226, 0x20b, + }; + + union + { + double valf; + s64 vali; + }; + valf = val; + s64 mantissa = vali & DOUBLE_FRAC; + s64 sign = vali & DOUBLE_SIGN; + s64 exponent = vali & DOUBLE_EXP; + + // Special case 0 + if (mantissa == 0 && exponent == 0) + return sign ? -std::numeric_limits::infinity() : + std::numeric_limits::infinity(); + // Special case NaN-ish numbers + if (exponent == DOUBLE_EXP) + { + if (mantissa == 0) + { + if (sign) + return std::numeric_limits::quiet_NaN(); + return 0.0; + } + return 0.0 + valf; + } + // Negative numbers return NaN + if (sign) + return std::numeric_limits::quiet_NaN(); + + if (!exponent) + { + // "Normalize" denormal values + do + { + exponent -= 1LL << 52; + mantissa <<= 1; + } while (!(mantissa & (1LL << 52))); + mantissa &= DOUBLE_FRAC; + exponent += 1LL << 52; + } + + bool odd_exponent = !(exponent & (1LL << 52)); + exponent = ((0x3FFLL << 52) - ((exponent - (0x3FELL << 52)) / 2)) & (0x7FFLL << 52); + + int i = (int)(mantissa >> 37); + vali = sign | exponent; + int index = i / 2048 + (odd_exponent ? 16 : 0); + vali |= (s64)(estimate_base[index] - estimate_dec[index] * (i % 2048)) << 26; + return valf; +} + + +double fres_expected(double val) +{ + static const s32 estimate_base[] = { + 0xfff000, 0xf07000, 0xe1d400, 0xd41000, 0xc71000, 0xbac400, 0xaf2000, 0xa41000, + 0x999000, 0x8f9400, 0x861000, 0x7d0000, 0x745800, 0x6c1000, 0x642800, 0x5c9400, + 0x555000, 0x4e5800, 0x47ac00, 0x413c00, 0x3b1000, 0x352000, 0x2f5c00, 0x29f000, + 0x248800, 0x1f7c00, 0x1a7000, 0x15bc00, 0x110800, 0x0ca000, 0x083800, 0x041800, + }; + static const s32 estimate_dec[] = { + -0x3e1, -0x3a7, -0x371, -0x340, -0x313, -0x2ea, -0x2c4, -0x2a0, -0x27f, -0x261, -0x245, + -0x22a, -0x212, -0x1fb, -0x1e5, -0x1d1, -0x1be, -0x1ac, -0x19b, -0x18b, -0x17c, -0x16e, + -0x15b, -0x15b, -0x143, -0x143, -0x12d, -0x12d, -0x11a, -0x11a, -0x108, -0x106, + }; + + union + { + float valf; + u32 vali; + }; + u64 full_bits = Common::BitCast(val); + valf = RoundToFloatWithMode(val, RoundingMode::TowardsZero); + u32 mantissa = vali & FLOAT_FRAC; + u32 sign = vali & FLOAT_SIGN; + s32 exponent = static_cast(vali & FLOAT_EXP); + + // Special case 0 + if (exponent == 0 && mantissa < 0x200000) + { + if ((full_bits & ~DOUBLE_SIGN) == 0) + { + return sign ? -std::numeric_limits::infinity() : + std::numeric_limits::infinity(); + } + else + { + return sign ? -FLT_MAX : FLT_MAX; + } + } + + // Special case NaN-ish numbers + if ((full_bits & DOUBLE_EXP) >= 0x47f0000000000000ULL) + { + // If it's not NaN, it's infinite! + if (valf == valf) + return sign ? -0.0 : 0.0; + return 0.0 + val; + } + + // Number is denormal, shift the mantissa and adjust the exponent + if (exponent == 0) + { + mantissa <<= 1; + while ((mantissa & FLOAT_EXP) == 0) { + mantissa <<= 1; + exponent -= static_cast(1 << FLOAT_FRAC_WIDTH); + } + + mantissa &= FLOAT_FRAC; + } + + exponent = (253 << FLOAT_FRAC_WIDTH) - exponent; + + u32 key = mantissa >> 18; + u32 new_mantissa = static_cast(estimate_base[key] + estimate_dec[key] * static_cast((mantissa >> 8) & 0x3ff)) >> 1; + + if (exponent <= 0) + { + // Result is subnormal, format it properly! + u32 shift = 1 + (static_cast(-exponent) >> FLOAT_FRAC_WIDTH); + vali = sign | (((1 << FLOAT_FRAC_WIDTH) | new_mantissa) >> shift); + } + else + { + // Result is normal, just string things together + vali = sign | static_cast(exponent) | new_mantissa; + } + return static_cast(valf); +} \ No newline at end of file diff --git a/cputest/fctiw.cpp b/cputest/fctiw.cpp index e5a1264..7e50651 100644 --- a/cputest/fctiw.cpp +++ b/cputest/fctiw.cpp @@ -4,7 +4,7 @@ #include #include "Common/BitUtils.h" -#include "Common/CommonFloat.h" +#include "Common/FloatUtils.h" #include "Common/hwtests.h" // Algorithm adapted from Appendix C.4.2 in PowerPC Microprocessor Family: diff --git a/cputest/pairedmove.cpp b/cputest/pairedmove.cpp index 7c06b62..e48b122 100644 --- a/cputest/pairedmove.cpp +++ b/cputest/pairedmove.cpp @@ -2,77 +2,10 @@ #include #include "Common/BitUtils.h" -#include "Common/CommonFloat.h" +#include "Common/FloatUtils.h" #include "Common/hwtests.h" -static u64 TruncateMantissaBits(u64 bits) -{ - // Truncate the bits (doesn't depend on rounding mode) - constexpr u64 remove_bits = DOUBLE_FRAC_WIDTH - FLOAT_FRAC_WIDTH; - constexpr u64 remove_mask = (1 << remove_bits) - 1; - return bits & ~remove_mask; -} - -inline u64 RoundMantissaBits(u64 bits, RoundingMode rounding_mode) -{ - // Round bits in software rather than relying on any hardware float functions - constexpr u64 remove_bits = DOUBLE_FRAC_WIDTH - FLOAT_FRAC_WIDTH; - constexpr u64 remove_mask = (1 << remove_bits) - 1; - - u64 round_down = bits & ~remove_mask; - u64 masked_bits = bits & remove_mask; - - if ((bits & DOUBLE_EXP) == DOUBLE_EXP) - { - // For infinite and NaN values, the mantissa is simply truncated - return round_down; - } - - // Only round up if the result wouldn't be exact otherwise! - u64 round_up = round_down + (bits == round_down ? 0 : 1 << remove_bits); - u64 even_split = 1 << (remove_bits - 1); - - switch (rounding_mode) - { - case RoundingMode::Nearest: - // Round to nearest (ties even) - if (masked_bits > even_split || - (masked_bits == even_split && (bits & (1 << remove_bits)) != 0)) - { - return round_up; - } - else - { - return round_down; - } - case RoundingMode::TowardsZero: - return round_down; - case RoundingMode::TowardsPositiveInfinity: - if ((bits & DOUBLE_SIGN) == 0) - { - return round_up; - } - else - { - return round_down; - } - case RoundingMode::TowardsNegativeInfinity: - if ((bits & DOUBLE_SIGN) != 0) - { - return round_up; - } - else - { - return round_down; - } - default: - // Unreachable - return 0; - } -} - - static void MergeTest(const u64* input_ptr, RoundingMode rounding_mode) { double result_ps0; @@ -81,7 +14,10 @@ static void MergeTest(const u64* input_ptr, RoundingMode rounding_mode) u64 expected_ps0 = RoundMantissaBits(input, rounding_mode); u64 expected_ps1 = TruncateMantissaBits(input); - asm volatile ("lfd %1, 0(%2)\n" + asm volatile ("ps_mr %1, %1\n" + "isync\n" + "lfd %1, 0(%2)\n" + "isync\n" "ps_merge00 %0, %1, %1\n" "ps_merge11 %1, %0, %0\n" : "=f"(result_ps0), "=f"(result_ps1) @@ -91,7 +27,7 @@ static void MergeTest(const u64* input_ptr, RoundingMode rounding_mode) u64 result_ps0_bits = Common::BitCast(result_ps0); u64 result_ps1_bits = Common::BitCast(result_ps1); - DO_TEST(result_ps0_bits == expected_ps0 + DO_TEST(result_ps0_bits == expected_ps0 && result_ps1_bits == expected_ps1, "ps_merge 0x{:016x} ({}):\n" " got 0x{:016x} ({}) 0x{:016x} ({})\n" @@ -139,7 +75,9 @@ static void NegTest(const u64* input_ptr, RoundingMode rounding_mode) asm volatile ("lfd %0, 0(%2)\n" "ps_merge00 %0, %0, %0\n" + "isync\n" "lfd %0, 0(%2)\n" + "isync\n" "ps_neg %0, %0\n" "ps_merge11 %1, %0, %0\n" : "=f"(result_ps0), "=f"(result_ps1) @@ -172,7 +110,9 @@ void AbsTest(const u64* input_ptr, RoundingMode rounding_mode) asm volatile ("lfd %0, 0(%2)\n" "ps_merge00 %0, %0, %0\n" + "isync\n" "lfd %0, 0(%2)\n" + "isync\n" "ps_abs %0, %0\n" "ps_merge11 %1, %0, %0\n" : "=f"(result_ps0), "=f"(result_ps1) @@ -205,7 +145,9 @@ static void NabsTest(const u64* input_ptr, RoundingMode rounding_mode) asm volatile ("lfd %0, 0(%2)\n" "ps_merge00 %0, %0, %0\n" + "isync\n" "lfd %0, 0(%2)\n" + "isync\n" "ps_nabs %0, %0\n" "ps_merge11 %1, %0, %0\n" : "=f"(result_ps0), "=f"(result_ps1) @@ -227,6 +169,169 @@ static void NabsTest(const u64* input_ptr, RoundingMode rounding_mode) expected_ps1, Common::BitCast(expected_ps1)); } +void SelTest(const u64* input_ptr, RoundingMode rounding_mode) +{ + // Only tests the select taken case + // The untaken case should count as an error as well + double result0_ps0; + double result0_ps1; + double result1_ps0; + double result1_ps1; + + u64 input = *input_ptr; + u64 expected_ps0 = RoundMantissaBits(input, rounding_mode); + u64 expected_ps1 = TruncateMantissaBits(input); + + double one = 1.0; + + asm volatile ("lfd %0, 0(%5)\n" + "ps_merge00 %0, %0, %0\n" + "isync\n" + "lfd %0, 0(%5)\n" + "isync\n" + "ps_merge00 %0, %0, %0\n" + "lfd %2, 0(%5)\n" + "ps_merge00 %2, %2, %2\n" + "isync\n" + "lfd %2, 0(%5)\n" + "isync\n" + "ps_merge00 %2, %2, %2\n" + "ps_merge00 %4, %4, %4\n" + "ps_sel %0, %4, %0, %4\n" + "ps_merge11 %1, %0, %0\n" + "ps_neg %4, %4\n" + "ps_sel %2, %4, %4, %2\n" + "ps_merge11 %3, %2, %2\n" + : "=f"(result0_ps0), "=f"(result0_ps1), "=f"(result1_ps0), "=f"(result1_ps1), "+f"(one) + : "r"(input_ptr) + ); + + u64 result0_ps0_bits = Common::BitCast(result0_ps0); + u64 result0_ps1_bits = Common::BitCast(result0_ps1); + u64 result1_ps0_bits = Common::BitCast(result1_ps0); + u64 result1_ps1_bits = Common::BitCast(result1_ps1); + + DO_TEST(result0_ps0_bits == expected_ps0 + && result0_ps1_bits == expected_ps1 + && result1_ps0_bits == expected_ps0 + && result1_ps1_bits == expected_ps1, + "ps_sel 0x{:016x} ({}):\n" + " got >=0: 0x{:016x} ({}) 0x{:016x} ({})\n" + " <0: 0x{:016x} ({}) 0x{:016x} ({})\n" + "expected 0x{:016x} ({}) 0x{:016x} ({})", + input, Common::BitCast(input), + result0_ps0_bits, result0_ps0, + result0_ps1_bits, result0_ps1, + result1_ps0_bits, result1_ps0, + result1_ps1_bits, result1_ps1, + expected_ps0, Common::BitCast(expected_ps0), + expected_ps1, Common::BitCast(expected_ps1)); +} + +static void Sum0Test(const u64* input_ptr) +{ + // Only checks PS1 because PS0 should be rounded to a float, + // which isn't a move operation + double result_ps1; + double one = 1.0; + u64 input = *input_ptr; + u64 expected_ps1 = TruncateMantissaBits(input); + + asm volatile ("lfd %0, 0(%2)\n" + "ps_merge00 %0, %0, %0\n" + "isync\n" + "lfd %0, 0(%2)\n" + "isync\n" + "ps_sum0 %0, %0, %0, %1\n" + "ps_merge11 %0, %0, %0\n" + : "=f"(result_ps1) + : "f"(one), "r"(input_ptr) + ); + + u64 result_ps1_bits = Common::BitCast(result_ps1); + + DO_TEST(result_ps1_bits == expected_ps1, + "ps_sum0 0x{:016x} ({}):\n" + " got 0x{:016x} ({})\n" + "expected 0x{:016x} ({})", + input, Common::BitCast(input), + result_ps1_bits, result_ps1, + expected_ps1, Common::BitCast(expected_ps1)); +} + +static void Sum1Test(const u64* input_ptr, RoundingMode rounding_mode) +{ + // The opposite of ps_sum0, only checks ps0 + double result_ps0; + double one = 1.0; + u64 input = *input_ptr; + u64 expected_ps0 = RoundMantissaBitsAssumeFinite(input, rounding_mode); + + asm volatile ("lfd %0, 0(%2)\n" + "ps_merge00 %0, %0, %0\n" + "isync\n" + "lfd %0, 0(%2)\n" + "isync\n" + "ps_sum1 %0, %0, %0, %1\n" + : "=f"(result_ps0) + : "f"(one), "r"(input_ptr) + ); + + u64 result_ps0_bits = Common::BitCast(result_ps0); + + DO_TEST(result_ps0_bits == expected_ps0, + "ps_sum1 0x{:016x} ({}):\n" + " got 0x{:016x} ({})\n" + "expected 0x{:016x} ({})", + input, Common::BitCast(input), + result_ps0_bits, result_ps0, + expected_ps0, Common::BitCast(expected_ps0)); +} + +static void ResTest(const u64* input_ptr) +{ + double result_ps0; + double result_ps1; + u64 input = *input_ptr; + double input_float = Common::BitCast(input); + + double expected_ps0_float = fres_expected(input_float); + u64 expected_ps0 = TruncateMantissaBits(Common::BitCast(expected_ps0_float)); + double expected_ps1_float = expected_ps0_float; + u64 expected_ps1 = expected_ps0; + + // If the full precision input would've only been a value which *truncates* to 0, + // it *always* sets the sign of the input for some reason + if ((input & 0x7fffffffe0000000) == 0 && (input & ~DOUBLE_SIGN) != 0) { + expected_ps1 |= DOUBLE_SIGN; + expected_ps1_float = Common::BitCast(expected_ps1); + } + + asm volatile ("ps_mr %0, %0\n" + "lfd %0, 0(%2)\n" + "ps_merge00 %1, %0, %0\n" + "ps_res %0, %0\n" + "ps_res %1, %1\n" + "ps_merge11 %1, %1, %1\n" + : "=f"(result_ps0), "=f"(result_ps1) + : "r"(input_ptr) + ); + + u64 result_ps0_bits = Common::BitCast(result_ps0); + u64 result_ps1_bits = Common::BitCast(result_ps1); + + DO_TEST(result_ps0_bits == expected_ps0 + && result_ps1_bits == expected_ps1, + "ps_res 0x{:016x} ({}):\n" + " got 0x{:016x} ({}) 0x{:016x} ({})\n" + "expected 0x{:016x} ({}) 0x{:016x} ({})", + input, input_float, + result_ps0_bits, result_ps0, + result_ps1_bits, result_ps1, + expected_ps0, expected_ps0_float, + expected_ps1, expected_ps1_float); +} + static void RsqrteTest(const u64* input_ptr) { double result_ps0; @@ -241,7 +346,15 @@ static void RsqrteTest(const u64* input_ptr) double result_unrounded_ps1 = frsqrte_expected(input_ps1_float); u64 expected_ps1 = TruncateMantissaBits(Common::BitCast(result_unrounded_ps1)); - asm volatile ("lfd %0, 0(%2)\n" + // If the full precision input would've only been a value which *truncates* to 0, + // it *always* sets the sign of the input for some reason, which will + // return NaN here + if ((input_ps0 & 0x7fffffffe0000000) == 0 && (input_ps0 & ~DOUBLE_SIGN) != 0) { + expected_ps1 = 0x7ff8000000000000; + } + + asm volatile ("ps_mr %0, %0\n" + "lfd %0, 0(%2)\n" "ps_merge00 %1, %0, %0\n" "ps_rsqrte %0, %0\n" "ps_rsqrte %1, %1\n" @@ -285,15 +398,33 @@ static void PSMoveTest() 0x3690000000000000, // Min single denormal / 2 0x36a8000000000000, // Min single denormal * 3 / 2 0x36a8000000000000, // Min single denormal * 3 / 2 - 0x7fefffffffffffff, // Max double denormal - 0x47efffffe0000000, // Max single denormal - 0x47effffff0000000, // Max single denormal + round + 0x000fffffc0000000, // Not max double denormal + 0x380fffff80000000, // Not max single denormal + 0x000fffffffffffff, // Max double denormal + 0x001fffffffffffff, // Not denormal double + 0x380fffffc0000000, // Max single denormal + 0x380fffffe0000000, // Max single denormal + even + 0x380ffffff0000000, // Max single denormal + round + 0x380fffffffffffff, // Max single denormal + big influence + 0x7fefffffffffffff, // Max double normal + 0x47efffffe0000000, // Max single normal + 0x47effffff0000000, // Max single normal + round 0x0000000010000000, // Double denormal (no round even) 0x0000000010000001, // Double denormal (round even) + 0x000000001fffffff, // Max min which should round/trunc + 0x0000000020000000, // Min nonzero which should be agreed upon 0x0000000030000000, // Double denormal (round even) 0x500fffffd0000000, // Double big (no round even) 0x500fffffd0000001, // Double big (round even) 0x500ffffff0000000, // Double big (round even) + 0x3fffffffffffffff, // Smallest number below 2 + 0x3fffffffd0000000, // Another small number below 2 + 0x3fffffffe0000000, // Small number below 2 (ties even) + 0x3fffffffe0000001, // Small number below 2 (round up) + 0x3fffffffffffffff, // Denormal with influence + 0x1fffffffd0000000, // Similar denormal + 0x1fffffffe0000000, // Similar denormal again (ties even) + 0x1fffffffe0000001, // Similar denormal yet again (round up) 0x0123456789abcdef, // Random 0x76543210fedcba09, // Random @@ -326,6 +457,10 @@ static void PSMoveTest() NegTest(input_ref, rounding_mode); AbsTest(input_ref, rounding_mode); NabsTest(input_ref, rounding_mode); + SelTest(input_ref, rounding_mode); + Sum0Test(input_ref); + Sum1Test(input_ref, rounding_mode); + ResTest(input_ref); RsqrteTest(input_ref); } } diff --git a/cputest/reciprocal.cpp b/cputest/reciprocal.cpp index 6abbd33..eb65c47 100644 --- a/cputest/reciprocal.cpp +++ b/cputest/reciprocal.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 // Refer to the license.txt file included. -#include #include #include #include @@ -10,59 +9,9 @@ #include #include -#include "Common/CommonFloat.h" +#include "Common/FloatUtils.h" #include "Common/hwtests.h" -static double fres_expected(double val) -{ - static const int estimate_base[] = { - 0x7ff800, 0x783800, 0x70ea00, 0x6a0800, 0x638800, 0x5d6200, 0x579000, 0x520800, - 0x4cc800, 0x47ca00, 0x430800, 0x3e8000, 0x3a2c00, 0x360800, 0x321400, 0x2e4a00, - 0x2aa800, 0x272c00, 0x23d600, 0x209e00, 0x1d8800, 0x1a9000, 0x17ae00, 0x14f800, - 0x124400, 0x0fbe00, 0x0d3800, 0x0ade00, 0x088400, 0x065000, 0x041c00, 0x020c00, - }; - static const int estimate_dec[] = { - 0x3e1, 0x3a7, 0x371, 0x340, 0x313, 0x2ea, 0x2c4, 0x2a0, 0x27f, 0x261, 0x245, - 0x22a, 0x212, 0x1fb, 0x1e5, 0x1d1, 0x1be, 0x1ac, 0x19b, 0x18b, 0x17c, 0x16e, - 0x15b, 0x15b, 0x143, 0x143, 0x12d, 0x12d, 0x11a, 0x11a, 0x108, 0x106, - }; - - union - { - double valf; - s64 vali; - }; - valf = val; - s64 mantissa = vali & ((1LL << 52) - 1); - s64 sign = vali & (1ULL << 63); - s64 exponent = vali & (0x7FFLL << 52); - - // Special case 0 - if (mantissa == 0 && exponent == 0) - return sign ? -std::numeric_limits::infinity() : - std::numeric_limits::infinity(); - // Special case NaN-ish numbers - if (exponent == (0x7FFLL << 52)) - { - if (mantissa == 0) - return sign ? -0.0 : 0.0; - return 0.0 + valf; - } - // Special case small inputs - if (exponent < (895LL << 52)) - return sign ? -FLT_MAX : FLT_MAX; - // Special case large inputs - if (exponent >= (1149LL << 52)) - return sign ? -0.0f : 0.0f; - - exponent = (0x7FDLL << 52) - exponent; - - int i = (int)(mantissa >> 37); - vali = sign | exponent; - vali |= (s64)(estimate_base[i / 1024] - (estimate_dec[i / 1024] * (i % 1024) + 1) / 2) << 29; - return valf; -} - static inline double fres_intrinsic(double val) { double estimate;