From 35a468ed38dcc97e3bdedf8d7b2e6973e8dbe836 Mon Sep 17 00:00:00 2001 From: Junekey Jeon Date: Wed, 12 Jan 2022 16:41:32 -0800 Subject: [PATCH] Simplify integer checks --- include/fmt/format-inl.h | 143 +++++++++++++++++++++++++-------------- 1 file changed, 92 insertions(+), 51 deletions(-) diff --git a/include/fmt/format-inl.h b/include/fmt/format-inl.h index ffd72f8b..1460294c 100644 --- a/include/fmt/format-inl.h +++ b/include/fmt/format-inl.h @@ -864,26 +864,27 @@ inline uint64_t umul128_upper64(uint64_t x, uint64_t y) FMT_NOEXCEPT { #endif } -// Computes upper 64 bits of multiplication of a 64-bit unsigned integer and a +// Computes upper 128 bits of multiplication of a 64-bit unsigned integer and a // 128-bit unsigned integer. -inline uint64_t umul192_upper64(uint64_t x, uint128_wrapper y) FMT_NOEXCEPT { - uint128_wrapper g0 = umul128(x, y.high()); - g0 += umul128_upper64(x, y.low()); - return g0.high(); +inline uint128_wrapper umul192_upper128(uint64_t x, + uint128_wrapper y) FMT_NOEXCEPT { + uint128_wrapper r = umul128(x, y.high()); + r += umul128_upper64(x, y.low()); + return r; } -// Computes upper 32 bits of multiplication of a 32-bit unsigned integer and a +// Computes upper 64 bits of multiplication of a 32-bit unsigned integer and a // 64-bit unsigned integer. -inline uint32_t umul96_upper32(uint32_t x, uint64_t y) FMT_NOEXCEPT { - return static_cast(umul128_upper64(x, y)); +inline uint64_t umul96_upper64(uint32_t x, uint64_t y) FMT_NOEXCEPT { + return umul128_upper64(uint64_t(x) << 32, y); } -// Computes middle 64 bits of multiplication of a 64-bit unsigned integer and a +// Computes lower 128 bits of multiplication of a 64-bit unsigned integer and a // 128-bit unsigned integer. -inline uint64_t umul192_middle64(uint64_t x, uint128_wrapper y) FMT_NOEXCEPT { - uint64_t g01 = x * y.high(); - uint64_t g10 = umul128_upper64(x, y.low()); - return g01 + g10; +inline uint64_t umul192_lower128(uint64_t x, uint128_wrapper y) FMT_NOEXCEPT { + uint64_t high = x * y.high(); + uint128_wrapper high_low = umul128(x, y.low()); + return {high + high_low.high(), high_low.low()}; } // Computes lower 64 bits of multiplication of a 32-bit unsigned integer and a @@ -1071,9 +1072,20 @@ template <> struct cache_accessor { return pow10_significands[k - float_info::min_k]; } - static carrier_uint compute_mul(carrier_uint u, - const cache_entry_type& cache) FMT_NOEXCEPT { - return umul96_upper32(u, cache); + struct compute_mul_result { + carrier_uint result; + bool is_integer; + }; + struct compute_mul_parity_result { + bool parity; + bool is_integer; + }; + + static compute_mul_result compute_mul( + carrier_uint u, const cache_entry_type& cache) FMT_NOEXCEPT { + auto r = umul96_upper64(u, cache); + return {static_cast(r >> 32), + static_cast(r) == 0}; } static uint32_t compute_delta(const cache_entry_type& cache, @@ -1081,13 +1093,15 @@ template <> struct cache_accessor { return static_cast(cache >> (64 - 1 - beta_minus_1)); } - static bool compute_mul_parity(carrier_uint two_f, - const cache_entry_type& cache, - int beta_minus_1) FMT_NOEXCEPT { + static compute_mul_parity_result compute_mul_parity( + carrier_uint two_f, const cache_entry_type& cache, + int beta_minus_1) FMT_NOEXCEPT { FMT_ASSERT(beta_minus_1 >= 1, ""); FMT_ASSERT(beta_minus_1 < 64, ""); - return ((umul96_lower64(two_f, cache) >> (64 - beta_minus_1)) & 1) != 0; + auto r = umul96_lower64(two_f, cache); + return {((r >> (64 - beta_minus_1)) & 1) != 0, + static_cast(r >> (32 - beta_minus_1)) == 0}; } static carrier_uint compute_left_endpoint_for_shorter_interval_case( @@ -1838,9 +1852,19 @@ template <> struct cache_accessor { #endif } - static carrier_uint compute_mul(carrier_uint u, - const cache_entry_type& cache) FMT_NOEXCEPT { - return umul192_upper64(u, cache); + struct compute_mul_result { + carrier_uint result; + bool is_integer; + }; + struct compute_mul_parity_result { + bool parity; + bool is_integer; + }; + + static compute_mul_result compute_mul( + carrier_uint u, const cache_entry_type& cache) FMT_NOEXCEPT { + auto r = umul192_upper128(u, cache); + return {r.high(), r.low() == 0}; } static uint32_t compute_delta(cache_entry_type const& cache, @@ -1848,13 +1872,16 @@ template <> struct cache_accessor { return static_cast(cache.high() >> (64 - 1 - beta_minus_1)); } - static bool compute_mul_parity(carrier_uint two_f, - const cache_entry_type& cache, - int beta_minus_1) FMT_NOEXCEPT { + static compute_mul_parity_result compute_mul_parity( + carrier_uint two_f, const cache_entry_type& cache, + int beta_minus_1) FMT_NOEXCEPT { FMT_ASSERT(beta_minus_1 >= 1, ""); FMT_ASSERT(beta_minus_1 < 64, ""); - return ((umul192_middle64(two_f, cache) >> (64 - beta_minus_1)) & 1) != 0; + auto r = umul192_lower128(two_f, cache); + return { + ((r.high() >> (64 - beta_minus_1)) & 1) != 0, + ((r.high() << beta_minus_1) | (r.low() >> (64 - beta_minus_1))) == 0}; } static carrier_uint compute_left_endpoint_for_shorter_interval_case( @@ -2114,38 +2141,49 @@ template decimal_fp to_decimal(T x) FMT_NOEXCEPT { // 10^kappa <= deltai < 10^(kappa + 1) const uint32_t deltai = cache_accessor::compute_delta(cache, beta_minus_1); const carrier_uint two_fc = significand << 1; - const carrier_uint two_fr = two_fc | 1; - const carrier_uint zi = - cache_accessor::compute_mul(two_fr << beta_minus_1, cache); + const typename cache_accessor::compute_mul_result z_mul = + cache_accessor::compute_mul((two_fc | 1) << beta_minus_1, cache); // Step 2: Try larger divisor; remove trailing zeros if necessary. // Using an upper bound on zi, we might be able to optimize the division // better than the compiler; we are computing zi / big_divisor here. decimal_fp ret_value; - ret_value.significand = divide_by_10_to_kappa_plus_1(zi); - uint32_t r = static_cast(zi - float_info::big_divisor * - ret_value.significand); + ret_value.significand = divide_by_10_to_kappa_plus_1(z_mul.result); + const uint32_t r = static_cast( + z_mul.result - float_info::big_divisor * ret_value.significand); if (r > deltai) { goto small_divisor_case_label; } else if (r < deltai) { // Exclude the right endpoint if necessary. - if (r == 0 && !include_right_endpoint && - is_endpoint_integer(two_fr, exponent, minus_k)) { + if (r == 0 && z_mul.is_integer && !include_right_endpoint) { --ret_value.significand; r = float_info::big_divisor; goto small_divisor_case_label; } } else { - // r == deltai; compare fractional parts - // Check conditions in the order different from the paper - // to take advantage of short-circuiting. + // r == deltai; compare fractional parts. const carrier_uint two_fl = two_fc - 1; - if ((!include_left_endpoint || - !is_endpoint_integer(two_fl, exponent, minus_k)) && - !cache_accessor::compute_mul_parity(two_fl, cache, beta_minus_1)) { - goto small_divisor_case_label; + + if (!include_left_endpoint || + exponent < float_info::case_fc_pm_half_lower_threshold || + exponent > float_info::divisibility_check_by_5_threshold) { + // If the left endpoint is not included, the condition for + // success is z^(f) < delta^(f) (odd parity). + // Otherwise, the inequalities on exponent ensure that + // x is not an integer, so if z^(f) >= delta^(f) (even parity), we in fact + // have strict inequality. + if (!cache_accessor::compute_mul_parity(two_fl, cache, beta_minus_1) + .parity) { + goto small_divisor_case_label; + } + } else { + const typename cache_accessor::compute_mul_parity_result x_mul = = + compute_mul_parity(two_fl, cache, beta_minus_1); + if (!x_mul.parity && !x_mul.is_integer) { + goto small_divisor_case_label; + } } } ret_value.exponent = minus_k + float_info::kappa + 1; @@ -2160,37 +2198,40 @@ small_divisor_case_label: ret_value.significand *= 10; ret_value.exponent = minus_k + float_info::kappa; - auto dist = r - (deltai / 2) + (float_info::small_divisor / 2); - bool const approx_y_parity = ((dist ^ (small_divisor / 2)) & 1) != 0; + uint32_t dist = r - (deltai / 2) + (float_info::small_divisor / 2); + const bool approx_y_parity = + ((dist ^ (float_info::small_divisor / 2)) & 1) != 0; // Is dist divisible by 10^kappa? - bool divisible_by_10_to_the_kappa = + const bool divisible_by_small_divisor = check_divisibility_and_divide_by_pow10::kappa>(dist); // Add dist / 10^kappa to the significand. ret_value.significand += dist; - if (divisible_by_10_to_the_kappa) { + if (divisible_by_small_divisor) { // Check z^(f) >= epsilon^(f). // We have either yi == zi - epsiloni or yi == (zi - epsiloni) - 1, // where yi == zi - epsiloni if and only if z^(f) >= epsilon^(f) // Since there are only 2 possibilities, we only need to care about the // parity. Also, zi and r should have the same parity since the divisor // is an even number. - if (cache_accessor::compute_mul_parity(two_fc, cache, beta_minus_1) != - approx_y_parity) { + const typename cache_accessor::compute_mul_parity_result y_mul = = + compute_mul_parity(two_fc, cache, beta_minus_1); + + if (y_mul.parity != approx_y_parity) { --ret_value.significand; } else { // If z^(f) >= epsilon^(f), we might have a tie // when z^(f) == epsilon^(f), or equivalently, when y is an integer - if (is_center_integer(two_fc, exponent, minus_k)) { + if (y_mul.is_integer) { ret_value.significand = ret_value.significand % 2 == 0 ? ret_value.significand : ret_value.significand - 1; } + } + return ret_value; } - return ret_value; -} } // namespace dragonbox // Formats a floating-point number using a variation of the Fixed-Precision