mirror of
https://github.com/openharmony/third_party_rust_minimal-lexical.git
synced 2026-07-01 09:25:04 -04:00
Initial commit.
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
[package]
|
||||
authors = ["Alex Huszagh <ahuszagh@gmail.com>"]
|
||||
name = "minimal_lexical"
|
||||
version = "0.0.1"
|
||||
@@ -0,0 +1,78 @@
|
||||
use super::cached_float80;
|
||||
use super::float::ExtendedFloat;
|
||||
|
||||
// POWERS
|
||||
|
||||
/// Precalculated powers that uses two-separate arrays for memory-efficiency.
|
||||
#[doc(hidden)]
|
||||
pub(crate) struct ExtendedFloatArray {
|
||||
// Pre-calculated mantissa for the powers.
|
||||
pub mant: &'static [u64],
|
||||
// Pre-calculated binary exponents for the powers.
|
||||
pub exp: &'static [i32],
|
||||
}
|
||||
|
||||
/// Allow indexing of values without bounds checking
|
||||
impl ExtendedFloatArray {
|
||||
#[inline]
|
||||
pub fn get_extended_float(&self, index: usize) -> ExtendedFloat {
|
||||
let mant = self.mant[index];
|
||||
let exp = self.exp[index];
|
||||
ExtendedFloat { mant: mant, exp: exp }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn len(&self) -> usize {
|
||||
self.mant.len()
|
||||
}
|
||||
}
|
||||
|
||||
// MODERATE PATH POWERS
|
||||
|
||||
/// Precalculated powers of base N for the moderate path.
|
||||
#[doc(hidden)]
|
||||
pub(crate) struct ModeratePathPowers {
|
||||
// Pre-calculated small powers.
|
||||
pub small: ExtendedFloatArray,
|
||||
// Pre-calculated large powers.
|
||||
pub large: ExtendedFloatArray,
|
||||
/// Pre-calculated small powers as 64-bit integers
|
||||
pub small_int: &'static [u64],
|
||||
// Step between large powers and number of small powers.
|
||||
pub step: i32,
|
||||
// Exponent bias for the large powers.
|
||||
pub bias: i32,
|
||||
}
|
||||
|
||||
/// Allow indexing of values without bounds checking
|
||||
impl ModeratePathPowers {
|
||||
#[inline]
|
||||
pub fn get_small(&self, index: usize) -> ExtendedFloat {
|
||||
self.small.get_extended_float(index)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn get_large(&self, index: usize) -> ExtendedFloat {
|
||||
self.large.get_extended_float(index)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn get_small_int(&self, index: usize) -> u64 {
|
||||
self.small_int[index]
|
||||
}
|
||||
}
|
||||
|
||||
// CACHED EXTENDED POWERS
|
||||
|
||||
/// Cached powers as a trait for a floating-point type.
|
||||
pub(super) trait ModeratePathCache {
|
||||
/// Get cached powers.
|
||||
fn get_powers() -> &'static ModeratePathPowers;
|
||||
}
|
||||
|
||||
impl ModeratePathCache for ExtendedFloat {
|
||||
#[inline]
|
||||
fn get_powers() -> &'static ModeratePathPowers {
|
||||
cached_float80::get_powers()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,345 @@
|
||||
//! Cached exponents for basen values with 80-bit extended floats.
|
||||
//!
|
||||
//! Exact versions of base**n as an extended-precision float, with both
|
||||
//! large and small powers. Use the large powers to minimize the amount
|
||||
//! of compounded error.
|
||||
//!
|
||||
//! These values were calculated using Python, using the arbitrary-precision
|
||||
//! integer to calculate exact extended-representation of each value.
|
||||
//! These values are all normalized.
|
||||
//!
|
||||
//! This files takes ~ 26KB of storage.
|
||||
//!
|
||||
//! This file is mostly automatically generated, do not change values
|
||||
//! manually, unless you know what you are doing. The script to generate
|
||||
//! the values is as follows:
|
||||
//!
|
||||
//! ```text
|
||||
//! import math
|
||||
//! from collections import deque
|
||||
//!
|
||||
//! STEP_STR = "const BASE{0}_STEP: i32 = {1};"
|
||||
//! SMALL_MANTISSA_STR = "const BASE{0}_SMALL_MANTISSA: [u64; {1}] = ["
|
||||
//! SMALL_EXPONENT_STR = "const BASE{0}_SMALL_EXPONENT: [i32; {1}] = ["
|
||||
//! LARGE_MANTISSA_STR = "const BASE{0}_LARGE_MANTISSA: [u64; {1}] = ["
|
||||
//! LARGE_EXPONENT_STR = "const BASE{0}_LARGE_EXPONENT: [i32; {1}] = ["
|
||||
//! SMALL_INT_STR = "const BASE{0}_SMALL_INT_POWERS: [u64; {1}] = {2};"
|
||||
//! BIAS_STR = "const BASE{0}_BIAS: i32 = {1};"
|
||||
//! EXP_STR = "// {}^{}"
|
||||
//! POWER_STR = """pub(crate) const BASE{0}_POWERS: ModeratePathPowers<u64> = ModeratePathPowers {{
|
||||
//! small: ExtendedFloatArray {{ mant: &BASE{0}_SMALL_MANTISSA, exp: &BASE{0}_SMALL_EXPONENT }},
|
||||
//! large: ExtendedFloatArray {{ mant: &BASE{0}_LARGE_MANTISSA, exp: &BASE{0}_LARGE_EXPONENT }},
|
||||
//! small_int: &BASE{0}_SMALL_INT_POWERS,
|
||||
//! step: BASE{0}_STEP,
|
||||
//! bias: BASE{0}_BIAS,
|
||||
//! }};\n"""
|
||||
//!
|
||||
//! def calculate_bitshift(base, exponent):
|
||||
//! '''
|
||||
//! Calculate the bitshift required for a given base. The exponent
|
||||
//! is the absolute value of the max exponent (log distance from 1.)
|
||||
//! '''
|
||||
//!
|
||||
//! return 63 + math.ceil(math.log2(base**exponent))
|
||||
//!
|
||||
//!
|
||||
//! def next_fp(fp, base, step = 1):
|
||||
//! '''Generate the next extended-floating point value.'''
|
||||
//!
|
||||
//! return (fp[0] * (base**step), fp[1])
|
||||
//!
|
||||
//!
|
||||
//! def prev_fp(fp, base, step = 1):
|
||||
//! '''Generate the previous extended-floating point value.'''
|
||||
//!
|
||||
//! return (fp[0] // (base**step), fp[1])
|
||||
//!
|
||||
//!
|
||||
//! def normalize_fp(fp):
|
||||
//! '''Normalize a extended-float so the MSB is the 64th bit'''
|
||||
//!
|
||||
//! while fp[0] >> 64 != 0:
|
||||
//! fp = (fp[0] >> 1, fp[1] + 1)
|
||||
//! return fp
|
||||
//!
|
||||
//!
|
||||
//! def generate_small(base, count):
|
||||
//! '''Generate the small powers for a given base'''
|
||||
//!
|
||||
//! bitshift = calculate_bitshift(base, count)
|
||||
//! fps = []
|
||||
//! fp = (1 << bitshift, -bitshift)
|
||||
//! for exp in range(count):
|
||||
//! fps.append((normalize_fp(fp), exp))
|
||||
//! fp = next_fp(fp, base)
|
||||
//!
|
||||
//! # Print the small powers as integers.
|
||||
//! ints = [base**i for _, i in fps]
|
||||
//!
|
||||
//! return fps, ints
|
||||
//!
|
||||
//!
|
||||
//! def generate_large(base, step):
|
||||
//! '''Generate the large powers for a given base.'''
|
||||
//!
|
||||
//! # Get our starting parameters
|
||||
//! min_exp = math.floor(math.log(5e-324, base) - math.log(0xFFFFFFFFFFFFFFFF, base))
|
||||
//! max_exp = math.ceil(math.log(1.7976931348623157e+308, base))
|
||||
//! bitshift = calculate_bitshift(base, abs(min_exp - step))
|
||||
//! fps = deque()
|
||||
//!
|
||||
//! # Add negative exponents
|
||||
//! # We need to go below the minimum exponent, since we need
|
||||
//! # all resulting exponents to be positive.
|
||||
//! fp = (1 << bitshift, -bitshift)
|
||||
//! for exp in range(-step, min_exp-step, -step):
|
||||
//! fp = prev_fp(fp, base, step)
|
||||
//! fps.appendleft((normalize_fp(fp), exp))
|
||||
//!
|
||||
//! # Add positive exponents
|
||||
//! fp = (1 << bitshift, -bitshift)
|
||||
//! fps.append((normalize_fp(fp), 0))
|
||||
//! for exp in range(step, max_exp, step):
|
||||
//! fp = next_fp(fp, base, step)
|
||||
//! fps.append((normalize_fp(fp), exp))
|
||||
//!
|
||||
//! # Return the smallest exp, AKA, the bias
|
||||
//! return fps, -fps[0][1]
|
||||
//!
|
||||
//!
|
||||
//! def print_array(base, string, fps, index):
|
||||
//! '''Print an entire array'''
|
||||
//!
|
||||
//! print(string.format(base, len(fps)))
|
||||
//! for fp, exp in fps:
|
||||
//! value = " {},".format(fp[index])
|
||||
//! exp = EXP_STR.format(base, exp)
|
||||
//! print(value.ljust(30, " ") + exp)
|
||||
//! print("];")
|
||||
//!
|
||||
//!
|
||||
//! def generate_base(base):
|
||||
//! '''Generate all powers and variables.'''
|
||||
//!
|
||||
//! step = math.floor(math.log(1e10, base))
|
||||
//! small, ints = generate_small(base, step)
|
||||
//! large, bias = generate_large(base, step)
|
||||
//!
|
||||
//! print_array(base, SMALL_MANTISSA_STR, small, 0)
|
||||
//! print_array(base, SMALL_EXPONENT_STR, small, 1)
|
||||
//! print_array(base, LARGE_MANTISSA_STR, large, 0)
|
||||
//! print_array(base, LARGE_EXPONENT_STR, large, 1)
|
||||
//! print(SMALL_INT_STR.format(base, len(ints), ints))
|
||||
//! print(STEP_STR.format(base, step))
|
||||
//! print(BIAS_STR.format(base, bias))
|
||||
//!
|
||||
//!
|
||||
//! def generate():
|
||||
//! '''Generate all bases.'''
|
||||
//!
|
||||
//! bases = [
|
||||
//! 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21,
|
||||
//! 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36
|
||||
//! ]
|
||||
//!
|
||||
//! for base in bases:
|
||||
//! print("// BASE{}\n".format(base))
|
||||
//! generate_base(base)
|
||||
//! print("")
|
||||
//!
|
||||
//! print("// HIGH LEVEL\n// ----------\n")
|
||||
//!
|
||||
//! for base in bases:
|
||||
//! print(POWER_STR.format(base))
|
||||
//!
|
||||
//!
|
||||
//! if __name__ == '__main__':
|
||||
//! generate()
|
||||
//! ```
|
||||
|
||||
use super::cached::{ExtendedFloatArray, ModeratePathPowers};
|
||||
|
||||
// LOW-LEVEL
|
||||
// ---------
|
||||
|
||||
// BASE10
|
||||
|
||||
const BASE10_SMALL_MANTISSA: [u64; 10] = [
|
||||
9223372036854775808, // 10^0
|
||||
11529215046068469760, // 10^1
|
||||
14411518807585587200, // 10^2
|
||||
18014398509481984000, // 10^3
|
||||
11258999068426240000, // 10^4
|
||||
14073748835532800000, // 10^5
|
||||
17592186044416000000, // 10^6
|
||||
10995116277760000000, // 10^7
|
||||
13743895347200000000, // 10^8
|
||||
17179869184000000000, // 10^9
|
||||
];
|
||||
const BASE10_SMALL_EXPONENT: [i32; 10] = [
|
||||
-63, // 10^0
|
||||
-60, // 10^1
|
||||
-57, // 10^2
|
||||
-54, // 10^3
|
||||
-50, // 10^4
|
||||
-47, // 10^5
|
||||
-44, // 10^6
|
||||
-40, // 10^7
|
||||
-37, // 10^8
|
||||
-34, // 10^9
|
||||
];
|
||||
const BASE10_LARGE_MANTISSA: [u64; 66] = [
|
||||
11555125961253852697, // 10^-350
|
||||
13451937075301367670, // 10^-340
|
||||
15660115838168849784, // 10^-330
|
||||
18230774251475056848, // 10^-320
|
||||
10611707258198326947, // 10^-310
|
||||
12353653155963782858, // 10^-300
|
||||
14381545078898527261, // 10^-290
|
||||
16742321987285426889, // 10^-280
|
||||
9745314011399999080, // 10^-270
|
||||
11345038669416679861, // 10^-260
|
||||
13207363278391631158, // 10^-250
|
||||
15375394465392026070, // 10^-240
|
||||
17899314949046850752, // 10^-230
|
||||
10418772551374772303, // 10^-220
|
||||
12129047596099288555, // 10^-210
|
||||
14120069793541087484, // 10^-200
|
||||
16437924692338667210, // 10^-190
|
||||
9568131466127621947, // 10^-180
|
||||
11138771039116687545, // 10^-170
|
||||
12967236152753102995, // 10^-160
|
||||
15095849699286165408, // 10^-150
|
||||
17573882009934360870, // 10^-140
|
||||
10229345649675443343, // 10^-130
|
||||
11908525658859223294, // 10^-120
|
||||
13863348470604074297, // 10^-110
|
||||
16139061738043178685, // 10^-100
|
||||
9394170331095332911, // 10^-90
|
||||
10936253623915059621, // 10^-80
|
||||
12731474852090538039, // 10^-70
|
||||
14821387422376473014, // 10^-60
|
||||
17254365866976409468, // 10^-50
|
||||
10043362776618689222, // 10^-40
|
||||
11692013098647223345, // 10^-30
|
||||
13611294676837538538, // 10^-20
|
||||
15845632502852867518, // 10^-10
|
||||
9223372036854775808, // 10^0
|
||||
10737418240000000000, // 10^10
|
||||
12500000000000000000, // 10^20
|
||||
14551915228366851806, // 10^30
|
||||
16940658945086006781, // 10^40
|
||||
9860761315262647567, // 10^50
|
||||
11479437019748901445, // 10^60
|
||||
13363823550460978230, // 10^70
|
||||
15557538194652854267, // 10^80
|
||||
18111358157653424735, // 10^90
|
||||
10542197943230523224, // 10^100
|
||||
12272733663244316382, // 10^110
|
||||
14287342391028437277, // 10^120
|
||||
16632655625031838749, // 10^130
|
||||
9681479787123295682, // 10^140
|
||||
11270725851789228247, // 10^150
|
||||
13120851772591970218, // 10^160
|
||||
15274681817498023410, // 10^170
|
||||
17782069995880619867, // 10^180
|
||||
10350527006597618960, // 10^190
|
||||
12049599325514420588, // 10^200
|
||||
14027579833653779454, // 10^210
|
||||
16330252207878254650, // 10^220
|
||||
9505457831475799117, // 10^230
|
||||
11065809325636130661, // 10^240
|
||||
12882297539194266616, // 10^250
|
||||
14996968138956309548, // 10^260
|
||||
17458768723248864463, // 10^270
|
||||
10162340898095201970, // 10^280
|
||||
11830521861667747109, // 10^290
|
||||
13772540099066387756, // 10^300
|
||||
];
|
||||
const BASE10_LARGE_EXPONENT: [i32; 66] = [
|
||||
-1226, // 10^-350
|
||||
-1193, // 10^-340
|
||||
-1160, // 10^-330
|
||||
-1127, // 10^-320
|
||||
-1093, // 10^-310
|
||||
-1060, // 10^-300
|
||||
-1027, // 10^-290
|
||||
-994, // 10^-280
|
||||
-960, // 10^-270
|
||||
-927, // 10^-260
|
||||
-894, // 10^-250
|
||||
-861, // 10^-240
|
||||
-828, // 10^-230
|
||||
-794, // 10^-220
|
||||
-761, // 10^-210
|
||||
-728, // 10^-200
|
||||
-695, // 10^-190
|
||||
-661, // 10^-180
|
||||
-628, // 10^-170
|
||||
-595, // 10^-160
|
||||
-562, // 10^-150
|
||||
-529, // 10^-140
|
||||
-495, // 10^-130
|
||||
-462, // 10^-120
|
||||
-429, // 10^-110
|
||||
-396, // 10^-100
|
||||
-362, // 10^-90
|
||||
-329, // 10^-80
|
||||
-296, // 10^-70
|
||||
-263, // 10^-60
|
||||
-230, // 10^-50
|
||||
-196, // 10^-40
|
||||
-163, // 10^-30
|
||||
-130, // 10^-20
|
||||
-97, // 10^-10
|
||||
-63, // 10^0
|
||||
-30, // 10^10
|
||||
3, // 10^20
|
||||
36, // 10^30
|
||||
69, // 10^40
|
||||
103, // 10^50
|
||||
136, // 10^60
|
||||
169, // 10^70
|
||||
202, // 10^80
|
||||
235, // 10^90
|
||||
269, // 10^100
|
||||
302, // 10^110
|
||||
335, // 10^120
|
||||
368, // 10^130
|
||||
402, // 10^140
|
||||
435, // 10^150
|
||||
468, // 10^160
|
||||
501, // 10^170
|
||||
534, // 10^180
|
||||
568, // 10^190
|
||||
601, // 10^200
|
||||
634, // 10^210
|
||||
667, // 10^220
|
||||
701, // 10^230
|
||||
734, // 10^240
|
||||
767, // 10^250
|
||||
800, // 10^260
|
||||
833, // 10^270
|
||||
867, // 10^280
|
||||
900, // 10^290
|
||||
933, // 10^300
|
||||
];
|
||||
const BASE10_SMALL_INT_POWERS: [u64; 10] = [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000];
|
||||
const BASE10_STEP: i32 = 10;
|
||||
const BASE10_BIAS: i32 = 350;
|
||||
|
||||
// HIGH LEVEL
|
||||
// ----------
|
||||
|
||||
pub(crate) const BASE10_POWERS: ModeratePathPowers = ModeratePathPowers {
|
||||
small: ExtendedFloatArray { mant: &BASE10_SMALL_MANTISSA, exp: &BASE10_SMALL_EXPONENT },
|
||||
large: ExtendedFloatArray { mant: &BASE10_LARGE_MANTISSA, exp: &BASE10_LARGE_EXPONENT },
|
||||
small_int: &BASE10_SMALL_INT_POWERS,
|
||||
step: BASE10_STEP,
|
||||
bias: BASE10_BIAS,
|
||||
};
|
||||
|
||||
/// Get powers from base.
|
||||
pub(crate) fn get_powers() -> &'static ModeratePathPowers {
|
||||
&BASE10_POWERS
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
use super::float::ExtendedFloat;
|
||||
use super::num::*;
|
||||
|
||||
// INTO FLOAT
|
||||
|
||||
// Export extended-precision float to native float.
|
||||
//
|
||||
// The extended-precision float must be in native float representation,
|
||||
// with overflow/underflow appropriately handled.
|
||||
pub(crate) fn into_float<F>(fp: ExtendedFloat) -> F
|
||||
where F: Float
|
||||
{
|
||||
// Export floating-point number.
|
||||
if fp.mant == 0 || fp.exp < F::DENORMAL_EXPONENT {
|
||||
// sub-denormal, underflow
|
||||
F::ZERO
|
||||
} else if fp.exp >= F::MAX_EXPONENT {
|
||||
// overflow
|
||||
F::from_bits(F::INFINITY_BITS)
|
||||
} else {
|
||||
// calculate the exp and fraction bits, and return a float from bits.
|
||||
let exp: u64;
|
||||
if (fp.exp == F::DENORMAL_EXPONENT) && (fp.mant & F::HIDDEN_BIT_MASK.as_u64()) == 0 {
|
||||
exp = 0;
|
||||
} else {
|
||||
exp = (fp.exp + F::EXPONENT_BIAS).as_u64();
|
||||
}
|
||||
let exp = exp << F::MANTISSA_SIZE;
|
||||
let mant = fp.mant & F::MANTISSA_MASK.as_u64();
|
||||
F::from_bits(F::Unsigned::as_cast(mant | exp))
|
||||
}
|
||||
}
|
||||
+111
@@ -0,0 +1,111 @@
|
||||
// FLOAT TYPE
|
||||
|
||||
use super::convert::*;
|
||||
use super::num::*;
|
||||
use super::rounding::*;
|
||||
use super::shift::*;
|
||||
|
||||
/// Extended precision floating-point type.
|
||||
///
|
||||
/// Private implementation, exposed only for testing purposes.
|
||||
#[doc(hidden)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub struct ExtendedFloat {
|
||||
/// Mantissa for the extended-precision float.
|
||||
pub mant: u64,
|
||||
/// Binary exponent for the extended-precision float.
|
||||
pub exp: i32,
|
||||
}
|
||||
|
||||
impl ExtendedFloat {
|
||||
// PROPERTIES
|
||||
|
||||
// OPERATIONS
|
||||
|
||||
/// Multiply two normalized extended-precision floats, as if by `a*b`.
|
||||
///
|
||||
/// The precision is maximal when the numbers are normalized, however,
|
||||
/// decent precision will occur as long as both values have high bits
|
||||
/// set. The result is not normalized.
|
||||
///
|
||||
/// Algorithm:
|
||||
/// 1. Non-signed multiplication of mantissas (requires 2x as many bits as input).
|
||||
/// 2. Normalization of the result (not done here).
|
||||
/// 3. Addition of exponents.
|
||||
pub fn mul(&self, b: &ExtendedFloat) -> ExtendedFloat {
|
||||
// Logic check, values must be decently normalized prior to multiplication.
|
||||
debug_assert!((self.mant & u64::HIMASK != 0) && (b.mant & u64::HIMASK != 0));
|
||||
|
||||
// Extract high-and-low masks.
|
||||
let ah = self.mant >> u64::HALF;
|
||||
let al = self.mant & u64::LOMASK;
|
||||
let bh = b.mant >> u64::HALF;
|
||||
let bl = b.mant & u64::LOMASK;
|
||||
|
||||
// Get our products
|
||||
let ah_bl = ah * bl;
|
||||
let al_bh = al * bh;
|
||||
let al_bl = al * bl;
|
||||
let ah_bh = ah * bh;
|
||||
|
||||
let mut tmp = (ah_bl & u64::LOMASK) + (al_bh & u64::LOMASK) + (al_bl >> u64::HALF);
|
||||
// round up
|
||||
tmp += 1 << (u64::HALF-1);
|
||||
|
||||
ExtendedFloat {
|
||||
mant: ah_bh + (ah_bl >> u64::HALF) + (al_bh >> u64::HALF) + (tmp >> u64::HALF),
|
||||
exp: self.exp + b.exp + u64::FULL
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply in-place, as if by `a*b`.
|
||||
///
|
||||
/// The result is not normalized.
|
||||
pub fn imul(&mut self, b: &ExtendedFloat) {
|
||||
*self = self.mul(b);
|
||||
}
|
||||
|
||||
// NORMALIZE
|
||||
|
||||
/// Normalize float-point number.
|
||||
///
|
||||
/// Shift the mantissa so the number of leading zeros is 0, or the value
|
||||
/// itself is 0.
|
||||
///
|
||||
/// Get the number of bytes shifted.
|
||||
pub fn normalize(&mut self) -> u32 {
|
||||
// Note:
|
||||
// Using the cltz intrinsic via leading_zeros is way faster (~10x)
|
||||
// than shifting 1-bit at a time, via while loop, and also way
|
||||
// faster (~2x) than an unrolled loop that checks at 32, 16, 4,
|
||||
// 2, and 1 bit.
|
||||
//
|
||||
// Using a modulus of pow2 (which will get optimized to a bitwise
|
||||
// and with 0x3F or faster) is slightly slower than an if/then,
|
||||
// however, removing the if/then will likely optimize more branched
|
||||
// code as it removes conditional logic.
|
||||
|
||||
// Calculate the number of leading zeros, and then zero-out
|
||||
// any overflowing bits, to avoid shl overflow when self.mant == 0.
|
||||
let shift = if self.mant == 0 { 0 } else { self.mant.leading_zeros() };
|
||||
shl(self, shift as i32);
|
||||
shift
|
||||
}
|
||||
|
||||
// ROUND
|
||||
|
||||
/// Lossy round float-point number to native mantissa boundaries.
|
||||
pub(crate) fn round_to_native<F>(&mut self)
|
||||
where F: Float
|
||||
{
|
||||
round_to_native::<F>(self)
|
||||
}
|
||||
|
||||
// INTO
|
||||
|
||||
/// Convert into lower-precision native float.
|
||||
pub fn into_float<F: Float>(mut self) -> F {
|
||||
self.round_to_native::<F>();
|
||||
into_float(self)
|
||||
}
|
||||
}
|
||||
+11
@@ -0,0 +1,11 @@
|
||||
mod cached;
|
||||
mod cached_float80;
|
||||
mod convert;
|
||||
mod float;
|
||||
mod num;
|
||||
mod parse;
|
||||
mod rounding;
|
||||
mod shift;
|
||||
|
||||
// API.
|
||||
pub use self::parse::parse_float;
|
||||
+379
@@ -0,0 +1,379 @@
|
||||
use std::ops;
|
||||
|
||||
pub trait AsPrimitive:
|
||||
Sized +
|
||||
Copy +
|
||||
PartialEq +
|
||||
PartialOrd +
|
||||
Send +
|
||||
Sync
|
||||
{
|
||||
fn as_u8(self) -> u8;
|
||||
fn as_u16(self) -> u16;
|
||||
fn as_u32(self) -> u32;
|
||||
fn as_u64(self) -> u64;
|
||||
fn as_u128(self) -> u128;
|
||||
fn as_usize(self) -> usize;
|
||||
fn as_i8(self) -> i8;
|
||||
fn as_i16(self) -> i16;
|
||||
fn as_i32(self) -> i32;
|
||||
fn as_i64(self) -> i64;
|
||||
fn as_i128(self) -> i128;
|
||||
fn as_isize(self) -> isize;
|
||||
fn as_f32(self) -> f32;
|
||||
fn as_f64(self) -> f64;
|
||||
}
|
||||
|
||||
macro_rules! as_primitive_impl {
|
||||
($($t:tt)*) => ($(
|
||||
impl AsPrimitive for $t {
|
||||
#[inline]
|
||||
fn as_u8(self) -> u8 {
|
||||
self as u8
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_u16(self) -> u16 {
|
||||
self as u16
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_u32(self) -> u32 {
|
||||
self as u32
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_u64(self) -> u64 {
|
||||
self as u64
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_u128(self) -> u128 {
|
||||
self as u128
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_usize(self) -> usize {
|
||||
self as usize
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i8(self) -> i8 {
|
||||
self as i8
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i16(self) -> i16 {
|
||||
self as i16
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i32(self) -> i32 {
|
||||
self as i32
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i64(self) -> i64 {
|
||||
self as i64
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i128(self) -> i128 {
|
||||
self as i128
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_isize(self) -> isize {
|
||||
self as isize
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_f32(self) -> f32 {
|
||||
self as f32
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_f64(self) -> f64 {
|
||||
self as f64
|
||||
}
|
||||
}
|
||||
)*)
|
||||
}
|
||||
|
||||
as_primitive_impl! { u8 u16 u32 u64 u128 usize i8 i16 i32 i64 i128 isize f32 f64 }
|
||||
|
||||
/// An interface for casting between machine scalars.
|
||||
pub trait AsCast: AsPrimitive {
|
||||
/// Creates a number from another value that can be converted into
|
||||
/// a primitive via the `AsPrimitive` trait.
|
||||
fn as_cast<N: AsPrimitive>(n: N) -> Self;
|
||||
}
|
||||
|
||||
macro_rules! as_cast_impl {
|
||||
($t:ty, $meth:ident) => {
|
||||
impl AsCast for $t {
|
||||
#[inline]
|
||||
fn as_cast<N: AsPrimitive>(n: N) -> $t {
|
||||
n.$meth()
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
as_cast_impl!(u8, as_u8);
|
||||
as_cast_impl!(u16, as_u16);
|
||||
as_cast_impl!(u32, as_u32);
|
||||
as_cast_impl!(u64, as_u64);
|
||||
as_cast_impl!(u128, as_u128);
|
||||
as_cast_impl!(usize, as_usize);
|
||||
as_cast_impl!(i8, as_i8);
|
||||
as_cast_impl!(i16, as_i16);
|
||||
as_cast_impl!(i32, as_i32);
|
||||
as_cast_impl!(i64, as_i64);
|
||||
as_cast_impl!(i128, as_i128);
|
||||
as_cast_impl!(isize, as_isize);
|
||||
as_cast_impl!(f32, as_f32);
|
||||
as_cast_impl!(f64, as_f64);
|
||||
|
||||
pub trait Number:
|
||||
AsCast +
|
||||
ops::Add<Output=Self> +
|
||||
ops::AddAssign +
|
||||
ops::Div<Output=Self> +
|
||||
ops::DivAssign +
|
||||
ops::Mul<Output=Self> +
|
||||
ops::MulAssign +
|
||||
ops::Rem<Output=Self> +
|
||||
ops::RemAssign +
|
||||
ops::Sub<Output=Self> +
|
||||
ops::SubAssign
|
||||
{}
|
||||
|
||||
macro_rules! number_impl {
|
||||
($($t:tt)*) => ($(
|
||||
impl Number for $t {
|
||||
}
|
||||
)*)
|
||||
}
|
||||
|
||||
number_impl! { u8 u16 u32 u64 u128 usize i8 i16 i32 i64 i128 isize f32 f64 }
|
||||
|
||||
pub trait Integer:
|
||||
Number +
|
||||
ops::BitAnd<Output=Self> +
|
||||
ops::BitAndAssign
|
||||
{
|
||||
const ZERO: Self;
|
||||
}
|
||||
|
||||
macro_rules! integer_impl {
|
||||
($($t:tt)*) => ($(
|
||||
impl Integer for $t {
|
||||
const ZERO: $t = 0;
|
||||
}
|
||||
)*)
|
||||
}
|
||||
|
||||
integer_impl! { u8 u16 u32 u64 u128 usize i8 i16 i32 i64 i128 isize }
|
||||
|
||||
pub trait UnsignedInteger: Integer {}
|
||||
|
||||
macro_rules! unsigned_integer_impl {
|
||||
($($t:ty)*) => ($(
|
||||
impl UnsignedInteger for $t {}
|
||||
)*)
|
||||
}
|
||||
|
||||
unsigned_integer_impl! { u8 u16 u32 u64 u128 usize }
|
||||
|
||||
/// Type trait for the mantissa type.
|
||||
pub trait Mantissa: UnsignedInteger {
|
||||
/// Mask for the left-most bit, to check if the value is normalized.
|
||||
const NORMALIZED_MASK: Self;
|
||||
/// Mask to extract the high bits from the integer.
|
||||
const HIMASK: Self;
|
||||
/// Mask to extract the low bits from the integer.
|
||||
const LOMASK: Self;
|
||||
/// Full size of the integer, in bits.
|
||||
const FULL: i32;
|
||||
/// Half size of the integer, in bits.
|
||||
const HALF: i32 = Self::FULL / 2;
|
||||
}
|
||||
|
||||
impl Mantissa for u64 {
|
||||
const NORMALIZED_MASK: u64 = 0x8000000000000000;
|
||||
const HIMASK: u64 = 0xFFFFFFFF00000000;
|
||||
const LOMASK: u64 = 0x00000000FFFFFFFF;
|
||||
const FULL: i32 = 64;
|
||||
}
|
||||
|
||||
/// Get exact exponent limit for radix.
|
||||
pub trait Float: Number {
|
||||
/// Unsigned type of the same size.
|
||||
type Unsigned: UnsignedInteger;
|
||||
|
||||
const ZERO: Self;
|
||||
|
||||
// MASKS
|
||||
|
||||
/// Bitmask for the sign bit.
|
||||
const SIGN_MASK: Self::Unsigned;
|
||||
/// Bitmask for the exponent, including the hidden bit.
|
||||
const EXPONENT_MASK: Self::Unsigned;
|
||||
/// Bitmask for the hidden bit in exponent, which is an implicit 1 in the fraction.
|
||||
const HIDDEN_BIT_MASK: Self::Unsigned;
|
||||
/// Bitmask for the mantissa (fraction), excluding the hidden bit.
|
||||
const MANTISSA_MASK: Self::Unsigned;
|
||||
|
||||
// PROPERTIES
|
||||
|
||||
/// Positive infinity as bits.
|
||||
const INFINITY_BITS: Self::Unsigned;
|
||||
/// Positive infinity as bits.
|
||||
const NEGATIVE_INFINITY_BITS: Self::Unsigned;
|
||||
/// Size of the significand (mantissa) without hidden bit.
|
||||
const MANTISSA_SIZE: i32;
|
||||
/// Bias of the exponet
|
||||
const EXPONENT_BIAS: i32;
|
||||
/// Exponent portion of a denormal float.
|
||||
const DENORMAL_EXPONENT: i32;
|
||||
/// Maximum exponent value in float.
|
||||
const MAX_EXPONENT: i32;
|
||||
|
||||
// ROUNDING
|
||||
|
||||
/// Default number of bits to shift (or 64 - mantissa size - 1).
|
||||
const DEFAULT_SHIFT: i32;
|
||||
/// Mask to determine if a full-carry occurred (1 in bit above hidden bit).
|
||||
const CARRY_MASK: u64;
|
||||
|
||||
/// Get min and max exponent limits (exact) from radix.
|
||||
fn exponent_limit() -> (i32, i32);
|
||||
|
||||
/// Get the number of digits that can be shifted from exponent to mantissa.
|
||||
fn mantissa_limit() -> i32;
|
||||
|
||||
// Re-exported methods from std.
|
||||
fn powi(self, n: i32) -> Self;
|
||||
fn from_bits(u: Self::Unsigned) -> Self;
|
||||
fn to_bits(self) -> Self::Unsigned;
|
||||
|
||||
/// Returns true if the float is a denormal.
|
||||
#[inline]
|
||||
fn is_denormal(self) -> bool {
|
||||
self.to_bits() & Self::EXPONENT_MASK == Self::Unsigned::ZERO
|
||||
}
|
||||
|
||||
/// Get exponent component from the float.
|
||||
#[inline]
|
||||
fn exponent(self) -> i32 {
|
||||
if self.is_denormal() {
|
||||
return Self::DENORMAL_EXPONENT;
|
||||
}
|
||||
|
||||
let bits = self.to_bits();
|
||||
let biased_e: i32 = (bits & Self::EXPONENT_MASK).as_i32() >> Self::MANTISSA_SIZE;
|
||||
biased_e - Self::EXPONENT_BIAS
|
||||
}
|
||||
|
||||
/// Get mantissa (significand) component from float.
|
||||
#[inline]
|
||||
fn mantissa(self) -> Self::Unsigned {
|
||||
let bits = self.to_bits();
|
||||
let s = bits & Self::MANTISSA_MASK;
|
||||
if !self.is_denormal() {
|
||||
s + Self::HIDDEN_BIT_MASK
|
||||
} else {
|
||||
s
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Float for f32 {
|
||||
type Unsigned = u32;
|
||||
|
||||
const ZERO: f32 = 0.0;
|
||||
|
||||
const SIGN_MASK: u32 = 0x80000000;
|
||||
const EXPONENT_MASK: u32 = 0x7F800000;
|
||||
const HIDDEN_BIT_MASK: u32 = 0x00800000;
|
||||
const MANTISSA_MASK: u32 = 0x007FFFFF;
|
||||
const INFINITY_BITS: u32 = 0x7F800000;
|
||||
const NEGATIVE_INFINITY_BITS: u32 = Self::INFINITY_BITS | Self::SIGN_MASK;
|
||||
const MANTISSA_SIZE: i32 = 23;
|
||||
const EXPONENT_BIAS: i32 = 127 + Self::MANTISSA_SIZE;
|
||||
const DENORMAL_EXPONENT: i32 = 1 - Self::EXPONENT_BIAS;
|
||||
const MAX_EXPONENT: i32 = 0xFF - Self::EXPONENT_BIAS;
|
||||
const DEFAULT_SHIFT: i32 = u64::FULL - f32::MANTISSA_SIZE - 1;
|
||||
const CARRY_MASK: u64 = 0x1000000;
|
||||
|
||||
#[inline]
|
||||
fn exponent_limit() -> (i32, i32) {
|
||||
(-10, 10)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn mantissa_limit() -> i32 {
|
||||
7
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn powi(self, n: i32) -> f32 {
|
||||
f32::powi(self, n)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn from_bits(u: u32) -> f32 {
|
||||
f32::from_bits(u)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_bits(self) -> u32 {
|
||||
f32::to_bits(self)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl Float for f64 {
|
||||
type Unsigned = u64;
|
||||
|
||||
const ZERO: f64 = 0.0;
|
||||
|
||||
const SIGN_MASK: u64 = 0x8000000000000000;
|
||||
const EXPONENT_MASK: u64 = 0x7FF0000000000000;
|
||||
const HIDDEN_BIT_MASK: u64 = 0x0010000000000000;
|
||||
const MANTISSA_MASK: u64 = 0x000FFFFFFFFFFFFF;
|
||||
const INFINITY_BITS: u64 = 0x7FF0000000000000;
|
||||
const NEGATIVE_INFINITY_BITS: u64 = Self::INFINITY_BITS | Self::SIGN_MASK;
|
||||
const MANTISSA_SIZE: i32 = 52;
|
||||
const EXPONENT_BIAS: i32 = 1023 + Self::MANTISSA_SIZE;
|
||||
const DENORMAL_EXPONENT: i32 = 1 - Self::EXPONENT_BIAS;
|
||||
const MAX_EXPONENT: i32 = 0x7FF - Self::EXPONENT_BIAS;
|
||||
const DEFAULT_SHIFT: i32 = u64::FULL - f64::MANTISSA_SIZE - 1;
|
||||
const CARRY_MASK: u64 = 0x20000000000000;
|
||||
|
||||
#[inline]
|
||||
fn exponent_limit() -> (i32, i32) {
|
||||
(-22, 22)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn mantissa_limit() -> i32 {
|
||||
15
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn powi(self, n: i32) -> f64 {
|
||||
f64::powi(self, n)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn from_bits(u: u64) -> f64 {
|
||||
f64::from_bits(u)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_bits(self) -> u64 {
|
||||
f64::to_bits(self)
|
||||
}
|
||||
}
|
||||
+158
@@ -0,0 +1,158 @@
|
||||
use super::cached::*;
|
||||
use super::float::ExtendedFloat;
|
||||
use super::num::*;
|
||||
|
||||
const POW10: [u64; 20] = [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000, 100000000000000, 1000000000000000, 10000000000000000, 100000000000000000, 1000000000000000000, 10000000000000000000];
|
||||
|
||||
// FAST
|
||||
// ----
|
||||
|
||||
/// Convert mantissa to exact value for a non-base2 power.
|
||||
///
|
||||
/// Returns the resulting float and if the value can be represented exactly.
|
||||
fn fast_path<F>(mantissa: u64, exponent: i32)
|
||||
-> Option<F>
|
||||
where F: Float
|
||||
{
|
||||
// `mantissa >> (F::MANTISSA_SIZE+1) != 0` effectively checks if the
|
||||
// value has a no bits above the hidden bit, which is what we want.
|
||||
let (min_exp, max_exp) = F::exponent_limit();
|
||||
let shift_exp = F::mantissa_limit();
|
||||
let mantissa_size = F::MANTISSA_SIZE + 1;
|
||||
if mantissa >> mantissa_size != 0 {
|
||||
// Would require truncation of the mantissa.
|
||||
None
|
||||
} else if exponent == 0 {
|
||||
// 0 exponent, same as value, exact representation.
|
||||
let float = F::as_cast(mantissa);
|
||||
Some(float)
|
||||
} else if exponent >= min_exp && exponent <= max_exp {
|
||||
// Value can be exactly represented, return the value.
|
||||
// Use powi, since it's correct, and faster on
|
||||
// the fast-path.
|
||||
let float = F::as_cast(mantissa);
|
||||
let base = F::as_cast(10i32);
|
||||
Some(float.mul(base.powi(exponent)))
|
||||
} else if exponent >= 0 && exponent <= max_exp + shift_exp {
|
||||
// Check to see if we have a disguised fast-path, where the
|
||||
// number of digits in the mantissa is very small, but and
|
||||
// so digits can be shifted from the exponent to the mantissa.
|
||||
// https://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
|
||||
let small_powers = POW10;
|
||||
let shift = exponent - max_exp;
|
||||
let power = small_powers[shift.as_usize()];
|
||||
|
||||
// Compute the product of the power, if it overflows,
|
||||
// prematurely return early, otherwise, if we didn't overshoot,
|
||||
// we can get an exact value.
|
||||
let value = mantissa.checked_mul(power)?;
|
||||
if value >> mantissa_size != 0 {
|
||||
None
|
||||
} else {
|
||||
// Use powi, since it's correct, and faster on
|
||||
// the fast-path.
|
||||
let float = F::as_cast(value);
|
||||
let base = F::as_cast(10i32);
|
||||
Some(float.mul(base.powi(max_exp)))
|
||||
}
|
||||
} else {
|
||||
// Cannot be exactly represented, exponent too small or too big,
|
||||
// would require truncation.
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
// MODERATE
|
||||
// --------
|
||||
|
||||
/// Multiply the floating-point by the exponent.
|
||||
///
|
||||
/// Multiply by pre-calculated powers of the base, modify the extended-
|
||||
/// float, and return if new value and if the value can be represented
|
||||
/// accurately.
|
||||
fn multiply_exponent_extended<F>(fp: &mut ExtendedFloat, exponent: i32)
|
||||
where F: Float
|
||||
{
|
||||
let powers = ExtendedFloat::get_powers();
|
||||
let exponent = exponent.saturating_add(powers.bias);
|
||||
let small_index = exponent % powers.step;
|
||||
let large_index = exponent / powers.step;
|
||||
if exponent < 0 {
|
||||
// Guaranteed underflow (assign 0).
|
||||
fp.mant = 0;
|
||||
} else if large_index as usize >= powers.large.len() {
|
||||
// Overflow (assign infinity)
|
||||
fp.mant = 1 << 63;
|
||||
fp.exp = 0x7FF;
|
||||
} else {
|
||||
// Within the valid exponent range, multiply by the large and small
|
||||
// exponents and return the resulting value.
|
||||
|
||||
// Multiply by the small power.
|
||||
// Check if we can directly multiply by an integer, if not,
|
||||
// use extended-precision multiplication.
|
||||
match fp.mant.overflowing_mul(powers.get_small_int(small_index.as_usize())) {
|
||||
// Overflow, multiplication unsuccessful, go slow path.
|
||||
(_, true) => {
|
||||
fp.normalize();
|
||||
fp.imul(&powers.get_small(small_index.as_usize()));
|
||||
},
|
||||
// No overflow, multiplication successful.
|
||||
(mant, false) => {
|
||||
fp.mant = mant;
|
||||
fp.normalize();
|
||||
},
|
||||
}
|
||||
|
||||
// Multiply by the large power
|
||||
fp.imul(&powers.get_large(large_index.as_usize()));
|
||||
|
||||
// Normalize the floating point (and the errors).
|
||||
fp.normalize();
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a precise native float using an intermediate extended-precision float.
|
||||
///
|
||||
/// Return the float approximation and if the value can be accurately
|
||||
/// represented with mantissa bits of precision.
|
||||
pub(super) fn moderate_path<F>(mantissa: u64, exponent: i32) -> F
|
||||
where F: Float
|
||||
{
|
||||
let mut fp = ExtendedFloat { mant: mantissa, exp: 0 };
|
||||
multiply_exponent_extended::<F>(&mut fp, exponent);
|
||||
fp.into_float::<F>()
|
||||
}
|
||||
|
||||
// PARSE
|
||||
// -----
|
||||
|
||||
/// Parse non-power-of-two radix string to native float.
|
||||
///
|
||||
/// * `mantissa` - Significant digits for float.
|
||||
/// * `exponent` - Mantissa exponent in decimal.
|
||||
///
|
||||
/// # Warning
|
||||
/// The exponent is not the parsed exponent, for example:
|
||||
/// "2.543" would have a mantissa of `2543` and an exponent of `-3`,
|
||||
/// signifying it should be 2543 * 10^-3.
|
||||
pub fn parse_float<F>(mantissa: u64, exponent: i32, truncated: bool) -> F
|
||||
where F: Float
|
||||
{
|
||||
// Process the state to a float.
|
||||
if mantissa == 0 {
|
||||
// Literal 0, return early.
|
||||
// Value cannot be truncated, since truncation only occurs on
|
||||
// overflow or underflow.
|
||||
F::ZERO
|
||||
} else if !truncated {
|
||||
// Try the fast path, no mantissa truncation.
|
||||
if let Some(float) = fast_path::<F>(mantissa, exponent) {
|
||||
float
|
||||
} else {
|
||||
moderate_path(mantissa, exponent)
|
||||
}
|
||||
} else {
|
||||
moderate_path(mantissa, exponent)
|
||||
}
|
||||
}
|
||||
+160
@@ -0,0 +1,160 @@
|
||||
use std::mem;
|
||||
use super::float::ExtendedFloat;
|
||||
use super::num::*;
|
||||
use super::shift::*;
|
||||
|
||||
/// Calculate a scalar factor of 2 above the halfway point.
|
||||
#[inline]
|
||||
fn nth_bit(n: u64) -> u64
|
||||
{
|
||||
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
|
||||
debug_assert!(n < bits, "nth_bit() overflow in shl.");
|
||||
|
||||
1 << n
|
||||
}
|
||||
|
||||
/// Generate a bitwise mask for the lower `n` bits.
|
||||
#[inline]
|
||||
fn lower_n_mask(n: u64) -> u64
|
||||
{
|
||||
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
|
||||
debug_assert!(n <= bits, "lower_n_mask() overflow in shl.");
|
||||
|
||||
match n == bits {
|
||||
true => u64::max_value(),
|
||||
false => (1 << n) - 1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate the halfway point for the lower `n` bits.
|
||||
#[inline]
|
||||
fn lower_n_halfway(n: u64) -> u64
|
||||
{
|
||||
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
|
||||
debug_assert!(n <= bits, "lower_n_halfway() overflow in shl.");
|
||||
|
||||
match n == 0 {
|
||||
true => 0,
|
||||
false => nth_bit(n - 1),
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate a bitwise mask with `n` 1 bits starting at the `bit` position.
|
||||
#[inline]
|
||||
fn internal_n_mask(bit: u64, n: u64) -> u64 {
|
||||
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
|
||||
debug_assert!(bit <= bits, "internal_n_halfway() overflow in shl.");
|
||||
debug_assert!(n <= bits, "internal_n_halfway() overflow in shl.");
|
||||
debug_assert!(bit >= n, "internal_n_halfway() overflow in sub.");
|
||||
|
||||
lower_n_mask(bit) ^ lower_n_mask(bit - n)
|
||||
}
|
||||
|
||||
// Shift right N-bytes and round to the nearest.
|
||||
//
|
||||
// Return if we are above halfway and if we are halfway.
|
||||
pub(crate) fn round_nearest(fp: &mut ExtendedFloat, shift: i32)
|
||||
-> (bool, bool)
|
||||
{
|
||||
// Extract the truncated bits using mask.
|
||||
// Calculate if the value of the truncated bits are either above
|
||||
// the mid-way point, or equal to it.
|
||||
//
|
||||
// For example, for 4 truncated bytes, the mask would be b1111
|
||||
// and the midway point would be b1000.
|
||||
let mask: u64 = lower_n_mask(shift as u64);
|
||||
let halfway: u64 = lower_n_halfway(shift as u64);
|
||||
|
||||
let truncated_bits = fp.mant & mask;
|
||||
let is_above = truncated_bits > halfway;
|
||||
let is_halfway = truncated_bits == halfway;
|
||||
|
||||
// Bit shift so the leading bit is in the hidden bit.
|
||||
overflowing_shr(fp, shift);
|
||||
|
||||
(is_above, is_halfway)
|
||||
}
|
||||
|
||||
// ROUND TO FLOAT
|
||||
|
||||
// Shift the ExtendedFloat fraction to the fraction bits in a native float.
|
||||
//
|
||||
// Floating-point arithmetic uses round to nearest, ties to even,
|
||||
// which rounds to the nearest value, if the value is halfway in between,
|
||||
// round to an even value.
|
||||
pub(crate) fn round_to_float<F>(fp: &mut ExtendedFloat)
|
||||
where F: Float
|
||||
{
|
||||
// Calculate the difference to allow a single calculation
|
||||
// rather than a loop, to minimize the number of ops required.
|
||||
// This does underflow detection.
|
||||
let final_exp = fp.exp + F::DEFAULT_SHIFT;
|
||||
if final_exp < F::DENORMAL_EXPONENT {
|
||||
// We would end up with a denormal exponent, try to round to more
|
||||
// digits. Only shift right if we can avoid zeroing out the value,
|
||||
// which requires the exponent diff to be < M::BITS. The value
|
||||
// is already normalized, so we shouldn't have any issue zeroing
|
||||
// out the value.
|
||||
let diff = F::DENORMAL_EXPONENT - fp.exp;
|
||||
if diff <= u64::FULL {
|
||||
// We can avoid underflow, can get a valid representation.
|
||||
round_nearest(fp, diff);
|
||||
} else {
|
||||
// Certain underflow, assign literal 0s.
|
||||
fp.mant = 0;
|
||||
fp.exp = 0;
|
||||
}
|
||||
} else {
|
||||
round_nearest(fp, F::DEFAULT_SHIFT);
|
||||
}
|
||||
|
||||
if fp.mant & F::CARRY_MASK == F::CARRY_MASK {
|
||||
// Roundup carried over to 1 past the hidden bit.
|
||||
shr(fp, 1);
|
||||
}
|
||||
}
|
||||
|
||||
// AVOID OVERFLOW/UNDERFLOW
|
||||
|
||||
// Avoid overflow for large values, shift left as needed.
|
||||
//
|
||||
// Shift until a 1-bit is in the hidden bit, if the mantissa is not 0.
|
||||
fn avoid_overflow<F>(fp: &mut ExtendedFloat)
|
||||
where F: Float
|
||||
{
|
||||
// Calculate the difference to allow a single calculation
|
||||
// rather than a loop, minimizing the number of ops required.
|
||||
if fp.exp >= F::MAX_EXPONENT {
|
||||
let diff = fp.exp - F::MAX_EXPONENT;
|
||||
if diff <= F::MANTISSA_SIZE {
|
||||
// Our overflow mask needs to start at the hidden bit, or at
|
||||
// `F::MANTISSA_SIZE+1`, and needs to have `diff+1` bits set,
|
||||
// to see if our value overflows.
|
||||
let bit = (F::MANTISSA_SIZE + 1).as_u64();
|
||||
let n = (diff + 1).as_u64();
|
||||
let mask = internal_n_mask(bit, n);
|
||||
if (fp.mant & mask) == 0 {
|
||||
// If we have no 1-bit in the hidden-bit position,
|
||||
// which is index 0, we need to shift 1.
|
||||
let shift = diff + 1;
|
||||
shl(fp, shift);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ROUND TO NATIVE
|
||||
|
||||
// Round an extended-precision float to a native float representation.
|
||||
pub(crate) fn round_to_native<F>(fp: &mut ExtendedFloat)
|
||||
where F: Float
|
||||
{
|
||||
// Shift all the way left, to ensure a consistent representation.
|
||||
// The following right-shifts do not work for a non-normalized number.
|
||||
fp.normalize();
|
||||
|
||||
// Round so the fraction is in a native mantissa representation,
|
||||
// and avoid overflow/underflow.
|
||||
round_to_float::<F>(fp);
|
||||
avoid_overflow::<F>(fp);
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
use std::mem;
|
||||
use super::float::ExtendedFloat;
|
||||
|
||||
// Shift extended-precision float right `shift` bytes.
|
||||
pub(crate) fn shr(fp: &mut ExtendedFloat, shift: i32)
|
||||
{
|
||||
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
|
||||
debug_assert!((shift as u64) < bits, "shr() overflow in shift right.");
|
||||
|
||||
fp.mant >>= shift;
|
||||
fp.exp += shift;
|
||||
}
|
||||
|
||||
// Shift extended-precision float right `shift` bytes.
|
||||
//
|
||||
// Accepts when the shift is the same as the type size, and
|
||||
// sets the value to 0.
|
||||
pub(crate) fn overflowing_shr(fp: &mut ExtendedFloat, shift: i32)
|
||||
{
|
||||
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
|
||||
debug_assert!((shift as u64) <= bits, "overflowing_shr() overflow in shift right.");
|
||||
|
||||
fp.mant = match shift as u64 == bits {
|
||||
true => 0,
|
||||
false => fp.mant >> shift,
|
||||
};
|
||||
fp.exp += shift;
|
||||
}
|
||||
|
||||
// Shift extended-precision float left `shift` bytes.
|
||||
pub(crate) fn shl(fp: &mut ExtendedFloat, shift: i32)
|
||||
{
|
||||
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
|
||||
debug_assert!((shift as u64) < bits, "shl() overflow in shift left.");
|
||||
|
||||
fp.mant <<= shift;
|
||||
fp.exp -= shift;
|
||||
}
|
||||
Reference in New Issue
Block a user