Initial commit.

2026-07-01 09:25:04 -04:00 · 2020-02-06 23:18:19 -06:00
commit fe3c784a0b
10 changed files with 1316 additions and 0 deletions
@@ -0,0 +1,4 @@
+[package]
+authors = ["Alex Huszagh <ahuszagh@gmail.com>"]
+name = "minimal_lexical"
+version = "0.0.1"
@@ -0,0 +1,78 @@
+use super::cached_float80;
+use super::float::ExtendedFloat;
+
+// POWERS
+
+/// Precalculated powers that uses two-separate arrays for memory-efficiency.
+#[doc(hidden)]
+pub(crate) struct ExtendedFloatArray {
+    // Pre-calculated mantissa for the powers.
+    pub mant: &'static [u64],
+    // Pre-calculated binary exponents for the powers.
+    pub exp: &'static [i32],
+}
+
+/// Allow indexing of values without bounds checking
+impl ExtendedFloatArray {
+    #[inline]
+    pub fn get_extended_float(&self, index: usize) -> ExtendedFloat {
+        let mant = self.mant[index];
+        let exp = self.exp[index];
+        ExtendedFloat { mant: mant, exp: exp }
+    }
+
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.mant.len()
+    }
+}
+
+// MODERATE PATH POWERS
+
+/// Precalculated powers of base N for the moderate path.
+#[doc(hidden)]
+pub(crate) struct ModeratePathPowers {
+    // Pre-calculated small powers.
+    pub small: ExtendedFloatArray,
+    // Pre-calculated large powers.
+    pub large: ExtendedFloatArray,
+    /// Pre-calculated small powers as 64-bit integers
+    pub small_int: &'static [u64],
+    // Step between large powers and number of small powers.
+    pub step: i32,
+    // Exponent bias for the large powers.
+    pub bias: i32,
+}
+
+/// Allow indexing of values without bounds checking
+impl ModeratePathPowers {
+    #[inline]
+    pub fn get_small(&self, index: usize) -> ExtendedFloat {
+        self.small.get_extended_float(index)
+    }
+
+    #[inline]
+    pub fn get_large(&self, index: usize) -> ExtendedFloat {
+        self.large.get_extended_float(index)
+    }
+
+    #[inline]
+    pub fn get_small_int(&self, index: usize) -> u64 {
+        self.small_int[index]
+    }
+}
+
+// CACHED EXTENDED POWERS
+
+/// Cached powers as a trait for a floating-point type.
+pub(super) trait ModeratePathCache {
+    /// Get cached powers.
+    fn get_powers() -> &'static ModeratePathPowers;
+}
+
+impl ModeratePathCache for ExtendedFloat {
+    #[inline]
+    fn get_powers() -> &'static ModeratePathPowers {
+        cached_float80::get_powers()
+    }
+}
@@ -0,0 +1,345 @@
+//! Cached exponents for basen values with 80-bit extended floats.
+//!
+//! Exact versions of base**n as an extended-precision float, with both
+//! large and small powers. Use the large powers to minimize the amount
+//! of compounded error.
+//!
+//! These values were calculated using Python, using the arbitrary-precision
+//! integer to calculate exact extended-representation of each value.
+//! These values are all normalized.
+//!
+//! This files takes ~ 26KB of storage.
+//!
+//! This file is mostly automatically generated, do not change values
+//! manually, unless you know what you are doing. The script to generate
+//! the values is as follows:
+//!
+//! ```text
+//! import math
+//! from collections import deque
+//!
+//! STEP_STR = "const BASE{0}_STEP: i32 = {1};"
+//! SMALL_MANTISSA_STR = "const BASE{0}_SMALL_MANTISSA: [u64; {1}] = ["
+//! SMALL_EXPONENT_STR = "const BASE{0}_SMALL_EXPONENT: [i32; {1}] = ["
+//! LARGE_MANTISSA_STR = "const BASE{0}_LARGE_MANTISSA: [u64; {1}] = ["
+//! LARGE_EXPONENT_STR = "const BASE{0}_LARGE_EXPONENT: [i32; {1}] = ["
+//! SMALL_INT_STR = "const BASE{0}_SMALL_INT_POWERS: [u64; {1}] = {2};"
+//! BIAS_STR = "const BASE{0}_BIAS: i32 = {1};"
+//! EXP_STR = "// {}^{}"
+//! POWER_STR = """pub(crate) const BASE{0}_POWERS: ModeratePathPowers<u64> = ModeratePathPowers {{
+//!     small: ExtendedFloatArray {{ mant: &BASE{0}_SMALL_MANTISSA, exp: &BASE{0}_SMALL_EXPONENT }},
+//!     large: ExtendedFloatArray {{ mant: &BASE{0}_LARGE_MANTISSA, exp: &BASE{0}_LARGE_EXPONENT }},
+//!     small_int: &BASE{0}_SMALL_INT_POWERS,
+//!     step: BASE{0}_STEP,
+//!     bias: BASE{0}_BIAS,
+//! }};\n"""
+//!
+//! def calculate_bitshift(base, exponent):
+//!     '''
+//!     Calculate the bitshift required for a given base. The exponent
+//!     is the absolute value of the max exponent (log distance from 1.)
+//!     '''
+//!
+//!     return 63 + math.ceil(math.log2(base**exponent))
+//!
+//!
+//! def next_fp(fp, base, step = 1):
+//!     '''Generate the next extended-floating point value.'''
+//!
+//!     return (fp[0] * (base**step), fp[1])
+//!
+//!
+//! def prev_fp(fp, base, step = 1):
+//!     '''Generate the previous extended-floating point value.'''
+//!
+//!     return (fp[0] // (base**step), fp[1])
+//!
+//!
+//! def normalize_fp(fp):
+//!     '''Normalize a extended-float so the MSB is the 64th bit'''
+//!
+//!     while fp[0] >> 64 != 0:
+//!         fp = (fp[0] >> 1, fp[1] + 1)
+//!     return fp
+//!
+//!
+//! def generate_small(base, count):
+//!     '''Generate the small powers for a given base'''
+//!
+//!     bitshift = calculate_bitshift(base, count)
+//!     fps = []
+//!     fp = (1 << bitshift, -bitshift)
+//!     for exp in range(count):
+//!         fps.append((normalize_fp(fp), exp))
+//!         fp = next_fp(fp, base)
+//!
+//!     # Print the small powers as integers.
+//!     ints = [base**i for _, i in fps]
+//!
+//!     return fps, ints
+//!
+//!
+//! def generate_large(base, step):
+//!     '''Generate the large powers for a given base.'''
+//!
+//!     # Get our starting parameters
+//!     min_exp = math.floor(math.log(5e-324, base) - math.log(0xFFFFFFFFFFFFFFFF, base))
+//!     max_exp = math.ceil(math.log(1.7976931348623157e+308, base))
+//!     bitshift = calculate_bitshift(base, abs(min_exp - step))
+//!     fps = deque()
+//!
+//!     # Add negative exponents
+//!     # We need to go below the minimum exponent, since we need
+//!     # all resulting exponents to be positive.
+//!     fp = (1 << bitshift, -bitshift)
+//!     for exp in range(-step, min_exp-step, -step):
+//!         fp = prev_fp(fp, base, step)
+//!         fps.appendleft((normalize_fp(fp), exp))
+//!
+//!     # Add positive exponents
+//!     fp = (1 << bitshift, -bitshift)
+//!     fps.append((normalize_fp(fp), 0))
+//!     for exp in range(step, max_exp, step):
+//!         fp = next_fp(fp, base, step)
+//!         fps.append((normalize_fp(fp), exp))
+//!
+//!     # Return the smallest exp, AKA, the bias
+//!     return fps, -fps[0][1]
+//!
+//!
+//! def print_array(base, string, fps, index):
+//!     '''Print an entire array'''
+//!
+//!     print(string.format(base, len(fps)))
+//!     for fp, exp in fps:
+//!         value = "    {},".format(fp[index])
+//!         exp = EXP_STR.format(base, exp)
+//!         print(value.ljust(30, " ") + exp)
+//!     print("];")
+//!
+//!
+//! def generate_base(base):
+//!     '''Generate all powers and variables.'''
+//!
+//!     step = math.floor(math.log(1e10, base))
+//!     small, ints = generate_small(base, step)
+//!     large, bias = generate_large(base, step)
+//!
+//!     print_array(base, SMALL_MANTISSA_STR, small, 0)
+//!     print_array(base, SMALL_EXPONENT_STR, small, 1)
+//!     print_array(base, LARGE_MANTISSA_STR, large, 0)
+//!     print_array(base, LARGE_EXPONENT_STR, large, 1)
+//!     print(SMALL_INT_STR.format(base, len(ints), ints))
+//!     print(STEP_STR.format(base, step))
+//!     print(BIAS_STR.format(base, bias))
+//!
+//!
+//! def generate():
+//!     '''Generate all bases.'''
+//!
+//!     bases = [
+//!         3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21,
+//!         22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36
+//!     ]
+//!
+//!     for base in bases:
+//!         print("// BASE{}\n".format(base))
+//!         generate_base(base)
+//!         print("")
+//!
+//!     print("// HIGH LEVEL\n// ----------\n")
+//!
+//!     for base in bases:
+//!         print(POWER_STR.format(base))
+//!
+//!
+//! if __name__ == '__main__':
+//!     generate()
+//! ```
+
+use super::cached::{ExtendedFloatArray, ModeratePathPowers};
+
+// LOW-LEVEL
+// ---------
+
+// BASE10
+
+const BASE10_SMALL_MANTISSA: [u64; 10] = [
+    9223372036854775808,      // 10^0
+    11529215046068469760,     // 10^1
+    14411518807585587200,     // 10^2
+    18014398509481984000,     // 10^3
+    11258999068426240000,     // 10^4
+    14073748835532800000,     // 10^5
+    17592186044416000000,     // 10^6
+    10995116277760000000,     // 10^7
+    13743895347200000000,     // 10^8
+    17179869184000000000,     // 10^9
+];
+const BASE10_SMALL_EXPONENT: [i32; 10] = [
+    -63,                      // 10^0
+    -60,                      // 10^1
+    -57,                      // 10^2
+    -54,                      // 10^3
+    -50,                      // 10^4
+    -47,                      // 10^5
+    -44,                      // 10^6
+    -40,                      // 10^7
+    -37,                      // 10^8
+    -34,                      // 10^9
+];
+const BASE10_LARGE_MANTISSA: [u64; 66] = [
+    11555125961253852697,     // 10^-350
+    13451937075301367670,     // 10^-340
+    15660115838168849784,     // 10^-330
+    18230774251475056848,     // 10^-320
+    10611707258198326947,     // 10^-310
+    12353653155963782858,     // 10^-300
+    14381545078898527261,     // 10^-290
+    16742321987285426889,     // 10^-280
+    9745314011399999080,      // 10^-270
+    11345038669416679861,     // 10^-260
+    13207363278391631158,     // 10^-250
+    15375394465392026070,     // 10^-240
+    17899314949046850752,     // 10^-230
+    10418772551374772303,     // 10^-220
+    12129047596099288555,     // 10^-210
+    14120069793541087484,     // 10^-200
+    16437924692338667210,     // 10^-190
+    9568131466127621947,      // 10^-180
+    11138771039116687545,     // 10^-170
+    12967236152753102995,     // 10^-160
+    15095849699286165408,     // 10^-150
+    17573882009934360870,     // 10^-140
+    10229345649675443343,     // 10^-130
+    11908525658859223294,     // 10^-120
+    13863348470604074297,     // 10^-110
+    16139061738043178685,     // 10^-100
+    9394170331095332911,      // 10^-90
+    10936253623915059621,     // 10^-80
+    12731474852090538039,     // 10^-70
+    14821387422376473014,     // 10^-60
+    17254365866976409468,     // 10^-50
+    10043362776618689222,     // 10^-40
+    11692013098647223345,     // 10^-30
+    13611294676837538538,     // 10^-20
+    15845632502852867518,     // 10^-10
+    9223372036854775808,      // 10^0
+    10737418240000000000,     // 10^10
+    12500000000000000000,     // 10^20
+    14551915228366851806,     // 10^30
+    16940658945086006781,     // 10^40
+    9860761315262647567,      // 10^50
+    11479437019748901445,     // 10^60
+    13363823550460978230,     // 10^70
+    15557538194652854267,     // 10^80
+    18111358157653424735,     // 10^90
+    10542197943230523224,     // 10^100
+    12272733663244316382,     // 10^110
+    14287342391028437277,     // 10^120
+    16632655625031838749,     // 10^130
+    9681479787123295682,      // 10^140
+    11270725851789228247,     // 10^150
+    13120851772591970218,     // 10^160
+    15274681817498023410,     // 10^170
+    17782069995880619867,     // 10^180
+    10350527006597618960,     // 10^190
+    12049599325514420588,     // 10^200
+    14027579833653779454,     // 10^210
+    16330252207878254650,     // 10^220
+    9505457831475799117,      // 10^230
+    11065809325636130661,     // 10^240
+    12882297539194266616,     // 10^250
+    14996968138956309548,     // 10^260
+    17458768723248864463,     // 10^270
+    10162340898095201970,     // 10^280
+    11830521861667747109,     // 10^290
+    13772540099066387756,     // 10^300
+];
+const BASE10_LARGE_EXPONENT: [i32; 66] = [
+    -1226,                    // 10^-350
+    -1193,                    // 10^-340
+    -1160,                    // 10^-330
+    -1127,                    // 10^-320
+    -1093,                    // 10^-310
+    -1060,                    // 10^-300
+    -1027,                    // 10^-290
+    -994,                     // 10^-280
+    -960,                     // 10^-270
+    -927,                     // 10^-260
+    -894,                     // 10^-250
+    -861,                     // 10^-240
+    -828,                     // 10^-230
+    -794,                     // 10^-220
+    -761,                     // 10^-210
+    -728,                     // 10^-200
+    -695,                     // 10^-190
+    -661,                     // 10^-180
+    -628,                     // 10^-170
+    -595,                     // 10^-160
+    -562,                     // 10^-150
+    -529,                     // 10^-140
+    -495,                     // 10^-130
+    -462,                     // 10^-120
+    -429,                     // 10^-110
+    -396,                     // 10^-100
+    -362,                     // 10^-90
+    -329,                     // 10^-80
+    -296,                     // 10^-70
+    -263,                     // 10^-60
+    -230,                     // 10^-50
+    -196,                     // 10^-40
+    -163,                     // 10^-30
+    -130,                     // 10^-20
+    -97,                      // 10^-10
+    -63,                      // 10^0
+    -30,                      // 10^10
+    3,                        // 10^20
+    36,                       // 10^30
+    69,                       // 10^40
+    103,                      // 10^50
+    136,                      // 10^60
+    169,                      // 10^70
+    202,                      // 10^80
+    235,                      // 10^90
+    269,                      // 10^100
+    302,                      // 10^110
+    335,                      // 10^120
+    368,                      // 10^130
+    402,                      // 10^140
+    435,                      // 10^150
+    468,                      // 10^160
+    501,                      // 10^170
+    534,                      // 10^180
+    568,                      // 10^190
+    601,                      // 10^200
+    634,                      // 10^210
+    667,                      // 10^220
+    701,                      // 10^230
+    734,                      // 10^240
+    767,                      // 10^250
+    800,                      // 10^260
+    833,                      // 10^270
+    867,                      // 10^280
+    900,                      // 10^290
+    933,                      // 10^300
+];
+const BASE10_SMALL_INT_POWERS: [u64; 10] = [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000];
+const BASE10_STEP: i32 = 10;
+const BASE10_BIAS: i32 = 350;
+
+// HIGH LEVEL
+// ----------
+
+pub(crate) const BASE10_POWERS: ModeratePathPowers = ModeratePathPowers {
+    small: ExtendedFloatArray { mant: &BASE10_SMALL_MANTISSA, exp: &BASE10_SMALL_EXPONENT },
+    large: ExtendedFloatArray { mant: &BASE10_LARGE_MANTISSA, exp: &BASE10_LARGE_EXPONENT },
+    small_int: &BASE10_SMALL_INT_POWERS,
+    step: BASE10_STEP,
+    bias: BASE10_BIAS,
+};
+
+/// Get powers from base.
+pub(crate) fn get_powers() -> &'static ModeratePathPowers {
+    &BASE10_POWERS
+}
@@ -0,0 +1,32 @@
+use super::float::ExtendedFloat;
+use super::num::*;
+
+// INTO FLOAT
+
+// Export extended-precision float to native float.
+//
+// The extended-precision float must be in native float representation,
+// with overflow/underflow appropriately handled.
+pub(crate) fn into_float<F>(fp: ExtendedFloat) -> F
+    where F: Float
+{
+    // Export floating-point number.
+    if fp.mant == 0 || fp.exp < F::DENORMAL_EXPONENT {
+        // sub-denormal, underflow
+        F::ZERO
+    } else if fp.exp >= F::MAX_EXPONENT {
+        // overflow
+        F::from_bits(F::INFINITY_BITS)
+    } else {
+        // calculate the exp and fraction bits, and return a float from bits.
+        let exp: u64;
+        if (fp.exp == F::DENORMAL_EXPONENT) && (fp.mant & F::HIDDEN_BIT_MASK.as_u64()) == 0 {
+            exp = 0;
+        } else {
+            exp = (fp.exp + F::EXPONENT_BIAS).as_u64();
+        }
+        let exp = exp << F::MANTISSA_SIZE;
+        let mant = fp.mant & F::MANTISSA_MASK.as_u64();
+        F::from_bits(F::Unsigned::as_cast(mant | exp))
+    }
+}
@@ -0,0 +1,111 @@
+// FLOAT TYPE
+
+use super::convert::*;
+use super::num::*;
+use super::rounding::*;
+use super::shift::*;
+
+/// Extended precision floating-point type.
+///
+/// Private implementation, exposed only for testing purposes.
+#[doc(hidden)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct ExtendedFloat {
+    /// Mantissa for the extended-precision float.
+    pub mant: u64,
+    /// Binary exponent for the extended-precision float.
+    pub exp: i32,
+}
+
+impl ExtendedFloat {
+    // PROPERTIES
+
+    // OPERATIONS
+
+    /// Multiply two normalized extended-precision floats, as if by `a*b`.
+    ///
+    /// The precision is maximal when the numbers are normalized, however,
+    /// decent precision will occur as long as both values have high bits
+    /// set. The result is not normalized.
+    ///
+    /// Algorithm:
+    ///     1. Non-signed multiplication of mantissas (requires 2x as many bits as input).
+    ///     2. Normalization of the result (not done here).
+    ///     3. Addition of exponents.
+    pub fn mul(&self, b: &ExtendedFloat) -> ExtendedFloat {
+        // Logic check, values must be decently normalized prior to multiplication.
+        debug_assert!((self.mant & u64::HIMASK != 0) && (b.mant & u64::HIMASK != 0));
+
+        // Extract high-and-low masks.
+        let ah = self.mant >> u64::HALF;
+        let al = self.mant & u64::LOMASK;
+        let bh = b.mant >> u64::HALF;
+        let bl = b.mant & u64::LOMASK;
+
+        // Get our products
+        let ah_bl = ah * bl;
+        let al_bh = al * bh;
+        let al_bl = al * bl;
+        let ah_bh = ah * bh;
+
+        let mut tmp = (ah_bl & u64::LOMASK) + (al_bh & u64::LOMASK) + (al_bl >> u64::HALF);
+        // round up
+        tmp += 1 << (u64::HALF-1);
+
+        ExtendedFloat {
+            mant: ah_bh + (ah_bl >> u64::HALF) + (al_bh >> u64::HALF) + (tmp >> u64::HALF),
+            exp: self.exp + b.exp + u64::FULL
+        }
+    }
+
+    /// Multiply in-place, as if by `a*b`.
+    ///
+    /// The result is not normalized.
+    pub fn imul(&mut self, b: &ExtendedFloat) {
+        *self = self.mul(b);
+    }
+
+    // NORMALIZE
+
+    /// Normalize float-point number.
+    ///
+    /// Shift the mantissa so the number of leading zeros is 0, or the value
+    /// itself is 0.
+    ///
+    /// Get the number of bytes shifted.
+    pub fn normalize(&mut self) -> u32 {
+        // Note:
+        // Using the cltz intrinsic via leading_zeros is way faster (~10x)
+        // than shifting 1-bit at a time, via while loop, and also way
+        // faster (~2x) than an unrolled loop that checks at 32, 16, 4,
+        // 2, and 1 bit.
+        //
+        // Using a modulus of pow2 (which will get optimized to a bitwise
+        // and with 0x3F or faster) is slightly slower than an if/then,
+        // however, removing the if/then will likely optimize more branched
+        // code as it removes conditional logic.
+
+        // Calculate the number of leading zeros, and then zero-out
+        // any overflowing bits, to avoid shl overflow when self.mant == 0.
+        let shift = if self.mant == 0 { 0 } else { self.mant.leading_zeros() };
+        shl(self, shift as i32);
+        shift
+    }
+
+    // ROUND
+
+    /// Lossy round float-point number to native mantissa boundaries.
+    pub(crate) fn round_to_native<F>(&mut self)
+        where F: Float
+    {
+        round_to_native::<F>(self)
+    }
+
+    // INTO
+
+    /// Convert into lower-precision native float.
+    pub fn into_float<F: Float>(mut self) -> F {
+        self.round_to_native::<F>();
+        into_float(self)
+    }
+}
@@ -0,0 +1,11 @@
+mod cached;
+mod cached_float80;
+mod convert;
+mod float;
+mod num;
+mod parse;
+mod rounding;
+mod shift;
+
+// API.
+pub use self::parse::parse_float;
@@ -0,0 +1,379 @@
+use std::ops;
+
+pub trait AsPrimitive:
+    Sized +
+    Copy +
+    PartialEq +
+    PartialOrd +
+    Send +
+    Sync
+{
+    fn as_u8(self) -> u8;
+    fn as_u16(self) -> u16;
+    fn as_u32(self) -> u32;
+    fn as_u64(self) -> u64;
+    fn as_u128(self) -> u128;
+    fn as_usize(self) -> usize;
+    fn as_i8(self) -> i8;
+    fn as_i16(self) -> i16;
+    fn as_i32(self) -> i32;
+    fn as_i64(self) -> i64;
+    fn as_i128(self) -> i128;
+    fn as_isize(self) -> isize;
+    fn as_f32(self) -> f32;
+    fn as_f64(self) -> f64;
+}
+
+macro_rules! as_primitive_impl {
+    ($($t:tt)*) => ($(
+        impl AsPrimitive for $t {
+            #[inline]
+            fn as_u8(self) -> u8 {
+                self as u8
+            }
+
+            #[inline]
+            fn as_u16(self) -> u16 {
+                self as u16
+            }
+
+            #[inline]
+            fn as_u32(self) -> u32 {
+                self as u32
+            }
+
+            #[inline]
+            fn as_u64(self) -> u64 {
+                self as u64
+            }
+
+            #[inline]
+            fn as_u128(self) -> u128 {
+                self as u128
+            }
+
+            #[inline]
+            fn as_usize(self) -> usize {
+                self as usize
+            }
+
+            #[inline]
+            fn as_i8(self) -> i8 {
+                self as i8
+            }
+
+            #[inline]
+            fn as_i16(self) -> i16 {
+                self as i16
+            }
+
+            #[inline]
+            fn as_i32(self) -> i32 {
+                self as i32
+            }
+
+            #[inline]
+            fn as_i64(self) -> i64 {
+                self as i64
+            }
+
+            #[inline]
+            fn as_i128(self) -> i128 {
+                self as i128
+            }
+
+            #[inline]
+            fn as_isize(self) -> isize {
+                self as isize
+            }
+
+            #[inline]
+            fn as_f32(self) -> f32 {
+                self as f32
+            }
+
+            #[inline]
+            fn as_f64(self) -> f64 {
+                self as f64
+            }
+        }
+    )*)
+}
+
+as_primitive_impl! { u8 u16 u32 u64 u128 usize i8 i16 i32 i64 i128 isize f32 f64 }
+
+/// An interface for casting between machine scalars.
+pub trait AsCast: AsPrimitive {
+    /// Creates a number from another value that can be converted into
+    /// a primitive via the `AsPrimitive` trait.
+    fn as_cast<N: AsPrimitive>(n: N) -> Self;
+}
+
+macro_rules! as_cast_impl {
+    ($t:ty, $meth:ident) => {
+        impl AsCast for $t {
+            #[inline]
+            fn as_cast<N: AsPrimitive>(n: N) -> $t {
+                n.$meth()
+            }
+        }
+    };
+}
+
+as_cast_impl!(u8, as_u8);
+as_cast_impl!(u16, as_u16);
+as_cast_impl!(u32, as_u32);
+as_cast_impl!(u64, as_u64);
+as_cast_impl!(u128, as_u128);
+as_cast_impl!(usize, as_usize);
+as_cast_impl!(i8, as_i8);
+as_cast_impl!(i16, as_i16);
+as_cast_impl!(i32, as_i32);
+as_cast_impl!(i64, as_i64);
+as_cast_impl!(i128, as_i128);
+as_cast_impl!(isize, as_isize);
+as_cast_impl!(f32, as_f32);
+as_cast_impl!(f64, as_f64);
+
+pub trait Number:
+    AsCast +
+    ops::Add<Output=Self> +
+    ops::AddAssign +
+    ops::Div<Output=Self> +
+    ops::DivAssign +
+    ops::Mul<Output=Self> +
+    ops::MulAssign +
+    ops::Rem<Output=Self> +
+    ops::RemAssign +
+    ops::Sub<Output=Self> +
+    ops::SubAssign
+{}
+
+macro_rules! number_impl {
+    ($($t:tt)*) => ($(
+        impl Number for $t {
+        }
+    )*)
+}
+
+number_impl! { u8 u16 u32 u64 u128 usize i8 i16 i32 i64 i128 isize f32 f64 }
+
+pub trait Integer:
+    Number +
+    ops::BitAnd<Output=Self> +
+    ops::BitAndAssign
+{
+    const ZERO: Self;
+}
+
+macro_rules! integer_impl {
+    ($($t:tt)*) => ($(
+        impl Integer for $t {
+            const ZERO: $t = 0;
+        }
+    )*)
+}
+
+integer_impl! { u8 u16 u32 u64 u128 usize i8 i16 i32 i64 i128 isize }
+
+pub trait UnsignedInteger: Integer {}
+
+macro_rules! unsigned_integer_impl {
+    ($($t:ty)*) => ($(
+        impl UnsignedInteger for $t {}
+    )*)
+}
+
+unsigned_integer_impl! { u8 u16 u32 u64 u128 usize }
+
+/// Type trait for the mantissa type.
+pub trait Mantissa: UnsignedInteger {
+    /// Mask for the left-most bit, to check if the value is normalized.
+    const NORMALIZED_MASK: Self;
+    /// Mask to extract the high bits from the integer.
+    const HIMASK: Self;
+    /// Mask to extract the low bits from the integer.
+    const LOMASK: Self;
+    /// Full size of the integer, in bits.
+    const FULL: i32;
+    /// Half size of the integer, in bits.
+    const HALF: i32 = Self::FULL / 2;
+}
+
+impl Mantissa for u64 {
+    const NORMALIZED_MASK: u64  = 0x8000000000000000;
+    const HIMASK: u64           = 0xFFFFFFFF00000000;
+    const LOMASK: u64           = 0x00000000FFFFFFFF;
+    const FULL: i32             = 64;
+}
+
+/// Get exact exponent limit for radix.
+pub trait Float: Number {
+    /// Unsigned type of the same size.
+    type Unsigned: UnsignedInteger;
+
+    const ZERO: Self;
+
+    // MASKS
+
+    /// Bitmask for the sign bit.
+    const SIGN_MASK: Self::Unsigned;
+    /// Bitmask for the exponent, including the hidden bit.
+    const EXPONENT_MASK: Self::Unsigned;
+    /// Bitmask for the hidden bit in exponent, which is an implicit 1 in the fraction.
+    const HIDDEN_BIT_MASK: Self::Unsigned;
+    /// Bitmask for the mantissa (fraction), excluding the hidden bit.
+    const MANTISSA_MASK: Self::Unsigned;
+
+    // PROPERTIES
+
+    /// Positive infinity as bits.
+    const INFINITY_BITS: Self::Unsigned;
+    /// Positive infinity as bits.
+    const NEGATIVE_INFINITY_BITS: Self::Unsigned;
+    /// Size of the significand (mantissa) without hidden bit.
+    const MANTISSA_SIZE: i32;
+    /// Bias of the exponet
+    const EXPONENT_BIAS: i32;
+    /// Exponent portion of a denormal float.
+    const DENORMAL_EXPONENT: i32;
+    /// Maximum exponent value in float.
+    const MAX_EXPONENT: i32;
+
+    // ROUNDING
+
+    /// Default number of bits to shift (or 64 - mantissa size - 1).
+    const DEFAULT_SHIFT: i32;
+    /// Mask to determine if a full-carry occurred (1 in bit above hidden bit).
+    const CARRY_MASK: u64;
+
+    /// Get min and max exponent limits (exact) from radix.
+    fn exponent_limit() -> (i32, i32);
+
+    /// Get the number of digits that can be shifted from exponent to mantissa.
+    fn mantissa_limit() -> i32;
+
+    // Re-exported methods from std.
+    fn powi(self, n: i32) -> Self;
+    fn from_bits(u: Self::Unsigned) -> Self;
+    fn to_bits(self) -> Self::Unsigned;
+
+    /// Returns true if the float is a denormal.
+    #[inline]
+    fn is_denormal(self) -> bool {
+        self.to_bits() & Self::EXPONENT_MASK == Self::Unsigned::ZERO
+    }
+
+    /// Get exponent component from the float.
+    #[inline]
+    fn exponent(self) -> i32 {
+        if self.is_denormal() {
+            return Self::DENORMAL_EXPONENT;
+        }
+
+        let bits = self.to_bits();
+        let biased_e: i32 = (bits & Self::EXPONENT_MASK).as_i32() >> Self::MANTISSA_SIZE;
+        biased_e - Self::EXPONENT_BIAS
+    }
+
+    /// Get mantissa (significand) component from float.
+    #[inline]
+    fn mantissa(self) -> Self::Unsigned {
+        let bits = self.to_bits();
+        let s = bits & Self::MANTISSA_MASK;
+        if !self.is_denormal() {
+            s + Self::HIDDEN_BIT_MASK
+        } else {
+            s
+        }
+    }
+}
+
+impl Float for f32 {
+    type Unsigned = u32;
+
+    const ZERO: f32 = 0.0;
+
+    const SIGN_MASK: u32            = 0x80000000;
+    const EXPONENT_MASK: u32        = 0x7F800000;
+    const HIDDEN_BIT_MASK: u32      = 0x00800000;
+    const MANTISSA_MASK: u32        = 0x007FFFFF;
+    const INFINITY_BITS: u32        = 0x7F800000;
+    const NEGATIVE_INFINITY_BITS: u32 = Self::INFINITY_BITS | Self::SIGN_MASK;
+    const MANTISSA_SIZE: i32        = 23;
+    const EXPONENT_BIAS: i32        = 127 + Self::MANTISSA_SIZE;
+    const DENORMAL_EXPONENT: i32    = 1 - Self::EXPONENT_BIAS;
+    const MAX_EXPONENT: i32         = 0xFF - Self::EXPONENT_BIAS;
+    const DEFAULT_SHIFT: i32        = u64::FULL - f32::MANTISSA_SIZE - 1;
+    const CARRY_MASK: u64           = 0x1000000;
+
+    #[inline]
+    fn exponent_limit() -> (i32, i32) {
+        (-10, 10)
+    }
+
+    #[inline]
+    fn mantissa_limit() -> i32 {
+        7
+    }
+
+    #[inline]
+    fn powi(self, n: i32) -> f32 {
+        f32::powi(self, n)
+    }
+
+    #[inline]
+    fn from_bits(u: u32) -> f32 {
+        f32::from_bits(u)
+    }
+
+    #[inline]
+    fn to_bits(self) -> u32 {
+        f32::to_bits(self)
+    }
+}
+
+
+impl Float for f64 {
+    type Unsigned = u64;
+
+    const ZERO: f64 = 0.0;
+
+    const SIGN_MASK: u64            = 0x8000000000000000;
+    const EXPONENT_MASK: u64        = 0x7FF0000000000000;
+    const HIDDEN_BIT_MASK: u64      = 0x0010000000000000;
+    const MANTISSA_MASK: u64        = 0x000FFFFFFFFFFFFF;
+    const INFINITY_BITS: u64        = 0x7FF0000000000000;
+    const NEGATIVE_INFINITY_BITS: u64 = Self::INFINITY_BITS | Self::SIGN_MASK;
+    const MANTISSA_SIZE: i32        = 52;
+    const EXPONENT_BIAS: i32        = 1023 + Self::MANTISSA_SIZE;
+    const DENORMAL_EXPONENT: i32    = 1 - Self::EXPONENT_BIAS;
+    const MAX_EXPONENT: i32         = 0x7FF - Self::EXPONENT_BIAS;
+    const DEFAULT_SHIFT: i32        = u64::FULL - f64::MANTISSA_SIZE - 1;
+    const CARRY_MASK: u64           = 0x20000000000000;
+
+    #[inline]
+    fn exponent_limit() -> (i32, i32) {
+        (-22, 22)
+    }
+
+    #[inline]
+    fn mantissa_limit() -> i32 {
+        15
+    }
+
+    #[inline]
+    fn powi(self, n: i32) -> f64 {
+        f64::powi(self, n)
+    }
+
+    #[inline]
+    fn from_bits(u: u64) -> f64 {
+        f64::from_bits(u)
+    }
+
+    #[inline]
+    fn to_bits(self) -> u64 {
+        f64::to_bits(self)
+    }
+}
@@ -0,0 +1,158 @@
+use super::cached::*;
+use super::float::ExtendedFloat;
+use super::num::*;
+
+const POW10: [u64; 20] = [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000, 100000000000000, 1000000000000000, 10000000000000000, 100000000000000000, 1000000000000000000, 10000000000000000000];
+
+// FAST
+// ----
+
+/// Convert mantissa to exact value for a non-base2 power.
+///
+/// Returns the resulting float and if the value can be represented exactly.
+fn fast_path<F>(mantissa: u64, exponent: i32)
+    -> Option<F>
+    where F: Float
+{
+    // `mantissa >> (F::MANTISSA_SIZE+1) != 0` effectively checks if the
+    // value has a no bits above the hidden bit, which is what we want.
+    let (min_exp, max_exp) = F::exponent_limit();
+    let shift_exp = F::mantissa_limit();
+    let mantissa_size = F::MANTISSA_SIZE + 1;
+    if mantissa >> mantissa_size != 0 {
+        // Would require truncation of the mantissa.
+        None
+    } else if exponent == 0 {
+        // 0 exponent, same as value, exact representation.
+        let float = F::as_cast(mantissa);
+        Some(float)
+    } else if exponent >= min_exp && exponent <= max_exp {
+        // Value can be exactly represented, return the value.
+        // Use powi, since it's correct, and faster on
+        // the fast-path.
+        let float = F::as_cast(mantissa);
+        let base = F::as_cast(10i32);
+        Some(float.mul(base.powi(exponent)))
+    } else if exponent >= 0 && exponent <= max_exp + shift_exp {
+        // Check to see if we have a disguised fast-path, where the
+        // number of digits in the mantissa is very small, but and
+        // so digits can be shifted from the exponent to the mantissa.
+        // https://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
+        let small_powers = POW10;
+        let shift = exponent - max_exp;
+        let power = small_powers[shift.as_usize()];
+
+        // Compute the product of the power, if it overflows,
+        // prematurely return early, otherwise, if we didn't overshoot,
+        // we can get an exact value.
+        let value = mantissa.checked_mul(power)?;
+        if value >> mantissa_size != 0 {
+            None
+        } else {
+            // Use powi, since it's correct, and faster on
+            // the fast-path.
+            let float = F::as_cast(value);
+            let base = F::as_cast(10i32);
+            Some(float.mul(base.powi(max_exp)))
+        }
+    } else {
+        // Cannot be exactly represented, exponent too small or too big,
+        // would require truncation.
+        None
+    }
+}
+
+// MODERATE
+// --------
+
+/// Multiply the floating-point by the exponent.
+///
+/// Multiply by pre-calculated powers of the base, modify the extended-
+/// float, and return if new value and if the value can be represented
+/// accurately.
+fn multiply_exponent_extended<F>(fp: &mut ExtendedFloat, exponent: i32)
+    where F: Float
+{
+    let powers = ExtendedFloat::get_powers();
+    let exponent = exponent.saturating_add(powers.bias);
+    let small_index = exponent % powers.step;
+    let large_index = exponent / powers.step;
+    if exponent < 0 {
+        // Guaranteed underflow (assign 0).
+        fp.mant = 0;
+    } else if large_index as usize >= powers.large.len() {
+        // Overflow (assign infinity)
+        fp.mant = 1 << 63;
+        fp.exp = 0x7FF;
+    } else {
+        // Within the valid exponent range, multiply by the large and small
+        // exponents and return the resulting value.
+
+        // Multiply by the small power.
+        // Check if we can directly multiply by an integer, if not,
+        // use extended-precision multiplication.
+        match fp.mant.overflowing_mul(powers.get_small_int(small_index.as_usize())) {
+            // Overflow, multiplication unsuccessful, go slow path.
+            (_, true)     => {
+                fp.normalize();
+                fp.imul(&powers.get_small(small_index.as_usize()));
+            },
+            // No overflow, multiplication successful.
+            (mant, false) => {
+                fp.mant = mant;
+                fp.normalize();
+            },
+        }
+
+        // Multiply by the large power
+        fp.imul(&powers.get_large(large_index.as_usize()));
+
+        // Normalize the floating point (and the errors).
+        fp.normalize();
+    }
+}
+
+/// Create a precise native float using an intermediate extended-precision float.
+///
+/// Return the float approximation and if the value can be accurately
+/// represented with mantissa bits of precision.
+pub(super) fn moderate_path<F>(mantissa: u64, exponent: i32) -> F
+    where F: Float
+{
+    let mut fp = ExtendedFloat { mant: mantissa, exp: 0 };
+    multiply_exponent_extended::<F>(&mut fp, exponent);
+    fp.into_float::<F>()
+}
+
+// PARSE
+// -----
+
+/// Parse non-power-of-two radix string to native float.
+///
+/// * `mantissa`        - Significant digits for float.
+/// * `exponent`        - Mantissa exponent in decimal.
+///
+/// # Warning
+/// The exponent is not the parsed exponent, for example:
+///     "2.543" would have a mantissa of `2543` and an exponent of `-3`,
+///     signifying it should be 2543 * 10^-3.
+pub fn parse_float<F>(mantissa: u64, exponent: i32, truncated: bool) -> F
+    where F: Float
+{
+    // Process the state to a float.
+    if mantissa == 0 {
+        // Literal 0, return early.
+        // Value cannot be truncated, since truncation only occurs on
+        // overflow or underflow.
+        F::ZERO
+    } else if !truncated {
+        // Try the fast path, no mantissa truncation.
+        if let Some(float) = fast_path::<F>(mantissa, exponent) {
+            float
+        } else {
+            moderate_path(mantissa, exponent)
+        }
+    } else {
+        moderate_path(mantissa, exponent)
+    }
+}
@@ -0,0 +1,160 @@
+use std::mem;
+use super::float::ExtendedFloat;
+use super::num::*;
+use super::shift::*;
+
+/// Calculate a scalar factor of 2 above the halfway point.
+#[inline]
+fn nth_bit(n: u64) -> u64
+{
+    let bits: u64 = mem::size_of::<u64>() as u64 * 8;
+    debug_assert!(n < bits, "nth_bit() overflow in shl.");
+
+    1 << n
+}
+
+/// Generate a bitwise mask for the lower `n` bits.
+#[inline]
+fn lower_n_mask(n: u64) -> u64
+{
+    let bits: u64 = mem::size_of::<u64>() as u64 * 8;
+    debug_assert!(n <= bits, "lower_n_mask() overflow in shl.");
+
+    match n == bits {
+        true  => u64::max_value(),
+        false => (1 << n) - 1,
+    }
+}
+
+/// Calculate the halfway point for the lower `n` bits.
+#[inline]
+fn lower_n_halfway(n: u64) -> u64
+{
+    let bits: u64 = mem::size_of::<u64>() as u64 * 8;
+    debug_assert!(n <= bits, "lower_n_halfway() overflow in shl.");
+
+    match n == 0 {
+        true  => 0,
+        false => nth_bit(n - 1),
+    }
+}
+
+/// Calculate a bitwise mask with `n` 1 bits starting at the `bit` position.
+#[inline]
+fn internal_n_mask(bit: u64, n: u64) -> u64 {
+    let bits: u64 = mem::size_of::<u64>() as u64 * 8;
+    debug_assert!(bit <= bits, "internal_n_halfway() overflow in shl.");
+    debug_assert!(n <= bits, "internal_n_halfway() overflow in shl.");
+    debug_assert!(bit >= n, "internal_n_halfway() overflow in sub.");
+
+    lower_n_mask(bit) ^ lower_n_mask(bit - n)
+}
+
+// Shift right N-bytes and round to the nearest.
+//
+// Return if we are above halfway and if we are halfway.
+pub(crate) fn round_nearest(fp: &mut ExtendedFloat, shift: i32)
+    -> (bool, bool)
+{
+    // Extract the truncated bits using mask.
+    // Calculate if the value of the truncated bits are either above
+    // the mid-way point, or equal to it.
+    //
+    // For example, for 4 truncated bytes, the mask would be b1111
+    // and the midway point would be b1000.
+    let mask: u64 = lower_n_mask(shift as u64);
+    let halfway: u64 = lower_n_halfway(shift as u64);
+
+    let truncated_bits = fp.mant & mask;
+    let is_above = truncated_bits > halfway;
+    let is_halfway = truncated_bits == halfway;
+
+    // Bit shift so the leading bit is in the hidden bit.
+    overflowing_shr(fp, shift);
+
+    (is_above, is_halfway)
+}
+
+// ROUND TO FLOAT
+
+// Shift the ExtendedFloat fraction to the fraction bits in a native float.
+//
+// Floating-point arithmetic uses round to nearest, ties to even,
+// which rounds to the nearest value, if the value is halfway in between,
+// round to an even value.
+pub(crate) fn round_to_float<F>(fp: &mut ExtendedFloat)
+    where F: Float
+{
+    // Calculate the difference to allow a single calculation
+    // rather than a loop, to minimize the number of ops required.
+    // This does underflow detection.
+    let final_exp = fp.exp + F::DEFAULT_SHIFT;
+    if final_exp < F::DENORMAL_EXPONENT {
+        // We would end up with a denormal exponent, try to round to more
+        // digits. Only shift right if we can avoid zeroing out the value,
+        // which requires the exponent diff to be < M::BITS. The value
+        // is already normalized, so we shouldn't have any issue zeroing
+        // out the value.
+        let diff = F::DENORMAL_EXPONENT - fp.exp;
+        if diff <= u64::FULL {
+            // We can avoid underflow, can get a valid representation.
+            round_nearest(fp, diff);
+        } else {
+            // Certain underflow, assign literal 0s.
+            fp.mant = 0;
+            fp.exp = 0;
+        }
+    } else {
+        round_nearest(fp, F::DEFAULT_SHIFT);
+    }
+
+    if fp.mant & F::CARRY_MASK == F::CARRY_MASK {
+        // Roundup carried over to 1 past the hidden bit.
+        shr(fp, 1);
+    }
+}
+
+// AVOID OVERFLOW/UNDERFLOW
+
+// Avoid overflow for large values, shift left as needed.
+//
+// Shift until a 1-bit is in the hidden bit, if the mantissa is not 0.
+fn avoid_overflow<F>(fp: &mut ExtendedFloat)
+    where F: Float
+{
+    // Calculate the difference to allow a single calculation
+    // rather than a loop, minimizing the number of ops required.
+    if fp.exp >= F::MAX_EXPONENT {
+        let diff = fp.exp - F::MAX_EXPONENT;
+        if diff <= F::MANTISSA_SIZE {
+            // Our overflow mask needs to start at the hidden bit, or at
+            // `F::MANTISSA_SIZE+1`, and needs to have `diff+1` bits set,
+            // to see if our value overflows.
+            let bit = (F::MANTISSA_SIZE + 1).as_u64();
+            let n = (diff + 1).as_u64();
+            let mask = internal_n_mask(bit, n);
+            if (fp.mant & mask) == 0 {
+                // If we have no 1-bit in the hidden-bit position,
+                // which is index 0, we need to shift 1.
+                let shift = diff + 1;
+                shl(fp, shift);
+            }
+        }
+    }
+}
+
+// ROUND TO NATIVE
+
+// Round an extended-precision float to a native float representation.
+pub(crate) fn round_to_native<F>(fp: &mut ExtendedFloat)
+    where F: Float
+{
+    // Shift all the way left, to ensure a consistent representation.
+    // The following right-shifts do not work for a non-normalized number.
+    fp.normalize();
+
+    // Round so the fraction is in a native mantissa representation,
+    // and avoid overflow/underflow.
+    round_to_float::<F>(fp);
+    avoid_overflow::<F>(fp);
+}
@@ -0,0 +1,38 @@
+use std::mem;
+use super::float::ExtendedFloat;
+
+// Shift extended-precision float right `shift` bytes.
+pub(crate) fn shr(fp: &mut ExtendedFloat, shift: i32)
+{
+    let bits: u64 = mem::size_of::<u64>() as u64 * 8;
+    debug_assert!((shift as u64) < bits, "shr() overflow in shift right.");
+
+    fp.mant >>= shift;
+    fp.exp += shift;
+}
+
+// Shift extended-precision float right `shift` bytes.
+//
+// Accepts when the shift is the same as the type size, and
+// sets the value to 0.
+pub(crate) fn overflowing_shr(fp: &mut ExtendedFloat, shift: i32)
+{
+    let bits: u64 = mem::size_of::<u64>() as u64 * 8;
+    debug_assert!((shift as u64) <= bits, "overflowing_shr() overflow in shift right.");
+
+    fp.mant = match shift as u64 == bits {
+        true  => 0,
+        false => fp.mant >> shift,
+    };
+    fp.exp += shift;
+}
+
+// Shift extended-precision float left `shift` bytes.
+pub(crate) fn shl(fp: &mut ExtendedFloat, shift: i32)
+{
+    let bits: u64 = mem::size_of::<u64>() as u64 * 8;
+    debug_assert!((shift as u64) < bits, "shl() overflow in shift left.");
+
+    fp.mant <<= shift;
+    fp.exp -= shift;
+}