Initial commit.

This commit is contained in:
Alex Huszagh
2020-02-06 23:18:19 -06:00
commit fe3c784a0b
10 changed files with 1316 additions and 0 deletions
+4
View File
@@ -0,0 +1,4 @@
[package]
authors = ["Alex Huszagh <ahuszagh@gmail.com>"]
name = "minimal_lexical"
version = "0.0.1"
+78
View File
@@ -0,0 +1,78 @@
use super::cached_float80;
use super::float::ExtendedFloat;
// POWERS
/// Precalculated powers that uses two-separate arrays for memory-efficiency.
#[doc(hidden)]
pub(crate) struct ExtendedFloatArray {
// Pre-calculated mantissa for the powers.
pub mant: &'static [u64],
// Pre-calculated binary exponents for the powers.
pub exp: &'static [i32],
}
/// Allow indexing of values without bounds checking
impl ExtendedFloatArray {
#[inline]
pub fn get_extended_float(&self, index: usize) -> ExtendedFloat {
let mant = self.mant[index];
let exp = self.exp[index];
ExtendedFloat { mant: mant, exp: exp }
}
#[inline]
pub fn len(&self) -> usize {
self.mant.len()
}
}
// MODERATE PATH POWERS
/// Precalculated powers of base N for the moderate path.
#[doc(hidden)]
pub(crate) struct ModeratePathPowers {
// Pre-calculated small powers.
pub small: ExtendedFloatArray,
// Pre-calculated large powers.
pub large: ExtendedFloatArray,
/// Pre-calculated small powers as 64-bit integers
pub small_int: &'static [u64],
// Step between large powers and number of small powers.
pub step: i32,
// Exponent bias for the large powers.
pub bias: i32,
}
/// Allow indexing of values without bounds checking
impl ModeratePathPowers {
#[inline]
pub fn get_small(&self, index: usize) -> ExtendedFloat {
self.small.get_extended_float(index)
}
#[inline]
pub fn get_large(&self, index: usize) -> ExtendedFloat {
self.large.get_extended_float(index)
}
#[inline]
pub fn get_small_int(&self, index: usize) -> u64 {
self.small_int[index]
}
}
// CACHED EXTENDED POWERS
/// Cached powers as a trait for a floating-point type.
pub(super) trait ModeratePathCache {
/// Get cached powers.
fn get_powers() -> &'static ModeratePathPowers;
}
impl ModeratePathCache for ExtendedFloat {
#[inline]
fn get_powers() -> &'static ModeratePathPowers {
cached_float80::get_powers()
}
}
+345
View File
@@ -0,0 +1,345 @@
//! Cached exponents for basen values with 80-bit extended floats.
//!
//! Exact versions of base**n as an extended-precision float, with both
//! large and small powers. Use the large powers to minimize the amount
//! of compounded error.
//!
//! These values were calculated using Python, using the arbitrary-precision
//! integer to calculate exact extended-representation of each value.
//! These values are all normalized.
//!
//! This files takes ~ 26KB of storage.
//!
//! This file is mostly automatically generated, do not change values
//! manually, unless you know what you are doing. The script to generate
//! the values is as follows:
//!
//! ```text
//! import math
//! from collections import deque
//!
//! STEP_STR = "const BASE{0}_STEP: i32 = {1};"
//! SMALL_MANTISSA_STR = "const BASE{0}_SMALL_MANTISSA: [u64; {1}] = ["
//! SMALL_EXPONENT_STR = "const BASE{0}_SMALL_EXPONENT: [i32; {1}] = ["
//! LARGE_MANTISSA_STR = "const BASE{0}_LARGE_MANTISSA: [u64; {1}] = ["
//! LARGE_EXPONENT_STR = "const BASE{0}_LARGE_EXPONENT: [i32; {1}] = ["
//! SMALL_INT_STR = "const BASE{0}_SMALL_INT_POWERS: [u64; {1}] = {2};"
//! BIAS_STR = "const BASE{0}_BIAS: i32 = {1};"
//! EXP_STR = "// {}^{}"
//! POWER_STR = """pub(crate) const BASE{0}_POWERS: ModeratePathPowers<u64> = ModeratePathPowers {{
//! small: ExtendedFloatArray {{ mant: &BASE{0}_SMALL_MANTISSA, exp: &BASE{0}_SMALL_EXPONENT }},
//! large: ExtendedFloatArray {{ mant: &BASE{0}_LARGE_MANTISSA, exp: &BASE{0}_LARGE_EXPONENT }},
//! small_int: &BASE{0}_SMALL_INT_POWERS,
//! step: BASE{0}_STEP,
//! bias: BASE{0}_BIAS,
//! }};\n"""
//!
//! def calculate_bitshift(base, exponent):
//! '''
//! Calculate the bitshift required for a given base. The exponent
//! is the absolute value of the max exponent (log distance from 1.)
//! '''
//!
//! return 63 + math.ceil(math.log2(base**exponent))
//!
//!
//! def next_fp(fp, base, step = 1):
//! '''Generate the next extended-floating point value.'''
//!
//! return (fp[0] * (base**step), fp[1])
//!
//!
//! def prev_fp(fp, base, step = 1):
//! '''Generate the previous extended-floating point value.'''
//!
//! return (fp[0] // (base**step), fp[1])
//!
//!
//! def normalize_fp(fp):
//! '''Normalize a extended-float so the MSB is the 64th bit'''
//!
//! while fp[0] >> 64 != 0:
//! fp = (fp[0] >> 1, fp[1] + 1)
//! return fp
//!
//!
//! def generate_small(base, count):
//! '''Generate the small powers for a given base'''
//!
//! bitshift = calculate_bitshift(base, count)
//! fps = []
//! fp = (1 << bitshift, -bitshift)
//! for exp in range(count):
//! fps.append((normalize_fp(fp), exp))
//! fp = next_fp(fp, base)
//!
//! # Print the small powers as integers.
//! ints = [base**i for _, i in fps]
//!
//! return fps, ints
//!
//!
//! def generate_large(base, step):
//! '''Generate the large powers for a given base.'''
//!
//! # Get our starting parameters
//! min_exp = math.floor(math.log(5e-324, base) - math.log(0xFFFFFFFFFFFFFFFF, base))
//! max_exp = math.ceil(math.log(1.7976931348623157e+308, base))
//! bitshift = calculate_bitshift(base, abs(min_exp - step))
//! fps = deque()
//!
//! # Add negative exponents
//! # We need to go below the minimum exponent, since we need
//! # all resulting exponents to be positive.
//! fp = (1 << bitshift, -bitshift)
//! for exp in range(-step, min_exp-step, -step):
//! fp = prev_fp(fp, base, step)
//! fps.appendleft((normalize_fp(fp), exp))
//!
//! # Add positive exponents
//! fp = (1 << bitshift, -bitshift)
//! fps.append((normalize_fp(fp), 0))
//! for exp in range(step, max_exp, step):
//! fp = next_fp(fp, base, step)
//! fps.append((normalize_fp(fp), exp))
//!
//! # Return the smallest exp, AKA, the bias
//! return fps, -fps[0][1]
//!
//!
//! def print_array(base, string, fps, index):
//! '''Print an entire array'''
//!
//! print(string.format(base, len(fps)))
//! for fp, exp in fps:
//! value = " {},".format(fp[index])
//! exp = EXP_STR.format(base, exp)
//! print(value.ljust(30, " ") + exp)
//! print("];")
//!
//!
//! def generate_base(base):
//! '''Generate all powers and variables.'''
//!
//! step = math.floor(math.log(1e10, base))
//! small, ints = generate_small(base, step)
//! large, bias = generate_large(base, step)
//!
//! print_array(base, SMALL_MANTISSA_STR, small, 0)
//! print_array(base, SMALL_EXPONENT_STR, small, 1)
//! print_array(base, LARGE_MANTISSA_STR, large, 0)
//! print_array(base, LARGE_EXPONENT_STR, large, 1)
//! print(SMALL_INT_STR.format(base, len(ints), ints))
//! print(STEP_STR.format(base, step))
//! print(BIAS_STR.format(base, bias))
//!
//!
//! def generate():
//! '''Generate all bases.'''
//!
//! bases = [
//! 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21,
//! 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36
//! ]
//!
//! for base in bases:
//! print("// BASE{}\n".format(base))
//! generate_base(base)
//! print("")
//!
//! print("// HIGH LEVEL\n// ----------\n")
//!
//! for base in bases:
//! print(POWER_STR.format(base))
//!
//!
//! if __name__ == '__main__':
//! generate()
//! ```
use super::cached::{ExtendedFloatArray, ModeratePathPowers};
// LOW-LEVEL
// ---------
// BASE10
const BASE10_SMALL_MANTISSA: [u64; 10] = [
9223372036854775808, // 10^0
11529215046068469760, // 10^1
14411518807585587200, // 10^2
18014398509481984000, // 10^3
11258999068426240000, // 10^4
14073748835532800000, // 10^5
17592186044416000000, // 10^6
10995116277760000000, // 10^7
13743895347200000000, // 10^8
17179869184000000000, // 10^9
];
const BASE10_SMALL_EXPONENT: [i32; 10] = [
-63, // 10^0
-60, // 10^1
-57, // 10^2
-54, // 10^3
-50, // 10^4
-47, // 10^5
-44, // 10^6
-40, // 10^7
-37, // 10^8
-34, // 10^9
];
const BASE10_LARGE_MANTISSA: [u64; 66] = [
11555125961253852697, // 10^-350
13451937075301367670, // 10^-340
15660115838168849784, // 10^-330
18230774251475056848, // 10^-320
10611707258198326947, // 10^-310
12353653155963782858, // 10^-300
14381545078898527261, // 10^-290
16742321987285426889, // 10^-280
9745314011399999080, // 10^-270
11345038669416679861, // 10^-260
13207363278391631158, // 10^-250
15375394465392026070, // 10^-240
17899314949046850752, // 10^-230
10418772551374772303, // 10^-220
12129047596099288555, // 10^-210
14120069793541087484, // 10^-200
16437924692338667210, // 10^-190
9568131466127621947, // 10^-180
11138771039116687545, // 10^-170
12967236152753102995, // 10^-160
15095849699286165408, // 10^-150
17573882009934360870, // 10^-140
10229345649675443343, // 10^-130
11908525658859223294, // 10^-120
13863348470604074297, // 10^-110
16139061738043178685, // 10^-100
9394170331095332911, // 10^-90
10936253623915059621, // 10^-80
12731474852090538039, // 10^-70
14821387422376473014, // 10^-60
17254365866976409468, // 10^-50
10043362776618689222, // 10^-40
11692013098647223345, // 10^-30
13611294676837538538, // 10^-20
15845632502852867518, // 10^-10
9223372036854775808, // 10^0
10737418240000000000, // 10^10
12500000000000000000, // 10^20
14551915228366851806, // 10^30
16940658945086006781, // 10^40
9860761315262647567, // 10^50
11479437019748901445, // 10^60
13363823550460978230, // 10^70
15557538194652854267, // 10^80
18111358157653424735, // 10^90
10542197943230523224, // 10^100
12272733663244316382, // 10^110
14287342391028437277, // 10^120
16632655625031838749, // 10^130
9681479787123295682, // 10^140
11270725851789228247, // 10^150
13120851772591970218, // 10^160
15274681817498023410, // 10^170
17782069995880619867, // 10^180
10350527006597618960, // 10^190
12049599325514420588, // 10^200
14027579833653779454, // 10^210
16330252207878254650, // 10^220
9505457831475799117, // 10^230
11065809325636130661, // 10^240
12882297539194266616, // 10^250
14996968138956309548, // 10^260
17458768723248864463, // 10^270
10162340898095201970, // 10^280
11830521861667747109, // 10^290
13772540099066387756, // 10^300
];
const BASE10_LARGE_EXPONENT: [i32; 66] = [
-1226, // 10^-350
-1193, // 10^-340
-1160, // 10^-330
-1127, // 10^-320
-1093, // 10^-310
-1060, // 10^-300
-1027, // 10^-290
-994, // 10^-280
-960, // 10^-270
-927, // 10^-260
-894, // 10^-250
-861, // 10^-240
-828, // 10^-230
-794, // 10^-220
-761, // 10^-210
-728, // 10^-200
-695, // 10^-190
-661, // 10^-180
-628, // 10^-170
-595, // 10^-160
-562, // 10^-150
-529, // 10^-140
-495, // 10^-130
-462, // 10^-120
-429, // 10^-110
-396, // 10^-100
-362, // 10^-90
-329, // 10^-80
-296, // 10^-70
-263, // 10^-60
-230, // 10^-50
-196, // 10^-40
-163, // 10^-30
-130, // 10^-20
-97, // 10^-10
-63, // 10^0
-30, // 10^10
3, // 10^20
36, // 10^30
69, // 10^40
103, // 10^50
136, // 10^60
169, // 10^70
202, // 10^80
235, // 10^90
269, // 10^100
302, // 10^110
335, // 10^120
368, // 10^130
402, // 10^140
435, // 10^150
468, // 10^160
501, // 10^170
534, // 10^180
568, // 10^190
601, // 10^200
634, // 10^210
667, // 10^220
701, // 10^230
734, // 10^240
767, // 10^250
800, // 10^260
833, // 10^270
867, // 10^280
900, // 10^290
933, // 10^300
];
const BASE10_SMALL_INT_POWERS: [u64; 10] = [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000];
const BASE10_STEP: i32 = 10;
const BASE10_BIAS: i32 = 350;
// HIGH LEVEL
// ----------
pub(crate) const BASE10_POWERS: ModeratePathPowers = ModeratePathPowers {
small: ExtendedFloatArray { mant: &BASE10_SMALL_MANTISSA, exp: &BASE10_SMALL_EXPONENT },
large: ExtendedFloatArray { mant: &BASE10_LARGE_MANTISSA, exp: &BASE10_LARGE_EXPONENT },
small_int: &BASE10_SMALL_INT_POWERS,
step: BASE10_STEP,
bias: BASE10_BIAS,
};
/// Get powers from base.
pub(crate) fn get_powers() -> &'static ModeratePathPowers {
&BASE10_POWERS
}
+32
View File
@@ -0,0 +1,32 @@
use super::float::ExtendedFloat;
use super::num::*;
// INTO FLOAT
// Export extended-precision float to native float.
//
// The extended-precision float must be in native float representation,
// with overflow/underflow appropriately handled.
pub(crate) fn into_float<F>(fp: ExtendedFloat) -> F
where F: Float
{
// Export floating-point number.
if fp.mant == 0 || fp.exp < F::DENORMAL_EXPONENT {
// sub-denormal, underflow
F::ZERO
} else if fp.exp >= F::MAX_EXPONENT {
// overflow
F::from_bits(F::INFINITY_BITS)
} else {
// calculate the exp and fraction bits, and return a float from bits.
let exp: u64;
if (fp.exp == F::DENORMAL_EXPONENT) && (fp.mant & F::HIDDEN_BIT_MASK.as_u64()) == 0 {
exp = 0;
} else {
exp = (fp.exp + F::EXPONENT_BIAS).as_u64();
}
let exp = exp << F::MANTISSA_SIZE;
let mant = fp.mant & F::MANTISSA_MASK.as_u64();
F::from_bits(F::Unsigned::as_cast(mant | exp))
}
}
+111
View File
@@ -0,0 +1,111 @@
// FLOAT TYPE
use super::convert::*;
use super::num::*;
use super::rounding::*;
use super::shift::*;
/// Extended precision floating-point type.
///
/// Private implementation, exposed only for testing purposes.
#[doc(hidden)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ExtendedFloat {
/// Mantissa for the extended-precision float.
pub mant: u64,
/// Binary exponent for the extended-precision float.
pub exp: i32,
}
impl ExtendedFloat {
// PROPERTIES
// OPERATIONS
/// Multiply two normalized extended-precision floats, as if by `a*b`.
///
/// The precision is maximal when the numbers are normalized, however,
/// decent precision will occur as long as both values have high bits
/// set. The result is not normalized.
///
/// Algorithm:
/// 1. Non-signed multiplication of mantissas (requires 2x as many bits as input).
/// 2. Normalization of the result (not done here).
/// 3. Addition of exponents.
pub fn mul(&self, b: &ExtendedFloat) -> ExtendedFloat {
// Logic check, values must be decently normalized prior to multiplication.
debug_assert!((self.mant & u64::HIMASK != 0) && (b.mant & u64::HIMASK != 0));
// Extract high-and-low masks.
let ah = self.mant >> u64::HALF;
let al = self.mant & u64::LOMASK;
let bh = b.mant >> u64::HALF;
let bl = b.mant & u64::LOMASK;
// Get our products
let ah_bl = ah * bl;
let al_bh = al * bh;
let al_bl = al * bl;
let ah_bh = ah * bh;
let mut tmp = (ah_bl & u64::LOMASK) + (al_bh & u64::LOMASK) + (al_bl >> u64::HALF);
// round up
tmp += 1 << (u64::HALF-1);
ExtendedFloat {
mant: ah_bh + (ah_bl >> u64::HALF) + (al_bh >> u64::HALF) + (tmp >> u64::HALF),
exp: self.exp + b.exp + u64::FULL
}
}
/// Multiply in-place, as if by `a*b`.
///
/// The result is not normalized.
pub fn imul(&mut self, b: &ExtendedFloat) {
*self = self.mul(b);
}
// NORMALIZE
/// Normalize float-point number.
///
/// Shift the mantissa so the number of leading zeros is 0, or the value
/// itself is 0.
///
/// Get the number of bytes shifted.
pub fn normalize(&mut self) -> u32 {
// Note:
// Using the cltz intrinsic via leading_zeros is way faster (~10x)
// than shifting 1-bit at a time, via while loop, and also way
// faster (~2x) than an unrolled loop that checks at 32, 16, 4,
// 2, and 1 bit.
//
// Using a modulus of pow2 (which will get optimized to a bitwise
// and with 0x3F or faster) is slightly slower than an if/then,
// however, removing the if/then will likely optimize more branched
// code as it removes conditional logic.
// Calculate the number of leading zeros, and then zero-out
// any overflowing bits, to avoid shl overflow when self.mant == 0.
let shift = if self.mant == 0 { 0 } else { self.mant.leading_zeros() };
shl(self, shift as i32);
shift
}
// ROUND
/// Lossy round float-point number to native mantissa boundaries.
pub(crate) fn round_to_native<F>(&mut self)
where F: Float
{
round_to_native::<F>(self)
}
// INTO
/// Convert into lower-precision native float.
pub fn into_float<F: Float>(mut self) -> F {
self.round_to_native::<F>();
into_float(self)
}
}
+11
View File
@@ -0,0 +1,11 @@
mod cached;
mod cached_float80;
mod convert;
mod float;
mod num;
mod parse;
mod rounding;
mod shift;
// API.
pub use self::parse::parse_float;
+379
View File
@@ -0,0 +1,379 @@
use std::ops;
pub trait AsPrimitive:
Sized +
Copy +
PartialEq +
PartialOrd +
Send +
Sync
{
fn as_u8(self) -> u8;
fn as_u16(self) -> u16;
fn as_u32(self) -> u32;
fn as_u64(self) -> u64;
fn as_u128(self) -> u128;
fn as_usize(self) -> usize;
fn as_i8(self) -> i8;
fn as_i16(self) -> i16;
fn as_i32(self) -> i32;
fn as_i64(self) -> i64;
fn as_i128(self) -> i128;
fn as_isize(self) -> isize;
fn as_f32(self) -> f32;
fn as_f64(self) -> f64;
}
macro_rules! as_primitive_impl {
($($t:tt)*) => ($(
impl AsPrimitive for $t {
#[inline]
fn as_u8(self) -> u8 {
self as u8
}
#[inline]
fn as_u16(self) -> u16 {
self as u16
}
#[inline]
fn as_u32(self) -> u32 {
self as u32
}
#[inline]
fn as_u64(self) -> u64 {
self as u64
}
#[inline]
fn as_u128(self) -> u128 {
self as u128
}
#[inline]
fn as_usize(self) -> usize {
self as usize
}
#[inline]
fn as_i8(self) -> i8 {
self as i8
}
#[inline]
fn as_i16(self) -> i16 {
self as i16
}
#[inline]
fn as_i32(self) -> i32 {
self as i32
}
#[inline]
fn as_i64(self) -> i64 {
self as i64
}
#[inline]
fn as_i128(self) -> i128 {
self as i128
}
#[inline]
fn as_isize(self) -> isize {
self as isize
}
#[inline]
fn as_f32(self) -> f32 {
self as f32
}
#[inline]
fn as_f64(self) -> f64 {
self as f64
}
}
)*)
}
as_primitive_impl! { u8 u16 u32 u64 u128 usize i8 i16 i32 i64 i128 isize f32 f64 }
/// An interface for casting between machine scalars.
pub trait AsCast: AsPrimitive {
/// Creates a number from another value that can be converted into
/// a primitive via the `AsPrimitive` trait.
fn as_cast<N: AsPrimitive>(n: N) -> Self;
}
macro_rules! as_cast_impl {
($t:ty, $meth:ident) => {
impl AsCast for $t {
#[inline]
fn as_cast<N: AsPrimitive>(n: N) -> $t {
n.$meth()
}
}
};
}
as_cast_impl!(u8, as_u8);
as_cast_impl!(u16, as_u16);
as_cast_impl!(u32, as_u32);
as_cast_impl!(u64, as_u64);
as_cast_impl!(u128, as_u128);
as_cast_impl!(usize, as_usize);
as_cast_impl!(i8, as_i8);
as_cast_impl!(i16, as_i16);
as_cast_impl!(i32, as_i32);
as_cast_impl!(i64, as_i64);
as_cast_impl!(i128, as_i128);
as_cast_impl!(isize, as_isize);
as_cast_impl!(f32, as_f32);
as_cast_impl!(f64, as_f64);
pub trait Number:
AsCast +
ops::Add<Output=Self> +
ops::AddAssign +
ops::Div<Output=Self> +
ops::DivAssign +
ops::Mul<Output=Self> +
ops::MulAssign +
ops::Rem<Output=Self> +
ops::RemAssign +
ops::Sub<Output=Self> +
ops::SubAssign
{}
macro_rules! number_impl {
($($t:tt)*) => ($(
impl Number for $t {
}
)*)
}
number_impl! { u8 u16 u32 u64 u128 usize i8 i16 i32 i64 i128 isize f32 f64 }
pub trait Integer:
Number +
ops::BitAnd<Output=Self> +
ops::BitAndAssign
{
const ZERO: Self;
}
macro_rules! integer_impl {
($($t:tt)*) => ($(
impl Integer for $t {
const ZERO: $t = 0;
}
)*)
}
integer_impl! { u8 u16 u32 u64 u128 usize i8 i16 i32 i64 i128 isize }
pub trait UnsignedInteger: Integer {}
macro_rules! unsigned_integer_impl {
($($t:ty)*) => ($(
impl UnsignedInteger for $t {}
)*)
}
unsigned_integer_impl! { u8 u16 u32 u64 u128 usize }
/// Type trait for the mantissa type.
pub trait Mantissa: UnsignedInteger {
/// Mask for the left-most bit, to check if the value is normalized.
const NORMALIZED_MASK: Self;
/// Mask to extract the high bits from the integer.
const HIMASK: Self;
/// Mask to extract the low bits from the integer.
const LOMASK: Self;
/// Full size of the integer, in bits.
const FULL: i32;
/// Half size of the integer, in bits.
const HALF: i32 = Self::FULL / 2;
}
impl Mantissa for u64 {
const NORMALIZED_MASK: u64 = 0x8000000000000000;
const HIMASK: u64 = 0xFFFFFFFF00000000;
const LOMASK: u64 = 0x00000000FFFFFFFF;
const FULL: i32 = 64;
}
/// Get exact exponent limit for radix.
pub trait Float: Number {
/// Unsigned type of the same size.
type Unsigned: UnsignedInteger;
const ZERO: Self;
// MASKS
/// Bitmask for the sign bit.
const SIGN_MASK: Self::Unsigned;
/// Bitmask for the exponent, including the hidden bit.
const EXPONENT_MASK: Self::Unsigned;
/// Bitmask for the hidden bit in exponent, which is an implicit 1 in the fraction.
const HIDDEN_BIT_MASK: Self::Unsigned;
/// Bitmask for the mantissa (fraction), excluding the hidden bit.
const MANTISSA_MASK: Self::Unsigned;
// PROPERTIES
/// Positive infinity as bits.
const INFINITY_BITS: Self::Unsigned;
/// Positive infinity as bits.
const NEGATIVE_INFINITY_BITS: Self::Unsigned;
/// Size of the significand (mantissa) without hidden bit.
const MANTISSA_SIZE: i32;
/// Bias of the exponet
const EXPONENT_BIAS: i32;
/// Exponent portion of a denormal float.
const DENORMAL_EXPONENT: i32;
/// Maximum exponent value in float.
const MAX_EXPONENT: i32;
// ROUNDING
/// Default number of bits to shift (or 64 - mantissa size - 1).
const DEFAULT_SHIFT: i32;
/// Mask to determine if a full-carry occurred (1 in bit above hidden bit).
const CARRY_MASK: u64;
/// Get min and max exponent limits (exact) from radix.
fn exponent_limit() -> (i32, i32);
/// Get the number of digits that can be shifted from exponent to mantissa.
fn mantissa_limit() -> i32;
// Re-exported methods from std.
fn powi(self, n: i32) -> Self;
fn from_bits(u: Self::Unsigned) -> Self;
fn to_bits(self) -> Self::Unsigned;
/// Returns true if the float is a denormal.
#[inline]
fn is_denormal(self) -> bool {
self.to_bits() & Self::EXPONENT_MASK == Self::Unsigned::ZERO
}
/// Get exponent component from the float.
#[inline]
fn exponent(self) -> i32 {
if self.is_denormal() {
return Self::DENORMAL_EXPONENT;
}
let bits = self.to_bits();
let biased_e: i32 = (bits & Self::EXPONENT_MASK).as_i32() >> Self::MANTISSA_SIZE;
biased_e - Self::EXPONENT_BIAS
}
/// Get mantissa (significand) component from float.
#[inline]
fn mantissa(self) -> Self::Unsigned {
let bits = self.to_bits();
let s = bits & Self::MANTISSA_MASK;
if !self.is_denormal() {
s + Self::HIDDEN_BIT_MASK
} else {
s
}
}
}
impl Float for f32 {
type Unsigned = u32;
const ZERO: f32 = 0.0;
const SIGN_MASK: u32 = 0x80000000;
const EXPONENT_MASK: u32 = 0x7F800000;
const HIDDEN_BIT_MASK: u32 = 0x00800000;
const MANTISSA_MASK: u32 = 0x007FFFFF;
const INFINITY_BITS: u32 = 0x7F800000;
const NEGATIVE_INFINITY_BITS: u32 = Self::INFINITY_BITS | Self::SIGN_MASK;
const MANTISSA_SIZE: i32 = 23;
const EXPONENT_BIAS: i32 = 127 + Self::MANTISSA_SIZE;
const DENORMAL_EXPONENT: i32 = 1 - Self::EXPONENT_BIAS;
const MAX_EXPONENT: i32 = 0xFF - Self::EXPONENT_BIAS;
const DEFAULT_SHIFT: i32 = u64::FULL - f32::MANTISSA_SIZE - 1;
const CARRY_MASK: u64 = 0x1000000;
#[inline]
fn exponent_limit() -> (i32, i32) {
(-10, 10)
}
#[inline]
fn mantissa_limit() -> i32 {
7
}
#[inline]
fn powi(self, n: i32) -> f32 {
f32::powi(self, n)
}
#[inline]
fn from_bits(u: u32) -> f32 {
f32::from_bits(u)
}
#[inline]
fn to_bits(self) -> u32 {
f32::to_bits(self)
}
}
impl Float for f64 {
type Unsigned = u64;
const ZERO: f64 = 0.0;
const SIGN_MASK: u64 = 0x8000000000000000;
const EXPONENT_MASK: u64 = 0x7FF0000000000000;
const HIDDEN_BIT_MASK: u64 = 0x0010000000000000;
const MANTISSA_MASK: u64 = 0x000FFFFFFFFFFFFF;
const INFINITY_BITS: u64 = 0x7FF0000000000000;
const NEGATIVE_INFINITY_BITS: u64 = Self::INFINITY_BITS | Self::SIGN_MASK;
const MANTISSA_SIZE: i32 = 52;
const EXPONENT_BIAS: i32 = 1023 + Self::MANTISSA_SIZE;
const DENORMAL_EXPONENT: i32 = 1 - Self::EXPONENT_BIAS;
const MAX_EXPONENT: i32 = 0x7FF - Self::EXPONENT_BIAS;
const DEFAULT_SHIFT: i32 = u64::FULL - f64::MANTISSA_SIZE - 1;
const CARRY_MASK: u64 = 0x20000000000000;
#[inline]
fn exponent_limit() -> (i32, i32) {
(-22, 22)
}
#[inline]
fn mantissa_limit() -> i32 {
15
}
#[inline]
fn powi(self, n: i32) -> f64 {
f64::powi(self, n)
}
#[inline]
fn from_bits(u: u64) -> f64 {
f64::from_bits(u)
}
#[inline]
fn to_bits(self) -> u64 {
f64::to_bits(self)
}
}
+158
View File
@@ -0,0 +1,158 @@
use super::cached::*;
use super::float::ExtendedFloat;
use super::num::*;
const POW10: [u64; 20] = [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000, 100000000000000, 1000000000000000, 10000000000000000, 100000000000000000, 1000000000000000000, 10000000000000000000];
// FAST
// ----
/// Convert mantissa to exact value for a non-base2 power.
///
/// Returns the resulting float and if the value can be represented exactly.
fn fast_path<F>(mantissa: u64, exponent: i32)
-> Option<F>
where F: Float
{
// `mantissa >> (F::MANTISSA_SIZE+1) != 0` effectively checks if the
// value has a no bits above the hidden bit, which is what we want.
let (min_exp, max_exp) = F::exponent_limit();
let shift_exp = F::mantissa_limit();
let mantissa_size = F::MANTISSA_SIZE + 1;
if mantissa >> mantissa_size != 0 {
// Would require truncation of the mantissa.
None
} else if exponent == 0 {
// 0 exponent, same as value, exact representation.
let float = F::as_cast(mantissa);
Some(float)
} else if exponent >= min_exp && exponent <= max_exp {
// Value can be exactly represented, return the value.
// Use powi, since it's correct, and faster on
// the fast-path.
let float = F::as_cast(mantissa);
let base = F::as_cast(10i32);
Some(float.mul(base.powi(exponent)))
} else if exponent >= 0 && exponent <= max_exp + shift_exp {
// Check to see if we have a disguised fast-path, where the
// number of digits in the mantissa is very small, but and
// so digits can be shifted from the exponent to the mantissa.
// https://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
let small_powers = POW10;
let shift = exponent - max_exp;
let power = small_powers[shift.as_usize()];
// Compute the product of the power, if it overflows,
// prematurely return early, otherwise, if we didn't overshoot,
// we can get an exact value.
let value = mantissa.checked_mul(power)?;
if value >> mantissa_size != 0 {
None
} else {
// Use powi, since it's correct, and faster on
// the fast-path.
let float = F::as_cast(value);
let base = F::as_cast(10i32);
Some(float.mul(base.powi(max_exp)))
}
} else {
// Cannot be exactly represented, exponent too small or too big,
// would require truncation.
None
}
}
// MODERATE
// --------
/// Multiply the floating-point by the exponent.
///
/// Multiply by pre-calculated powers of the base, modify the extended-
/// float, and return if new value and if the value can be represented
/// accurately.
fn multiply_exponent_extended<F>(fp: &mut ExtendedFloat, exponent: i32)
where F: Float
{
let powers = ExtendedFloat::get_powers();
let exponent = exponent.saturating_add(powers.bias);
let small_index = exponent % powers.step;
let large_index = exponent / powers.step;
if exponent < 0 {
// Guaranteed underflow (assign 0).
fp.mant = 0;
} else if large_index as usize >= powers.large.len() {
// Overflow (assign infinity)
fp.mant = 1 << 63;
fp.exp = 0x7FF;
} else {
// Within the valid exponent range, multiply by the large and small
// exponents and return the resulting value.
// Multiply by the small power.
// Check if we can directly multiply by an integer, if not,
// use extended-precision multiplication.
match fp.mant.overflowing_mul(powers.get_small_int(small_index.as_usize())) {
// Overflow, multiplication unsuccessful, go slow path.
(_, true) => {
fp.normalize();
fp.imul(&powers.get_small(small_index.as_usize()));
},
// No overflow, multiplication successful.
(mant, false) => {
fp.mant = mant;
fp.normalize();
},
}
// Multiply by the large power
fp.imul(&powers.get_large(large_index.as_usize()));
// Normalize the floating point (and the errors).
fp.normalize();
}
}
/// Create a precise native float using an intermediate extended-precision float.
///
/// Return the float approximation and if the value can be accurately
/// represented with mantissa bits of precision.
pub(super) fn moderate_path<F>(mantissa: u64, exponent: i32) -> F
where F: Float
{
let mut fp = ExtendedFloat { mant: mantissa, exp: 0 };
multiply_exponent_extended::<F>(&mut fp, exponent);
fp.into_float::<F>()
}
// PARSE
// -----
/// Parse non-power-of-two radix string to native float.
///
/// * `mantissa` - Significant digits for float.
/// * `exponent` - Mantissa exponent in decimal.
///
/// # Warning
/// The exponent is not the parsed exponent, for example:
/// "2.543" would have a mantissa of `2543` and an exponent of `-3`,
/// signifying it should be 2543 * 10^-3.
pub fn parse_float<F>(mantissa: u64, exponent: i32, truncated: bool) -> F
where F: Float
{
// Process the state to a float.
if mantissa == 0 {
// Literal 0, return early.
// Value cannot be truncated, since truncation only occurs on
// overflow or underflow.
F::ZERO
} else if !truncated {
// Try the fast path, no mantissa truncation.
if let Some(float) = fast_path::<F>(mantissa, exponent) {
float
} else {
moderate_path(mantissa, exponent)
}
} else {
moderate_path(mantissa, exponent)
}
}
+160
View File
@@ -0,0 +1,160 @@
use std::mem;
use super::float::ExtendedFloat;
use super::num::*;
use super::shift::*;
/// Calculate a scalar factor of 2 above the halfway point.
#[inline]
fn nth_bit(n: u64) -> u64
{
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
debug_assert!(n < bits, "nth_bit() overflow in shl.");
1 << n
}
/// Generate a bitwise mask for the lower `n` bits.
#[inline]
fn lower_n_mask(n: u64) -> u64
{
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
debug_assert!(n <= bits, "lower_n_mask() overflow in shl.");
match n == bits {
true => u64::max_value(),
false => (1 << n) - 1,
}
}
/// Calculate the halfway point for the lower `n` bits.
#[inline]
fn lower_n_halfway(n: u64) -> u64
{
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
debug_assert!(n <= bits, "lower_n_halfway() overflow in shl.");
match n == 0 {
true => 0,
false => nth_bit(n - 1),
}
}
/// Calculate a bitwise mask with `n` 1 bits starting at the `bit` position.
#[inline]
fn internal_n_mask(bit: u64, n: u64) -> u64 {
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
debug_assert!(bit <= bits, "internal_n_halfway() overflow in shl.");
debug_assert!(n <= bits, "internal_n_halfway() overflow in shl.");
debug_assert!(bit >= n, "internal_n_halfway() overflow in sub.");
lower_n_mask(bit) ^ lower_n_mask(bit - n)
}
// Shift right N-bytes and round to the nearest.
//
// Return if we are above halfway and if we are halfway.
pub(crate) fn round_nearest(fp: &mut ExtendedFloat, shift: i32)
-> (bool, bool)
{
// Extract the truncated bits using mask.
// Calculate if the value of the truncated bits are either above
// the mid-way point, or equal to it.
//
// For example, for 4 truncated bytes, the mask would be b1111
// and the midway point would be b1000.
let mask: u64 = lower_n_mask(shift as u64);
let halfway: u64 = lower_n_halfway(shift as u64);
let truncated_bits = fp.mant & mask;
let is_above = truncated_bits > halfway;
let is_halfway = truncated_bits == halfway;
// Bit shift so the leading bit is in the hidden bit.
overflowing_shr(fp, shift);
(is_above, is_halfway)
}
// ROUND TO FLOAT
// Shift the ExtendedFloat fraction to the fraction bits in a native float.
//
// Floating-point arithmetic uses round to nearest, ties to even,
// which rounds to the nearest value, if the value is halfway in between,
// round to an even value.
pub(crate) fn round_to_float<F>(fp: &mut ExtendedFloat)
where F: Float
{
// Calculate the difference to allow a single calculation
// rather than a loop, to minimize the number of ops required.
// This does underflow detection.
let final_exp = fp.exp + F::DEFAULT_SHIFT;
if final_exp < F::DENORMAL_EXPONENT {
// We would end up with a denormal exponent, try to round to more
// digits. Only shift right if we can avoid zeroing out the value,
// which requires the exponent diff to be < M::BITS. The value
// is already normalized, so we shouldn't have any issue zeroing
// out the value.
let diff = F::DENORMAL_EXPONENT - fp.exp;
if diff <= u64::FULL {
// We can avoid underflow, can get a valid representation.
round_nearest(fp, diff);
} else {
// Certain underflow, assign literal 0s.
fp.mant = 0;
fp.exp = 0;
}
} else {
round_nearest(fp, F::DEFAULT_SHIFT);
}
if fp.mant & F::CARRY_MASK == F::CARRY_MASK {
// Roundup carried over to 1 past the hidden bit.
shr(fp, 1);
}
}
// AVOID OVERFLOW/UNDERFLOW
// Avoid overflow for large values, shift left as needed.
//
// Shift until a 1-bit is in the hidden bit, if the mantissa is not 0.
fn avoid_overflow<F>(fp: &mut ExtendedFloat)
where F: Float
{
// Calculate the difference to allow a single calculation
// rather than a loop, minimizing the number of ops required.
if fp.exp >= F::MAX_EXPONENT {
let diff = fp.exp - F::MAX_EXPONENT;
if diff <= F::MANTISSA_SIZE {
// Our overflow mask needs to start at the hidden bit, or at
// `F::MANTISSA_SIZE+1`, and needs to have `diff+1` bits set,
// to see if our value overflows.
let bit = (F::MANTISSA_SIZE + 1).as_u64();
let n = (diff + 1).as_u64();
let mask = internal_n_mask(bit, n);
if (fp.mant & mask) == 0 {
// If we have no 1-bit in the hidden-bit position,
// which is index 0, we need to shift 1.
let shift = diff + 1;
shl(fp, shift);
}
}
}
}
// ROUND TO NATIVE
// Round an extended-precision float to a native float representation.
pub(crate) fn round_to_native<F>(fp: &mut ExtendedFloat)
where F: Float
{
// Shift all the way left, to ensure a consistent representation.
// The following right-shifts do not work for a non-normalized number.
fp.normalize();
// Round so the fraction is in a native mantissa representation,
// and avoid overflow/underflow.
round_to_float::<F>(fp);
avoid_overflow::<F>(fp);
}
+38
View File
@@ -0,0 +1,38 @@
use std::mem;
use super::float::ExtendedFloat;
// Shift extended-precision float right `shift` bytes.
pub(crate) fn shr(fp: &mut ExtendedFloat, shift: i32)
{
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
debug_assert!((shift as u64) < bits, "shr() overflow in shift right.");
fp.mant >>= shift;
fp.exp += shift;
}
// Shift extended-precision float right `shift` bytes.
//
// Accepts when the shift is the same as the type size, and
// sets the value to 0.
pub(crate) fn overflowing_shr(fp: &mut ExtendedFloat, shift: i32)
{
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
debug_assert!((shift as u64) <= bits, "overflowing_shr() overflow in shift right.");
fp.mant = match shift as u64 == bits {
true => 0,
false => fp.mant >> shift,
};
fp.exp += shift;
}
// Shift extended-precision float left `shift` bytes.
pub(crate) fn shl(fp: &mut ExtendedFloat, shift: i32)
{
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
debug_assert!((shift as u64) < bits, "shl() overflow in shift left.");
fp.mant <<= shift;
fp.exp -= shift;
}