Work on bigint
Try splitting part of 'Int' into 'MinInt' so we don't need to implement everything on u256/i256

Add addsub test

Add mul/div/rem tests

Add cmp test

Remove 32-bit div implementation

formatting updates

disable div tests for now

Bigint updates

Big update

Fix widen mul

wrapping add

disable duplicate symbols in builtins

Apply temporary unord fix from @beetrees #593

tests

add lowerhex display

errors by ref

tests

fix-test

Update big tests

Fix core calls

Disable widen_mul for signed

Test adding symbols in build.rs

Add a feature to compile intrinsics that are missing on the system for testing

update

Disable f128 tests on platforms without system support

add missing build.rs file

pull cas file from master

tests

print more div values

Add a benchmark

Work on fixing bit widths

Update benchmark
tgross35 committed May 6, 2024
1 parent fcda955 commit 6d70c0f
Showing 23 changed files with 952 additions and 194 deletions.
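
For context on the headline change: 'MinInt' carries only the minimal items every integer type must provide, so the new u256/i256 types implement just that core, while native integers keep the full 'Int' interface. A rough sketch of the split (hedged; the member lists here are illustrative, not the crate's exact definitions):

    // Minimal base trait: all that u256/i256 have to implement.
    pub(crate) trait MinInt: Copy + PartialEq {
        type UnsignedInt;
        const ZERO: Self;
        const ONE: Self;
        const BITS: u32;
    }

    // Everything else stays on the full trait for native integers.
    pub(crate) trait Int: MinInt {
        fn wrapping_add(self, other: Self) -> Self;
        fn wrapping_sub(self, other: Self) -> Self;
        fn from_bool(b: bool) -> Self;
    }

Call sites that only need the constants, such as the Int::ZERO comparisons in src/float/add.rs below, accordingly migrate to MinInt::ZERO.
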
13 changes: 0 additions & 13 deletions build.rs
@@ -44,7 +44,6 @@ fn main() {
println!("cargo:rustc-cfg=feature=\"mem-unaligned\"");
}

// NOTE we are going to assume that llvm-target, what determines our codegen option, matches the
// target triple. This is usually correct for our built-in targets but can break in presence of
// custom targets, which can have arbitrary names.
let llvm_target = target.split('-').collect::<Vec<_>>();
@@ -478,10 +477,6 @@ mod c {
("__floatunsitf", "floatunsitf.c"),
("__trunctfdf2", "trunctfdf2.c"),
("__trunctfsf2", "trunctfsf2.c"),
("__addtf3", "addtf3.c"),
("__multf3", "multf3.c"),
("__subtf3", "subtf3.c"),
("__divtf3", "divtf3.c"),
("__powitf2", "powitf2.c"),
("__fe_getround", "fp_mode.c"),
("__fe_raise_inexact", "fp_mode.c"),
@@ -500,15 +495,11 @@
sources.extend(&[
("__extenddftf2", "extenddftf2.c"),
("__netf2", "comparetf2.c"),
("__addtf3", "addtf3.c"),
("__multf3", "multf3.c"),
("__subtf3", "subtf3.c"),
("__fixtfsi", "fixtfsi.c"),
("__floatsitf", "floatsitf.c"),
("__fixunstfsi", "fixunstfsi.c"),
("__floatunsitf", "floatunsitf.c"),
("__fe_getround", "fp_mode.c"),
("__divtf3", "divtf3.c"),
("__trunctfdf2", "trunctfdf2.c"),
("__trunctfsf2", "trunctfsf2.c"),
]);
@@ -518,15 +509,11 @@
sources.extend(&[
("__extenddftf2", "extenddftf2.c"),
("__netf2", "comparetf2.c"),
("__addtf3", "addtf3.c"),
("__multf3", "multf3.c"),
("__subtf3", "subtf3.c"),
("__fixtfsi", "fixtfsi.c"),
("__floatsitf", "floatsitf.c"),
("__fixunstfsi", "fixunstfsi.c"),
("__floatunsitf", "floatunsitf.c"),
("__fe_getround", "fp_mode.c"),
("__divtf3", "divtf3.c"),
("__trunctfdf2", "trunctfdf2.c"),
("__trunctfsf2", "trunctfsf2.c"),
]);
22 changes: 11 additions & 11 deletions src/float/add.rs
@@ -1,5 +1,5 @@
use crate::float::Float;
use crate::int::{CastInto, Int};
use crate::int::{CastInto, Int, MinInt};

/// Returns `a + b`
fn add<F: Float>(a: F, b: F) -> F
@@ -57,17 +57,17 @@ where
}

// zero + anything = anything
if a_abs == Int::ZERO {
if a_abs == MinInt::ZERO {
// but we need to get the sign right for zero + zero
if b_abs == Int::ZERO {
if b_abs == MinInt::ZERO {
return F::from_repr(a.repr() & b.repr());
} else {
return b;
}
}

// anything + zero = anything
if b_abs == Int::ZERO {
if b_abs == MinInt::ZERO {
return a;
}
}
@@ -113,10 +113,10 @@ where
// Shift the significand of b by the difference in exponents, with a sticky
// bottom bit to get rounding correct.
let align = a_exponent.wrapping_sub(b_exponent).cast();
if align != Int::ZERO {
if align != MinInt::ZERO {
if align < bits {
let sticky =
F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != Int::ZERO);
F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != MinInt::ZERO);
b_significand = (b_significand >> align.cast()) | sticky;
} else {
b_significand = one; // sticky; b is known to be non-zero.
@@ -125,8 +125,8 @@
if subtraction {
a_significand = a_significand.wrapping_sub(b_significand);
// If a == -b, return +zero.
if a_significand == Int::ZERO {
return F::from_repr(Int::ZERO);
if a_significand == MinInt::ZERO {
return F::from_repr(MinInt::ZERO);
}

// If partial cancellation occurred, we need to left-shift the result
@@ -143,8 +143,8 @@

// If the addition carried up, we need to right-shift the result and
// adjust the exponent:
if a_significand & implicit_bit << 4 != Int::ZERO {
let sticky = F::Int::from_bool(a_significand & one != Int::ZERO);
if a_significand & implicit_bit << 4 != MinInt::ZERO {
let sticky = F::Int::from_bool(a_significand & one != MinInt::ZERO);
a_significand = a_significand >> 1 | sticky;
a_exponent += 1;
}
@@ -160,7 +160,7 @@
// need to shift the significand.
let shift = (1 - a_exponent).cast();
let sticky =
F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != Int::ZERO);
F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != MinInt::ZERO);
a_significand = a_significand >> shift.cast() | sticky;
a_exponent = 0;
}
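
The add.rs hunks are mechanical 'Int' → 'MinInt' renames around the sticky-bit logic. As a standalone illustration of that rounding technique (a minimal sketch on plain u64, not the crate's generic code): when one significand is shifted right to match exponents, the bits shifted out are collapsed into the lowest bit so rounding can still see them.

    // Shift `sig` right by `shift`, OR-ing any lost bits into bit 0
    // (the "sticky" bit) so round-to-nearest-even stays correct.
    fn align_with_sticky(sig: u64, shift: u32) -> u64 {
        if shift == 0 {
            sig
        } else if shift < 64 {
            let lost = sig << (64 - shift); // exactly the discarded bits
            (sig >> shift) | (lost != 0) as u64
        } else {
            (sig != 0) as u64 // everything shifted out; only stickiness remains
        }
    }
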
2 changes: 1 addition & 1 deletion src/float/cmp.rs
@@ -1,7 +1,7 @@
#![allow(unreachable_code)]

use crate::float::Float;
use crate::int::Int;
use crate::int::MinInt;

#[derive(Clone, Copy)]
enum Result {
95 changes: 60 additions & 35 deletions src/float/div.rs
@@ -3,7 +3,9 @@
#![allow(clippy::needless_return)]

use crate::float::Float;
use crate::int::{CastInto, DInt, HInt, Int};
use crate::int::{CastInto, DInt, HInt, Int, MinInt};

use super::HalfRep;

fn div32<F: Float>(a: F, b: F) -> F
where
@@ -37,6 +39,11 @@ where
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;

// #[inline(always)]
// fn negate<T: Int>(a: T) -> T {
// T::wrapping_neg(a.signe)
// }

#[inline(always)]
fn negate_u32(a: u32) -> u32 {
(<i32>::wrapping_neg(a as i32)) as u32
@@ -459,10 +466,14 @@ where
i32: CastInto<F::Int>,
F::Int: CastInto<i32>,
u64: CastInto<F::Int>,
u64: CastInto<HalfRep<F>>,
F::Int: CastInto<HalfRep<F>>,
F::Int: From<HalfRep<F>>,
F::Int: From<u8>,
F::Int: CastInto<u64>,
i64: CastInto<F::Int>,
F::Int: CastInto<i64>,
F::Int: HInt,
F::Int: HInt + DInt,
{
const NUMBER_OF_HALF_ITERATIONS: usize = 3;
const NUMBER_OF_FULL_ITERATIONS: usize = 1;
@@ -471,7 +482,7 @@ where
let one = F::Int::ONE;
let zero = F::Int::ZERO;
let hw = F::BITS / 2;
let lo_mask = u64::MAX >> hw;
let lo_mask = F::Int::MAX >> hw;

let significand_bits = F::SIGNIFICAND_BITS;
let max_exponent = F::EXPONENT_MAX;
@@ -616,21 +627,23 @@ where

let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 {
// Starting with (n-1) half-width iterations
let b_uq1_hw: u32 =
(CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw)) as u32;
let b_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(
CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw),
);

// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
// with W0 being either 16 or 32 and W0 <= HW.
// That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
// b/2 is subtracted to obtain x0) wrapped to [0, 1) range.

// HW is at least 32. Shifting into the highest bits if needed.
let c_hw = (0x7504F333_u64 as u32).wrapping_shl(hw.wrapping_sub(32));
let c_hw = (CastInto::<HalfRep<F>>::cast(0x7504F333_u64)).wrapping_shl(hw.wrapping_sub(32));

// b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
// so x0 fits to UQ0.HW without wrapping.
let x_uq0_hw: u32 = {
let mut x_uq0_hw: u32 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
let x_uq0_hw: HalfRep<F> = {
let mut x_uq0_hw: HalfRep<F> =
c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
// dbg!(x_uq0_hw);
// An e_0 error is comprised of errors due to
// * x0 being an inherently imprecise first approximation of 1/b_hw
@@ -661,8 +674,9 @@ where
// no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
// expected to be strictly positive because b_UQ1_hw has its highest bit set
// and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
let corr_uq1_hw: u32 =
0.wrapping_sub(((x_uq0_hw as u64).wrapping_mul(b_uq1_hw as u64)) >> hw) as u32;
let corr_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(zero.wrapping_sub(
((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(b_uq1_hw))) >> hw,
));
// dbg!(corr_uq1_hw);

// Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
@@ -677,7 +691,9 @@
// The fact corr_UQ1_hw was virtually round up (due to result of
// multiplication being **first** truncated, then negated - to improve
// error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
x_uq0_hw = ((x_uq0_hw as u64).wrapping_mul(corr_uq1_hw as u64) >> (hw - 1)) as u32;
x_uq0_hw = ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(corr_uq1_hw))
>> (hw - 1))
.cast();
// dbg!(x_uq0_hw);
// Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
// representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
@@ -707,7 +723,7 @@ where
// be not below that value (see g(x) above), so it is safe to decrement just
// once after the final iteration. On the other hand, an effective value of
// divisor changes after this point (from b_hw to b), so adjust here.
x_uq0_hw.wrapping_sub(1_u32)
x_uq0_hw.wrapping_sub(HalfRep::<F>::ONE)
};

// Error estimations for full-precision iterations are calculated just
Expand All @@ -717,7 +733,7 @@ where
// Simulating operations on a twice_rep_t to perform a single final full-width
// iteration. Using ad-hoc multiplication implementations to take advantage
// of particular structure of operands.
let blo: u64 = (CastInto::<u64>::cast(b_uq1)) & lo_mask;
let blo: F::Int = b_uq1 & lo_mask;
// x_UQ0 = x_UQ0_hw * 2^HW - 1
// x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
//
@@ -726,19 +742,20 @@
// + [ x_UQ0_hw * blo ]
// - [ b_UQ1 ]
// = [ result ][.... discarded ...]
let corr_uq1 = negate_u64(
(x_uq0_hw as u64) * (b_uq1_hw as u64) + (((x_uq0_hw as u64) * (blo)) >> hw) - 1,
); // account for *possible* carry
let lo_corr = corr_uq1 & lo_mask;
let hi_corr = corr_uq1 >> hw;
let corr_uq1: F::Int = (F::Int::from(x_uq0_hw) * F::Int::from(b_uq1_hw)
+ ((F::Int::from(x_uq0_hw) * blo) >> hw))
.wrapping_sub(one)
.wrapping_neg(); // account for *possible* carry
let lo_corr: F::Int = corr_uq1 & lo_mask;
let hi_corr: F::Int = corr_uq1 >> hw;
// x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
let mut x_uq0: <F as Float>::Int = ((((x_uq0_hw as u64) * hi_corr) << 1)
.wrapping_add(((x_uq0_hw as u64) * lo_corr) >> (hw - 1))
.wrapping_sub(2))
.cast(); // 1 to account for the highest bit of corr_UQ1 can be 1
// 1 to account for possible carry
// Just like the case of half-width iterations but with possibility
// of overflowing by one extra Ulp of x_UQ0.
let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1)
.wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1))
.wrapping_sub(F::Int::from(2u8));
// 1 to account for the highest bit of corr_UQ1 can be 1
// 1 to account for possible carry
// Just like the case of half-width iterations but with possibility
// of overflowing by one extra Ulp of x_UQ0.
x_uq0 -= one;
// ... and then traditional fixup by 2 should work

@@ -755,8 +772,8 @@ where
x_uq0
} else {
// C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n
let c: <F as Float>::Int = (0x7504F333 << (F::BITS - 32)).cast();
let x_uq0: <F as Float>::Int = c.wrapping_sub(b_uq1);
let c: F::Int = (0x7504F333 << (F::BITS - 32)).cast();
let x_uq0: F::Int = c.wrapping_sub(b_uq1);
// E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64
x_uq0
};
@@ -799,14 +816,27 @@ where

// Add 2 to U_N due to final decrement.

let reciprocal_precision: <F as Float>::Int = 220.cast();
let reciprocal_precision: F::Int = if F::BITS == 32
&& NUMBER_OF_HALF_ITERATIONS == 2
&& NUMBER_OF_FULL_ITERATIONS == 1
{
74.cast()
} else if F::BITS == 32 && NUMBER_OF_HALF_ITERATIONS == 0 && NUMBER_OF_FULL_ITERATIONS == 3 {
10.cast()
} else if F::BITS == 64 && NUMBER_OF_HALF_ITERATIONS == 3 && NUMBER_OF_FULL_ITERATIONS == 1 {
220.cast()
} else if F::BITS == 128 && NUMBER_OF_HALF_ITERATIONS == 4 && NUMBER_OF_FULL_ITERATIONS == 1 {
13922.cast()
} else {
panic!("invalid iterations for the specified bits");
};

// Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W
let x_uq0 = x_uq0 - reciprocal_precision;
// Now 1/b - (2*P) * 2^-W < x < 1/b
// FIXME Is x_UQ0 still >= 0.5?

let mut quotient: <F as Float>::Int = x_uq0.widen_mul(a_significand << 1).hi();
let mut quotient: F::Int = x_uq0.widen_mul(a_significand << 1).hi();
// Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).

// quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
Expand Down Expand Up @@ -914,13 +944,8 @@ intrinsics! {
div64(a, b)
}

// TODO: how should `HInt` be handled?
pub extern "C" fn __divtf3(a: f128, b: f128) -> f128 {
if cfg!(target_pointer_width = "64") {
div32(a, b)
} else {
div64(a, b)
}
div64(a, b)
}

#[cfg(target_arch = "arm")]
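
The bulk of the div.rs change makes the reciprocal refinement generic over HalfRep<F> (the half-width integer behind the float's representation) instead of hard-coded u32/u64, and replaces the single reciprocal_precision constant with per-format error bounds. Each half-width step is a Newton-Raphson iteration x1 = x0 * (2 - b * x0), which roughly doubles the number of correct bits. A concrete one-step sketch in u32 fixed point, mirroring the generic code above (names assumed):

    // One Newton-Raphson step refining x ~= 1/b.
    // b_uq1: b in [1, 2) as UQ1.31; x_uq0: x in (0.5, 1] as UQ0.32.
    fn recip_step(x_uq0: u32, b_uq1: u32) -> u32 {
        // corr = 2 - b*x: truncate the product to UQ1.31, then the
        // wrapping negation yields the correction in the same format.
        let corr_uq1 = 0u32.wrapping_sub(((x_uq0 as u64 * b_uq1 as u64) >> 32) as u32);
        // x * corr, renormalized from the UQ1.63 product back to UQ0.32.
        ((x_uq0 as u64 * corr_uq1 as u64) >> 31) as u32
    }

The starting guess is x0 = C - b/2 with C = 0x7504F333 (3/4 + 1/sqrt(2) as a UQ0.32 fraction); the chosen counts of half- and full-width iterations then determine which error bound gets subtracted as reciprocal_precision.
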
2 changes: 1 addition & 1 deletion src/float/extend.rs
@@ -1,5 +1,5 @@
use crate::float::Float;
use crate::int::{CastInto, Int};
use crate::int::{CastInto, Int, MinInt};

/// Generic conversion from a narrower to a wider IEEE-754 floating-point type
fn extend<F: Float, R: Float>(a: F) -> R
23 changes: 20 additions & 3 deletions src/float/mod.rs
@@ -1,6 +1,8 @@
use core::ops;

use super::int::Int;
use crate::int::DInt;

use super::int::{Int, MinInt};

pub mod add;
pub mod cmp;
@@ -12,6 +14,9 @@ pub mod pow;
pub mod sub;
pub mod trunc;

/// Wrapper to extract the integer type half of the float's size
pub(crate) type HalfRep<F: Float> = <F::Int as DInt>::H;

public_test_dep! {
/// Trait for some basic operations on floats
pub(crate) trait Float:
@@ -127,7 +132,20 @@ macro_rules! float_impl {
self.to_bits() as Self::SignedInt
}
fn eq_repr(self, rhs: Self) -> bool {
if self.is_nan() && rhs.is_nan() {
#[cfg(feature = "mangled-names")]
fn is_nan(x: $ty) -> bool {
// When using mangled-names, the "real" compiler-builtins might not have the
// necessary builtin (__unordtf2) to test whether `f128` is NaN.
// FIXME: Remove once the nightly toolchain has the __unordtf2 builtin
// x is NaN if all the bits of the exponent are set and the significand is non-0
x.repr() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK
&& x.repr() & $ty::SIGNIFICAND_MASK != 0
}
#[cfg(not(feature = "mangled-names"))]
fn is_nan(x: $ty) -> bool {
x.is_nan()
}
if is_nan(self) && is_nan(rhs) {
true
} else {
self.repr() == rhs.repr()
@@ -171,7 +189,6 @@
};
}

// FIXME: there aren't any intrinsics for f16 that I know of, do we need this?
float_impl!(f16, u16, i16, i16, 16, 10);
float_impl!(f32, u32, i32, i16, 32, 23);
float_impl!(f64, u64, i64, i16, 64, 52);
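
Two notes on mod.rs: the new HalfRep<F> alias is what lets div.rs pick its half-width working type (u32 for f64, u64 for f128), and eq_repr now tests NaN from the bit pattern because, under mangled-names, the system compiler-builtins may lack __unordtf2 for f128. The test is the standard IEEE-754 rule: exponent bits all set, significand nonzero. A concrete f32 version (a sketch with hard-coded masks; the macro above generates this generically):

    const EXPONENT_MASK: u32 = 0x7f80_0000;
    const SIGNIFICAND_MASK: u32 = 0x007f_ffff;

    fn is_nan_repr(x: f32) -> bool {
        let bits = x.to_bits();
        // NaN: exponent all ones and significand nonzero.
        bits & EXPONENT_MASK == EXPONENT_MASK && bits & SIGNIFICAND_MASK != 0
    }

Here is_nan_repr(f32::NAN) is true, while is_nan_repr(f32::INFINITY) is false because infinity's significand is zero.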