Work on bigint
Try splitting part of 'Int' into 'MinInt' so we don't need to implement everything on u256/i256

Add addsub test

Add mul/div/rem tests

Add cmp test

Remove 32-bit div implementation

formatting updates

disable div tests for now

Bigint updates

Big update

Fix widen mul

wrapping add

disable duplicate symbols in builtins

Apply temporary unord fix from @beetrees #593

tests

add lowerhex display

errors by ref

tests

fix-test

Update big tests

Fix core calls

Disable widen_mul for signed

Test adding symbols in build.rs

Add a feature to compile intrinsics that are missing on the system for testing

update

Disable f128 tests on platforms without system support

add missing build.rs file

pull cas file from master

tests

print more div values

Add a benchmark

Work on fixing bit widths

Update benchmark
tgross35 committed May 6, 2024
1 parent fcda955 commit 6d70c0f
Showing 23 changed files with 952 additions and 194 deletions.
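
For context on the headline change: 'MinInt' carries only the minimal items every integer type must provide, so the new u256/i256 types implement just that core, while native integers keep the full 'Int' interface. A rough sketch of the split (hedged; the member lists here are illustrative, not the crate's exact definitions):

    // Minimal base trait: all that u256/i256 have to implement.
    pub(crate) trait MinInt: Copy + PartialEq {
        type UnsignedInt;
        const ZERO: Self;
        const ONE: Self;
        const BITS: u32;
    }

    // Everything else stays on the full trait for native integers.
    pub(crate) trait Int: MinInt {
        fn wrapping_add(self, other: Self) -> Self;
        fn wrapping_sub(self, other: Self) -> Self;
        fn from_bool(b: bool) -> Self;
    }

Call sites that only need the constants, such as the Int::ZERO comparisons in src/float/add.rs below, accordingly migrate to MinInt::ZERO.
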
13 changes: 0 additions & 13 deletions build.rs
@@ -44,7 +44,6 @@ fn main() {
println!("cargo:rustc-cfg=feature=\"mem-unaligned\"");
}

// NOTE we are going to assume that llvm-target, what determines our codegen option, matches the
// target triple. This is usually correct for our built-in targets but can break in presence of
// custom targets, which can have arbitrary names.
let llvm_target = target.split('-').collect::<Vec<_>>();
@@ -478,10 +477,6 @@ mod c {
("__floatunsitf", "floatunsitf.c"),
("__trunctfdf2", "trunctfdf2.c"),
("__trunctfsf2", "trunctfsf2.c"),
("__addtf3", "addtf3.c"),
("__multf3", "multf3.c"),
("__subtf3", "subtf3.c"),
("__divtf3", "divtf3.c"),
("__powitf2", "powitf2.c"),
("__fe_getround", "fp_mode.c"),
("__fe_raise_inexact", "fp_mode.c"),
@@ -500,15 +495,11 @@
sources.extend(&[
("__extenddftf2", "extenddftf2.c"),
("__netf2", "comparetf2.c"),
("__addtf3", "addtf3.c"),
("__multf3", "multf3.c"),
("__subtf3", "subtf3.c"),
("__fixtfsi", "fixtfsi.c"),
("__floatsitf", "floatsitf.c"),
("__fixunstfsi", "fixunstfsi.c"),
("__floatunsitf", "floatunsitf.c"),
("__fe_getround", "fp_mode.c"),
("__divtf3", "divtf3.c"),
("__trunctfdf2", "trunctfdf2.c"),
("__trunctfsf2", "trunctfsf2.c"),
]);
@@ -518,15 +509,11 @@
sources.extend(&[
("__extenddftf2", "extenddftf2.c"),
("__netf2", "comparetf2.c"),
("__addtf3", "addtf3.c"),
("__multf3", "multf3.c"),
("__subtf3", "subtf3.c"),
("__fixtfsi", "fixtfsi.c"),
("__floatsitf", "floatsitf.c"),
("__fixunstfsi", "fixunstfsi.c"),
("__floatunsitf", "floatunsitf.c"),
("__fe_getround", "fp_mode.c"),
("__divtf3", "divtf3.c"),
("__trunctfdf2", "trunctfdf2.c"),
("__trunctfsf2", "trunctfsf2.c"),
]);
22 changes: 11 additions & 11 deletions src/float/add.rs
@@ -1,5 +1,5 @@
use crate::float::Float;
use crate::int::{CastInto, Int};
use crate::int::{CastInto, Int, MinInt};

/// Returns `a + b`
fn add<F: Float>(a: F, b: F) -> F
@@ -57,17 +57,17 @@ where
}

// zero + anything = anything
if a_abs == Int::ZERO {
if a_abs == MinInt::ZERO {
// but we need to get the sign right for zero + zero
if b_abs == Int::ZERO {
if b_abs == MinInt::ZERO {
return F::from_repr(a.repr() & b.repr());
} else {
return b;
}
}

// anything + zero = anything
if b_abs == Int::ZERO {
if b_abs == MinInt::ZERO {
return a;
}
}
@@ -113,10 +113,10 @@ where
// Shift the significand of b by the difference in exponents, with a sticky
// bottom bit to get rounding correct.
let align = a_exponent.wrapping_sub(b_exponent).cast();
if align != Int::ZERO {
if align != MinInt::ZERO {
if align < bits {
let sticky =
F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != Int::ZERO);
F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != MinInt::ZERO);
b_significand = (b_significand >> align.cast()) | sticky;
} else {
b_significand = one; // sticky; b is known to be non-zero.
@@ -125,8 +125,8 @@
if subtraction {
a_significand = a_significand.wrapping_sub(b_significand);
// If a == -b, return +zero.
if a_significand == Int::ZERO {
return F::from_repr(Int::ZERO);
if a_significand == MinInt::ZERO {
return F::from_repr(MinInt::ZERO);
}

// If partial cancellation occurred, we need to left-shift the result
@@ -143,8 +143,8 @@

// If the addition carried up, we need to right-shift the result and
// adjust the exponent:
if a_significand & implicit_bit << 4 != Int::ZERO {
let sticky = F::Int::from_bool(a_significand & one != Int::ZERO);
if a_significand & implicit_bit << 4 != MinInt::ZERO {
let sticky = F::Int::from_bool(a_significand & one != MinInt::ZERO);
a_significand = a_significand >> 1 | sticky;
a_exponent += 1;
}
@@ -160,7 +160,7 @@
// need to shift the significand.
let shift = (1 - a_exponent).cast();
let sticky =
F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != Int::ZERO);
F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != MinInt::ZERO);
a_significand = a_significand >> shift.cast() | sticky;
a_exponent = 0;
}
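
The add.rs hunks are mechanical 'Int' → 'MinInt' renames around the sticky-bit logic. As a standalone illustration of that rounding technique (a minimal sketch on plain u64, not the crate's generic code): when one significand is shifted right to match exponents, the bits shifted out are collapsed into the lowest bit so rounding can still see them.

    // Shift `sig` right by `shift`, OR-ing any lost bits into bit 0
    // (the "sticky" bit) so round-to-nearest-even stays correct.
    fn align_with_sticky(sig: u64, shift: u32) -> u64 {
        if shift == 0 {
            sig
        } else if shift < 64 {
            let lost = sig << (64 - shift); // exactly the discarded bits
            (sig >> shift) | (lost != 0) as u64
        } else {
            (sig != 0) as u64 // everything shifted out; only stickiness remains
        }
    }
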
2 changes: 1 addition & 1 deletion src/float/cmp.rs
@@ -1,7 +1,7 @@
#![allow(unreachable_code)]

use crate::float::Float;
use crate::int::Int;
use crate::int::MinInt;

#[derive(Clone, Copy)]
enum Result {
95 changes: 60 additions & 35 deletions src/float/div.rs
@@ -3,7 +3,9 @@
#![allow(clippy::needless_return)]

use crate::float::Float;
use crate::int::{CastInto, DInt, HInt, Int};
use crate::int::{CastInto, DInt, HInt, Int, MinInt};

use super::HalfRep;

fn div32<F: Float>(a: F, b: F) -> F
where
@@ -37,6 +39,11 @@ where
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;

// #[inline(always)]
// fn negate<T: Int>(a: T) -> T {
// T::wrapping_neg(a.signe)
// }

#[inline(always)]
fn negate_u32(a: u32) -> u32 {
(<i32>::wrapping_neg(a as i32)) as u32
@@ -459,10 +466,14 @@ where
i32: CastInto<F::Int>,
F::Int: CastInto<i32>,
u64: CastInto<F::Int>,
u64: CastInto<HalfRep<F>>,
F::Int: CastInto<HalfRep<F>>,
F::Int: From<HalfRep<F>>,
F::Int: From<u8>,
F::Int: CastInto<u64>,
i64: CastInto<F::Int>,
F::Int: CastInto<i64>,
F::Int: HInt,
F::Int: HInt + DInt,
{
const NUMBER_OF_HALF_ITERATIONS: usize = 3;
const NUMBER_OF_FULL_ITERATIONS: usize = 1;
@@ -471,7 +482,7 @@ where
let one = F::Int::ONE;
let zero = F::Int::ZERO;
let hw = F::BITS / 2;
let lo_mask = u64::MAX >> hw;
let lo_mask = F::Int::MAX >> hw;

let significand_bits = F::SIGNIFICAND_BITS;
let max_exponent = F::EXPONENT_MAX;
@@ -616,21 +627,23 @@ where

let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 {
// Starting with (n-1) half-width iterations
let b_uq1_hw: u32 =
(CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw)) as u32;
let b_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(
CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw),
);

// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
// with W0 being either 16 or 32 and W0 <= HW.
// That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
// b/2 is subtracted to obtain x0) wrapped to [0, 1) range.

// HW is at least 32. Shifting into the highest bits if needed.
let c_hw = (0x7504F333_u64 as u32).wrapping_shl(hw.wrapping_sub(32));
let c_hw = (CastInto::<HalfRep<F>>::cast(0x7504F333_u64)).wrapping_shl(hw.wrapping_sub(32));

// b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
// so x0 fits to UQ0.HW without wrapping.
let x_uq0_hw: u32 = {
let mut x_uq0_hw: u32 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
let x_uq0_hw: HalfRep<F> = {
let mut x_uq0_hw: HalfRep<F> =
c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
// dbg!(x_uq0_hw);
// An e_0 error is comprised of errors due to
// * x0 being an inherently imprecise first approximation of 1/b_hw
@@ -661,8 +674,9 @@ where
// no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
// expected to be strictly positive because b_UQ1_hw has its highest bit set
// and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
let corr_uq1_hw: u32 =
0.wrapping_sub(((x_uq0_hw as u64).wrapping_mul(b_uq1_hw as u64)) >> hw) as u32;
let corr_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(zero.wrapping_sub(
((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(b_uq1_hw))) >> hw,
));
// dbg!(corr_uq1_hw);

// Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
@@ -677,7 +691,9 @@
// The fact corr_UQ1_hw was virtually round up (due to result of
// multiplication being **first** truncated, then negated - to improve
// error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
x_uq0_hw = ((x_uq0_hw as u64).wrapping_mul(corr_uq1_hw as u64) >> (hw - 1)) as u32;
x_uq0_hw = ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(corr_uq1_hw))
>> (hw - 1))
.cast();
// dbg!(x_uq0_hw);
// Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
// representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
@@ -707,7 +723,7 @@ where
// be not below that value (see g(x) above), so it is safe to decrement just
// once after the final iteration. On the other hand, an effective value of
// divisor changes after this point (from b_hw to b), so adjust here.
x_uq0_hw.wrapping_sub(1_u32)
x_uq0_hw.wrapping_sub(HalfRep::<F>::ONE)
};

// Error estimations for full-precision iterations are calculated just
Expand All @@ -717,7 +733,7 @@ where
// Simulating operations on a twice_rep_t to perform a single final full-width
// iteration. Using ad-hoc multiplication implementations to take advantage
// of particular structure of operands.
let blo: u64 = (CastInto::<u64>::cast(b_uq1)) & lo_mask;
let blo: F::Int = b_uq1 & lo_mask;
// x_UQ0 = x_UQ0_hw * 2^HW - 1
// x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
//
@@ -726,19 +742,20 @@
// + [ x_UQ0_hw * blo ]
// - [ b_UQ1 ]
// = [ result ][.... discarded ...]
let corr_uq1 = negate_u64(
(x_uq0_hw as u64) * (b_uq1_hw as u64) + (((x_uq0_hw as u64) * (blo)) >> hw) - 1,
); // account for *possible* carry
let lo_corr = corr_uq1 & lo_mask;
let hi_corr = corr_uq1 >> hw;
let corr_uq1: F::Int = (F::Int::from(x_uq0_hw) * F::Int::from(b_uq1_hw)
+ ((F::Int::from(x_uq0_hw) * blo) >> hw))
.wrapping_sub(one)
.wrapping_neg(); // account for *possible* carry
let lo_corr: F::Int = corr_uq1 & lo_mask;
let hi_corr: F::Int = corr_uq1 >> hw;
// x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
let mut x_uq0: <F as Float>::Int = ((((x_uq0_hw as u64) * hi_corr) << 1)
.wrapping_add(((x_uq0_hw as u64) * lo_corr) >> (hw - 1))
.wrapping_sub(2))
.cast(); // 1 to account for the highest bit of corr_UQ1 can be 1
// 1 to account for possible carry
// Just like the case of half-width iterations but with possibility
// of overflowing by one extra Ulp of x_UQ0.
let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1)
.wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1))
.wrapping_sub(F::Int::from(2u8));
// 1 to account for the highest bit of corr_UQ1 can be 1
// 1 to account for possible carry
// Just like the case of half-width iterations but with possibility
// of overflowing by one extra Ulp of x_UQ0.
x_uq0 -= one;
// ... and then traditional fixup by 2 should work

@@ -755,8 +772,8 @@ where
x_uq0
} else {
// C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n
let c: <F as Float>::Int = (0x7504F333 << (F::BITS - 32)).cast();
let x_uq0: <F as Float>::Int = c.wrapping_sub(b_uq1);
let c: F::Int = (0x7504F333 << (F::BITS - 32)).cast();
let x_uq0: F::Int = c.wrapping_sub(b_uq1);
// E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64
x_uq0
};
@@ -799,14 +816,27 @@ where

// Add 2 to U_N due to final decrement.

let reciprocal_precision: <F as Float>::Int = 220.cast();
let reciprocal_precision: F::Int = if F::BITS == 32
&& NUMBER_OF_HALF_ITERATIONS == 2
&& NUMBER_OF_FULL_ITERATIONS == 1
{
74.cast()
} else if F::BITS == 32 && NUMBER_OF_HALF_ITERATIONS == 0 && NUMBER_OF_FULL_ITERATIONS == 3 {
10.cast()
} else if F::BITS == 64 && NUMBER_OF_HALF_ITERATIONS == 3 && NUMBER_OF_FULL_ITERATIONS == 1 {
220.cast()
} else if F::BITS == 128 && NUMBER_OF_HALF_ITERATIONS == 4 && NUMBER_OF_FULL_ITERATIONS == 1 {
13922.cast()
} else {
panic!("invalid iterations for the specified bits");
};

// Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W
let x_uq0 = x_uq0 - reciprocal_precision;
// Now 1/b - (2*P) * 2^-W < x < 1/b
// FIXME Is x_UQ0 still >= 0.5?

let mut quotient: <F as Float>::Int = x_uq0.widen_mul(a_significand << 1).hi();
let mut quotient: F::Int = x_uq0.widen_mul(a_significand << 1).hi();
// Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).

// quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
Expand Down Expand Up @@ -914,13 +944,8 @@ intrinsics! {
div64(a, b)
}

// TODO: how should `HInt` be handled?
pub extern "C" fn __divtf3(a: f128, b: f128) -> f128 {
if cfg!(target_pointer_width = "64") {
div32(a, b)
} else {
div64(a, b)
}
div64(a, b)
}

#[cfg(target_arch = "arm")]
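
The bulk of the div.rs change makes the reciprocal refinement generic over HalfRep<F> (the half-width integer behind the float's representation) instead of hard-coded u32/u64, and replaces the single reciprocal_precision constant with per-format error bounds. Each half-width step is a Newton-Raphson iteration x1 = x0 * (2 - b * x0), which roughly doubles the number of correct bits. A concrete one-step sketch in u32 fixed point, mirroring the generic code above (names assumed):

    // One Newton-Raphson step refining x ~= 1/b.
    // b_uq1: b in [1, 2) as UQ1.31; x_uq0: x in (0.5, 1] as UQ0.32.
    fn recip_step(x_uq0: u32, b_uq1: u32) -> u32 {
        // corr = 2 - b*x: truncate the product to UQ1.31, then the
        // wrapping negation yields the correction in the same format.
        let corr_uq1 = 0u32.wrapping_sub(((x_uq0 as u64 * b_uq1 as u64) >> 32) as u32);
        // x * corr, renormalized from the UQ1.63 product back to UQ0.32.
        ((x_uq0 as u64 * corr_uq1 as u64) >> 31) as u32
    }

The starting guess is x0 = C - b/2 with C = 0x7504F333 (3/4 + 1/sqrt(2) as a UQ0.32 fraction); the chosen counts of half- and full-width iterations then determine which error bound gets subtracted as reciprocal_precision.
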
2 changes: 1 addition & 1 deletion src/float/extend.rs
@@ -1,5 +1,5 @@
use crate::float::Float;
use crate::int::{CastInto, Int};
use crate::int::{CastInto, Int, MinInt};

/// Generic conversion from a narrower to a wider IEEE-754 floating-point type
fn extend<F: Float, R: Float>(a: F) -> R
23 changes: 20 additions & 3 deletions src/float/mod.rs
@@ -1,6 +1,8 @@
use core::ops;

use super::int::Int;
use crate::int::DInt;

use super::int::{Int, MinInt};

pub mod add;
pub mod cmp;
@@ -12,6 +14,9 @@ pub mod pow;
pub mod sub;
pub mod trunc;

/// Wrapper to extract the integer type half of the float's size
pub(crate) type HalfRep<F: Float> = <F::Int as DInt>::H;

public_test_dep! {
/// Trait for some basic operations on floats
pub(crate) trait Float:
@@ -127,7 +132,20 @@ macro_rules! float_impl {
self.to_bits() as Self::SignedInt
}
fn eq_repr(self, rhs: Self) -> bool {
if self.is_nan() && rhs.is_nan() {
#[cfg(feature = "mangled-names")]
fn is_nan(x: $ty) -> bool {
// When using mangled-names, the "real" compiler-builtins might not have the
// necessary builtin (__unordtf2) to test whether `f128` is NaN.
// FIXME: Remove once the nightly toolchain has the __unordtf2 builtin
// x is NaN if all the bits of the exponent are set and the significand is non-0
x.repr() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK
&& x.repr() & $ty::SIGNIFICAND_MASK != 0
}
#[cfg(not(feature = "mangled-names"))]
fn is_nan(x: $ty) -> bool {
x.is_nan()
}
if is_nan(self) && is_nan(rhs) {
true
} else {
self.repr() == rhs.repr()
@@ -171,7 +189,6 @@
};
}

// FIXME: there aren't any intrinsics for f16 that I know of, do we need this?
float_impl!(f16, u16, i16, i16, 16, 10);
float_impl!(f32, u32, i32, i16, 32, 23);
float_impl!(f64, u64, i64, i16, 64, 52);
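
Two notes on mod.rs: the new HalfRep<F> alias is what lets div.rs pick its half-width working type (u32 for f64, u64 for f128), and eq_repr now tests NaN from the bit pattern because, under mangled-names, the system compiler-builtins may lack __unordtf2 for f128. The test is the standard IEEE-754 rule: exponent bits all set, significand nonzero. A concrete f32 version (a sketch with hard-coded masks; the macro above generates this generically):

    const EXPONENT_MASK: u32 = 0x7f80_0000;
    const SIGNIFICAND_MASK: u32 = 0x007f_ffff;

    fn is_nan_repr(x: f32) -> bool {
        let bits = x.to_bits();
        // NaN: exponent all ones and significand nonzero.
        bits & EXPONENT_MASK == EXPONENT_MASK && bits & SIGNIFICAND_MASK != 0
    }

Here is_nan_repr(f32::NAN) is true, while is_nan_repr(f32::INFINITY) is false because infinity's significand is zero.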