Work on fixing bit widths
tgross35 committed Apr 20, 2024
1 parent 0f099eb commit 0e755e8
Showing 4 changed files with 68 additions and 68 deletions.
87 changes: 57 additions & 30 deletions src/float/div.rs
@@ -5,6 +5,8 @@
 use crate::float::Float;
 use crate::int::{CastInto, DInt, HInt, Int, MinInt};

+use super::HalfRep;
+
 fn div32<F: Float>(a: F, b: F) -> F
 where
     u32: CastInto<F::Int>,
@@ -37,6 +39,11 @@ where
     let quiet_bit = implicit_bit >> 1;
     let qnan_rep = exponent_mask | quiet_bit;

+    // #[inline(always)]
+    // fn negate<T: Int>(a: T) -> T {
+    //     T::wrapping_neg(a.signe)
+    // }
+
     #[inline(always)]
     fn negate_u32(a: u32) -> u32 {
         (<i32>::wrapping_neg(a as i32)) as u32
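The commented-out generic negate above shows where this is heading: one width-generic helper in place of per-width functions like negate_u32. The detour through the signed type is in fact unnecessary in Rust, since unsigned wrapping_neg already computes the two's complement, which is what makes a generic version possible. A standalone sketch (not part of the commit) of the equivalence:

fn negate_u32(a: u32) -> u32 {
    // Interpret as signed, wrapping-negate, reinterpret as unsigned.
    (<i32>::wrapping_neg(a as i32)) as u32
}

fn main() {
    assert_eq!(negate_u32(1), u32::MAX); // -1 in two's complement
    assert_eq!(negate_u32(5), 0xFFFF_FFFB);
    // Same result without the signed detour:
    assert_eq!(negate_u32(5), 5u32.wrapping_neg());
}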
@@ -459,6 +466,10 @@ where
     i32: CastInto<F::Int>,
     F::Int: CastInto<i32>,
     u64: CastInto<F::Int>,
+    u64: CastInto<HalfRep<F>>,
+    F::Int: CastInto<HalfRep<F>>,
+    F::Int: From<HalfRep<F>>,
+    F::Int: From<u8>,
     F::Int: CastInto<u64>,
     i64: CastInto<F::Int>,
     F::Int: CastInto<i64>,
@@ -467,13 +478,11 @@ where
     const NUMBER_OF_HALF_ITERATIONS: usize = 3;
     const NUMBER_OF_FULL_ITERATIONS: usize = 1;
     const USE_NATIVE_FULL_ITERATIONS: bool = false;
-    vdbg!(a);
-    vdbg!(b);

     let one = F::Int::ONE;
     let zero = F::Int::ZERO;
     let hw = F::BITS / 2;
-    let lo_mask = if hw == 64 { 0 } else { u64::MAX >> hw };
+    let lo_mask = F::Int::MAX >> hw;

     let significand_bits = F::SIGNIFICAND_BITS;
     let max_exponent = F::EXPONENT_MAX;
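The lo_mask change is the heart of this hunk's bit-width fix: the old expression was pinned to u64 and had to special-case hw == 64 (where u64::MAX >> 64 would be an overflowing shift), while shifting the float's own integer type stays correct when F::Int is wider than 64 bits. A standalone check, with u128 standing in for a hypothetical f128 instantiation (F::Int = u128, hw = 64):

fn main() {
    // f64 case: F::Int = u64, hw = 32. Old and new forms agree.
    assert_eq!(u64::MAX >> 32, 0x0000_0000_FFFF_FFFF);

    // 128-bit case: hw = 64. The old u64-pinned expression had to
    // special-case this to 0; the full-width shift instead yields
    // the actual low-half mask.
    assert_eq!(u128::MAX >> 64, 0xFFFF_FFFF_FFFF_FFFF_u128);
}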
@@ -618,21 +627,23 @@ where

     let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 {
         // Starting with (n-1) half-width iterations
-        let b_uq1_hw: u32 =
-            (CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw)) as u32;
+        let b_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(
+            CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw),
+        );

         // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
         // with W0 being either 16 or 32 and W0 <= HW.
         // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
         // b/2 is subtracted to obtain x0) wrapped to [0, 1) range.

         // HW is at least 32. Shifting into the highest bits if needed.
-        let c_hw = (0x7504F333_u64 as u32).wrapping_shl(hw.wrapping_sub(32));
+        let c_hw = (CastInto::<HalfRep<F>>::cast(0x7504F333_u64)).wrapping_shl(hw.wrapping_sub(32));

         // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
         // so x0 fits to UQ0.HW without wrapping.
-        let x_uq0_hw: u32 = {
-            let mut x_uq0_hw: u32 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
+        let x_uq0_hw: HalfRep<F> = {
+            let mut x_uq0_hw: HalfRep<F> =
+                c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
             // dbg!(x_uq0_hw);
             // An e_0 error is comprised of errors due to
             // * x0 being an inherently imprecise first approximation of 1/b_hw
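For intuition about the starting estimate: with hw = 32, C = 0x7504F333 is (3/4 + 1/sqrt(2)) - 1 ~= 0.45711 in UQ0.32, and the wrapping subtraction of b/2 lands on (3/4 + 1/sqrt(2)) - b/2 in [0, 1). A worked example (not from the commit) for b = 1.5:

fn main() {
    // C = (3/4 + 1/sqrt(2)) - 1, truncated to 32 fractional bits.
    let c_hw: u32 = 0x7504_F333;
    // Top half of b's significand for b = 1.5; reads as b/2 = 0.75 in UQ0.32.
    let b_uq1_hw: u32 = 0xC000_0000;
    // x0 = (3/4 + 1/sqrt(2)) - b/2, computed mod 1 by the wrapping sub.
    let x0 = c_hw.wrapping_sub(b_uq1_hw);
    assert_eq!(x0, 0xB504_F333);
    println!("x0 ~= {:.5}, 1/b = {:.5}", x0 as f64 / 2f64.powi(32), 1.0 / 1.5);
    // Prints x0 ~= 0.70711 against 1/b = 0.66667: within the e_0 bound
    // the comments derive, before any Newton-Raphson refinement.
}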
@@ -663,9 +674,9 @@ where
                 // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
                 // expected to be strictly positive because b_UQ1_hw has its highest bit set
                 // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
-                let corr_uq1_hw: u32 = vdbg!(0.wrapping_sub(
-                    ((vdbg!(x_uq0_hw) as u64).wrapping_mul(vdbg!(b_uq1_hw) as u64)) >> vdbg!(hw)
-                ) as u32);
+                let corr_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(zero.wrapping_sub(
+                    ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(b_uq1_hw))) >> hw,
+                ));
                 // dbg!(corr_uq1_hw);

                 // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
@@ -680,7 +691,9 @@
                 // The fact corr_UQ1_hw was virtually round up (due to result of
                 // multiplication being **first** truncated, then negated - to improve
                 // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
-                x_uq0_hw = ((x_uq0_hw as u64).wrapping_mul(corr_uq1_hw as u64) >> (hw - 1)) as u32;
+                x_uq0_hw = ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(corr_uq1_hw))
+                    >> (hw - 1))
+                .cast();
                 // dbg!(x_uq0_hw);
                 // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
                 // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
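Spelled out with concrete types, one half-width Newton-Raphson step for the f64 case (HalfRep<F> = u32, full width u64) is sketched below, standalone and not the crate's code: the wrapping negation makes corr read as 2 - x*b in UQ1.31, and the shift by hw - 1 rescales the UQ0.32 x UQ1.31 product back to UQ0.32.

/// One Newton-Raphson step for x ~= 1/b: x' = x * (2 - x*b).
fn half_step(x_uq0_hw: u32, b_uq1_hw: u32) -> u32 {
    let hw = 32;
    // -(x*b >> hw) mod 2^32 reads as 2 - x*b in UQ1.31.
    let corr_uq1_hw =
        0u64.wrapping_sub((x_uq0_hw as u64).wrapping_mul(b_uq1_hw as u64) >> hw) as u32;
    // UQ0.32 * UQ1.31 product, rescaled back to UQ0.32.
    ((x_uq0_hw as u64).wrapping_mul(corr_uq1_hw as u64) >> (hw - 1)) as u32
}

fn main() {
    let b_uq1_hw: u32 = 0xC000_0000; // b = 1.5
    let mut x: u32 = 0xB504_F333; // x0 from the C - b/2 estimate
    for _ in 0..3 {
        // NUMBER_OF_HALF_ITERATIONS = 3 for f64
        x = half_step(x, b_uq1_hw);
        println!("x ~= {:.9}", x as f64 / 2f64.powi(32));
    }
    // Converges toward 1/1.5 = 0.666666667, consistent with the error
    // analysis in the comments (modulo the final ulp adjustments).
}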
@@ -710,7 +723,7 @@ where
             // be not below that value (see g(x) above), so it is safe to decrement just
             // once after the final iteration. On the other hand, an effective value of
             // divisor changes after this point (from b_hw to b), so adjust here.
-            x_uq0_hw.wrapping_sub(1_u32)
+            x_uq0_hw.wrapping_sub(HalfRep::<F>::ONE)
         };

         // Error estimations for full-precision iterations are calculated just
@@ -720,7 +733,7 @@ where
         // Simulating operations on a twice_rep_t to perform a single final full-width
         // iteration. Using ad-hoc multiplication implementations to take advantage
         // of particular structure of operands.
-        let blo: u64 = (CastInto::<u64>::cast(b_uq1)) & lo_mask;
+        let blo: F::Int = b_uq1 & lo_mask;
         // x_UQ0 = x_UQ0_hw * 2^HW - 1
         // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
         //
@@ -729,19 +742,20 @@ where
         // + [ x_UQ0_hw * blo ]
         // - [ b_UQ1 ]
         // = [ result ][.... discarded ...]
-        let corr_uq1 = negate_u64(
-            (x_uq0_hw as u64) * (b_uq1_hw as u64) + (((x_uq0_hw as u64) * (blo)) >> hw) - 1,
-        ); // account for *possible* carry
-        let lo_corr = corr_uq1 & lo_mask;
-        let hi_corr = corr_uq1 >> hw;
+        let corr_uq1: F::Int = (F::Int::from(x_uq0_hw) * F::Int::from(b_uq1_hw)
+            + ((F::Int::from(x_uq0_hw) * blo) >> hw))
+            .wrapping_sub(one)
+            .wrapping_neg(); // account for *possible* carry
+        let lo_corr: F::Int = corr_uq1 & lo_mask;
+        let hi_corr: F::Int = corr_uq1 >> hw;
         // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
-        let mut x_uq0: <F as Float>::Int = ((((x_uq0_hw as u64) * hi_corr) << 1)
-            .wrapping_add(((x_uq0_hw as u64) * lo_corr) >> (hw - 1))
-            .wrapping_sub(2))
-        .cast(); // 1 to account for the highest bit of corr_UQ1 can be 1
-        // 1 to account for possible carry
-        // Just like the case of half-width iterations but with possibility
-        // of overflowing by one extra Ulp of x_UQ0.
+        let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1)
+            .wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1))
+            .wrapping_sub(F::Int::from(2u8));
+        // 1 to account for the highest bit of corr_UQ1 can be 1
+        // 1 to account for possible carry
+        // Just like the case of half-width iterations but with possibility
+        // of overflowing by one extra Ulp of x_UQ0.
         x_uq0 -= one;
         // ... and then traditional fixup by 2 should work
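All of the ad-hoc multiplications above rest on one identity: a full-width product can be assembled from half-width partial products, which is what keeps the arithmetic inside F::Int. A standalone sketch of that decomposition for u64 (the diff inlines the same idea rather than calling a helper like this):

/// High 64 bits of the 128-bit product a*b, using only u64 arithmetic
/// on 32-bit halves -- the decomposition behind the x_UQ0 * b_UQ1 and
/// x_UQ0 * corr_UQ1 calculations above.
fn mul_hi(a: u64, b: u64) -> u64 {
    let (ah, al) = (a >> 32, a & 0xFFFF_FFFF);
    let (bh, bl) = (b >> 32, b & 0xFFFF_FFFF);
    // a*b = ah*bh*2^64 + (ah*bl + al*bh)*2^32 + al*bl
    let ll = al * bl;
    let lh = al * bh;
    let hl = ah * bl;
    let hh = ah * bh;
    // Column sum of the middle 32-bit lanes, keeping the carry.
    let mid = (ll >> 32) + (lh & 0xFFFF_FFFF) + (hl & 0xFFFF_FFFF);
    hh + (lh >> 32) + (hl >> 32) + (mid >> 32)
}

fn main() {
    let (a, b) = (0xB504_F333_F9DE_6484_u64, 0xC000_0000_0000_0000_u64);
    assert_eq!(mul_hi(a, b), ((a as u128 * b as u128) >> 64) as u64);
}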

@@ -758,8 +772,8 @@ where
         x_uq0
     } else {
         // C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n
-        let c: <F as Float>::Int = (0x7504F333 << (F::BITS - 32)).cast();
-        let x_uq0: <F as Float>::Int = c.wrapping_sub(b_uq1);
+        let c: F::Int = (0x7504F333 << (F::BITS - 32)).cast();
+        let x_uq0: F::Int = c.wrapping_sub(b_uq1);
         // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64
         x_uq0
     };
@@ -802,14 +816,27 @@ where

     // Add 2 to U_N due to final decrement.

-    let reciprocal_precision: <F as Float>::Int = 220.cast();
+    let reciprocal_precision: F::Int = if F::BITS == 32
+        && NUMBER_OF_HALF_ITERATIONS == 2
+        && NUMBER_OF_FULL_ITERATIONS == 1
+    {
+        74.cast()
+    } else if F::BITS == 32 && NUMBER_OF_HALF_ITERATIONS == 0 && NUMBER_OF_FULL_ITERATIONS == 3 {
+        10.cast()
+    } else if F::BITS == 64 && NUMBER_OF_HALF_ITERATIONS == 3 && NUMBER_OF_FULL_ITERATIONS == 1 {
+        220.cast()
+    } else if F::BITS == 128 && NUMBER_OF_HALF_ITERATIONS == 4 && NUMBER_OF_FULL_ITERATIONS == 1 {
+        13922.cast()
+    } else {
+        panic!("invalid iterations for the specified bits");
+    };

     // Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W
     let x_uq0 = x_uq0 - reciprocal_precision;
     // Now 1/b - (2*P) * 2^-W < x < 1/b
     // FIXME Is x_UQ0 still >= 0.5?

-    let mut quotient: <F as Float>::Int = x_uq0.widen_mul(a_significand << 1).hi();
+    let mut quotient: F::Int = x_uq0.widen_mul(a_significand << 1).hi();
     // Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).

     // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
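The quotient step hinges on the widening multiply: for f64, x_uq0.widen_mul(y).hi() is the high 64 bits of the exact 128-bit product, via the crate's HInt/DInt traits. A standalone sketch of those semantics plus the fixed-point bookkeeping, with illustrative values for a = 1.0, b = 1.5 (not from the commit):

/// What x_uq0.widen_mul(y).hi() computes when F::Int = u64.
fn widen_mul_hi(x: u64, y: u64) -> u64 {
    ((x as u128 * y as u128) >> 64) as u64
}

fn main() {
    // 1/b for b = 1.5 as UQ0.64 (2/3 * 2^64, truncated).
    let x_uq0: u64 = 0xAAAA_AAAA_AAAA_AAAA;
    // a = 1.0: the significand is just the implicit bit (2^52 for f64).
    let a_significand: u64 = 1 << 52;
    let q = widen_mul_hi(x_uq0, a_significand << 1);
    // The high word reads as a/b in UQ1.(SB+1) = UQ1.53.
    let approx = q as f64 / 2f64.powi(53);
    assert!((approx - 1.0 / 1.5).abs() < 1e-9);
}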
5 changes: 5 additions & 0 deletions src/float/mod.rs
@@ -1,5 +1,7 @@
 use core::ops;

+use crate::int::DInt;
+
 use super::int::{Int, MinInt};

 pub mod add;
@@ -12,6 +14,9 @@ pub mod pow;
 pub mod sub;
 pub mod trunc;

+/// Wrapper to extract the integer type half of the float's size
+pub(crate) type HalfRep<F: Float> = <F::Int as DInt>::H;
+
 public_test_dep! {
 /// Trait for some basic operations on floats
 pub(crate) trait Float:
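To see how the new alias resolves: HalfRep<F> goes from the float type to its bit representation (F::Int) and then to that integer's half-width counterpart via DInt. A minimal standalone mock of the relationship (the crate's real Float and DInt traits carry many more items):

trait DInt {
    type H; // half-width counterpart of a double-width integer
}
trait Float {
    type Int: DInt;
}

impl DInt for u64 {
    type H = u32;
}
impl Float for f64 {
    type Int = u64;
}

type HalfRep<F> = <<F as Float>::Int as DInt>::H;

fn main() {
    // HalfRep<f64> = <u64 as DInt>::H = u32
    let half: HalfRep<f64> = u32::MAX;
    assert_eq!(half, 0xFFFF_FFFF);
}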
1 change: 1 addition & 0 deletions src/int/mod.rs
@@ -54,6 +54,7 @@ pub(crate) trait Int: MinInt
     + ops::ShrAssign<u32>
     + ops::Add<Output = Self>
     + ops::Sub<Output = Self>
+    + ops::Mul<Output = Self>
     + ops::Div<Output = Self>
     + ops::Shr<u32, Output = Self>
     + ops::BitXor<Output = Self>
43 changes: 5 additions & 38 deletions testcrate/benches/float.rs
@@ -32,23 +32,6 @@ fn combine2<T: Copy>(vals: &[T]) -> Vec<(T, T)> {
     ret
 }

-mod c {
-    extern "C" {
-        pub fn __addsf3(a: f64, b: f64) -> f64;
-        pub fn __adddf3(a: f64, b: f64) -> f64;
-        pub fn __addtf3(a: f64, b: f64) -> f64;
-        pub fn __subsf3(a: f64, b: f64) -> f64;
-        pub fn __subdf3(a: f64, b: f64) -> f64;
-        pub fn __subtf3(a: f64, b: f64) -> f64;
-        pub fn __mulsf3(a: f64, b: f64) -> f64;
-        pub fn __muldf3(a: f64, b: f64) -> f64;
-        pub fn __multf3(a: f64, b: f64) -> f64;
-        pub fn __divsf3(a: f64, b: f64) -> f64;
-        pub fn __divdf3(a: f64, b: f64) -> f64;
-        pub fn __divtf3(a: f64, b: f64) -> f64;
-    }
-}
-
 macro_rules! test_iter {
     ($b:ident, $ty:ty, $fn:path) => {{
         let vals = combine2(test_values!($ty));
@@ -96,28 +79,12 @@ macro_rules! foobar {
 }

 foobar! {
     f32, addsf3_rust, addsf3_builtin, add, __addsf3;
     f32, subsf3_rust, subsf3_builtin, sub, __subsf3;
     f32, mulsf3_rust, mulsf3_builtin, mul, __mulsf3;
     f32, divsf3_rust, divsf3_builtin, div, __divsf3;
+    f64, adddf3_rust, adddf3_builtin, add, __adddf3;
+    f64, subdf3_rust, subdf3_builtin, sub, __subdf3;
+    f64, muldf3_rust, muldf3_builtin, mul, __muldf3;
-    // f64, divdf3_rust, divdf3_builtin, div, __divdf3;
+    f64, divdf3_rust, divdf3_builtin, div, __divdf3;
 }

-// #[bench]
-// fn adddf3_rust(b: &mut Bencher) {
-//     foo!(b, f64, compiler_builtins::float::add::__adddf3);
-// }
-
-// #[bench]
-// fn adddf3_builtin(b: &mut Bencher) {
-//     unsafe { foo!(b, f64, c::__adddf3) };
-// }
-
-// #[bench]
-// fn subdf3_rust(b: &mut Bencher) {
-//     foo!(b, f64, compiler_builtins::float::sub::__subdf3);
-// }
-
-// #[bench]
-// fn subdf3_builtin(b: &mut Bencher) {
-//     unsafe { foo!(b, f64, c::__subdf3) };
-// }
