From 4f4a5a7349da2326980bffd0f5e30cf418f2ab2b Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 29 May 2020 11:54:12 -0700 Subject: [PATCH] Use macros for more division/array checks This commit moves over more array accesses to the `i!` macro to avoid bounds checks when debug assertions are disabled. This is surfaced from rust-lang/compiler-builtins#360 where recent changes in codegen units have caused some bounds checks to not get elided in release mode. This also adds a `div!` macro to work around rust-lang/rust#72751. --- src/math/atanf.rs | 8 ++++---- src/math/exp.rs | 2 +- src/math/exp2.rs | 6 +++--- src/math/exp2f.rs | 2 +- src/math/expf.rs | 2 +- src/math/mod.rs | 14 ++++++++++++++ src/math/pow.rs | 12 ++++++------ src/math/powf.rs | 12 ++++++------ src/math/rem_pio2.rs | 12 ++++++------ src/math/rem_pio2_large.rs | 4 ++-- 10 files changed, 44 insertions(+), 30 deletions(-) diff --git a/src/math/atanf.rs b/src/math/atanf.rs index 73f3352e..d042b3bc 100644 --- a/src/math/atanf.rs +++ b/src/math/atanf.rs @@ -56,7 +56,7 @@ pub fn atanf(mut x: f32) -> f32 { if x.is_nan() { return x; } - z = ATAN_HI[3] + x1p_120; + z = i!(ATAN_HI, 3) + x1p_120; return if sign { -z } else { z }; } let id = if ix < 0x3ee00000 { @@ -97,13 +97,13 @@ pub fn atanf(mut x: f32) -> f32 { z = x * x; let w = z * z; /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */ - let s1 = z * (A_T[0] + w * (A_T[2] + w * A_T[4])); - let s2 = w * (A_T[1] + w * A_T[3]); + let s1 = z * (i!(A_T, 0) + w * (i!(A_T, 2) + w * i!(A_T, 4))); + let s2 = w * (i!(A_T, 1) + w * i!(A_T, 3)); if id < 0 { return x - x * (s1 + s2); } let id = id as usize; - let z = ATAN_HI[id] - ((x * (s1 + s2) - ATAN_LO[id]) - x); + let z = i!(ATAN_HI, id) - ((x * (s1 + s2) - i!(ATAN_LO, id)) - x); if sign { -z } else { diff --git a/src/math/exp.rs b/src/math/exp.rs index 5b163f95..d4994277 100644 --- a/src/math/exp.rs +++ b/src/math/exp.rs @@ -124,7 +124,7 @@ pub fn exp(mut x: f64) -> f64 { /* if |x| > 0.5 ln2 */ 
if hx >= 0x3ff0a2b2 { /* if |x| >= 1.5 ln2 */ - k = (INVLN2 * x + HALF[sign as usize]) as i32; + k = (INVLN2 * x + i!(HALF, sign as usize)) as i32; } else { k = 1 - sign - sign; } diff --git a/src/math/exp2.rs b/src/math/exp2.rs index 8ea434dc..e0e385df 100644 --- a/src/math/exp2.rs +++ b/src/math/exp2.rs @@ -374,14 +374,14 @@ pub fn exp2(mut x: f64) -> f64 { let mut i0 = ui as u32; i0 = i0.wrapping_add(TBLSIZE as u32 / 2); let ku = i0 / TBLSIZE as u32 * TBLSIZE as u32; - let ki = ku as i32 / TBLSIZE as i32; + let ki = div!(ku as i32, TBLSIZE as i32); i0 %= TBLSIZE as u32; let uf = f64::from_bits(ui) - redux; let mut z = x - uf; /* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */ - let t = f64::from_bits(TBL[2 * i0 as usize]); /* exp2t[i0] */ - z -= f64::from_bits(TBL[2 * i0 as usize + 1]); /* eps[i0] */ + let t = f64::from_bits(i!(TBL, 2 * i0 as usize)); /* exp2t[i0] */ + z -= f64::from_bits(i!(TBL, 2 * i0 as usize + 1)); /* eps[i0] */ let r = t + t * z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * p5)))); scalbn(r, ki) diff --git a/src/math/exp2f.rs b/src/math/exp2f.rs index 8a890b83..f4867b80 100644 --- a/src/math/exp2f.rs +++ b/src/math/exp2f.rs @@ -126,7 +126,7 @@ pub fn exp2f(mut x: f32) -> f32 { uf -= redux; let z: f64 = (x - uf) as f64; /* Compute r = exp2(y) = exp2ft[i0] * p(z). 
*/ - let r: f64 = f64::from_bits(EXP2FT[i0 as usize]); + let r: f64 = f64::from_bits(i!(EXP2FT, i0 as usize)); let t: f64 = r as f64 * z; let r: f64 = r + t * (p1 as f64 + z * p2 as f64) + t * (z * z) * (p3 as f64 + z * p4 as f64); diff --git a/src/math/expf.rs b/src/math/expf.rs index 47c1b2c4..a53aa90a 100644 --- a/src/math/expf.rs +++ b/src/math/expf.rs @@ -70,7 +70,7 @@ pub fn expf(mut x: f32) -> f32 { /* if |x| > 0.5 ln2 */ if hx > 0x3f851592 { /* if |x| > 1.5 ln2 */ - k = (INV_LN2 * x + HALF[sign as usize]) as i32; + k = (INV_LN2 * x + i!(HALF, sign as usize)) as i32; } else { k = 1 - sign - sign; } diff --git a/src/math/mod.rs b/src/math/mod.rs index c8d7bd81..9b64677c 100644 --- a/src/math/mod.rs +++ b/src/math/mod.rs @@ -58,6 +58,20 @@ macro_rules! i { }; } +#[cfg(any(debug_assertions, not(feature = "unstable")))] +macro_rules! div { + ($a:expr, $b:expr) => { + $a / $b + }; +} + +#[cfg(all(not(debug_assertions), feature = "unstable"))] +macro_rules! div { + ($a:expr, $b:expr) => { + unsafe { core::intrinsics::unchecked_div($a, $b) } + }; +} + macro_rules! 
llvm_intrinsically_optimized { (#[cfg($($clause:tt)*)] $e:expr) => { #[cfg(all(feature = "unstable", $($clause)*))] diff --git a/src/math/pow.rs b/src/math/pow.rs index ce8e83ee..c7fd0dfa 100644 --- a/src/math/pow.rs +++ b/src/math/pow.rs @@ -299,8 +299,8 @@ pub fn pow(x: f64, y: f64) -> f64 { ax = with_set_high_word(ax, ix as u32); /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ - let u: f64 = ax - BP[k as usize]; /* bp[0]=1.0, bp[1]=1.5 */ - let v: f64 = 1.0 / (ax + BP[k as usize]); + let u: f64 = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */ + let v: f64 = 1.0 / (ax + i!(BP, k as usize)); let ss: f64 = u * v; let s_h = with_set_low_word(ss, 0); @@ -309,7 +309,7 @@ pub fn pow(x: f64, y: f64) -> f64 { 0.0, ((ix as u32 >> 1) | 0x20000000) + 0x00080000 + ((k as u32) << 18), ); - let t_l: f64 = ax - (t_h - BP[k as usize]); + let t_l: f64 = ax - (t_h - i!(BP, k as usize)); let s_l: f64 = v * ((u - s_h * t_h) - s_h * t_l); /* compute log(ax) */ @@ -328,12 +328,12 @@ pub fn pow(x: f64, y: f64) -> f64 { let p_h: f64 = with_set_low_word(u + v, 0); let p_l = v - (p_h - u); let z_h: f64 = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */ - let z_l: f64 = CP_L * p_h + p_l * CP + DP_L[k as usize]; + let z_l: f64 = CP_L * p_h + p_l * CP + i!(DP_L, k as usize); /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */ let t: f64 = n as f64; - t1 = with_set_low_word(((z_h + z_l) + DP_H[k as usize]) + t, 0); - t2 = z_l - (((t1 - t) - DP_H[k as usize]) - z_h); + t1 = with_set_low_word(((z_h + z_l) + i!(DP_H, k as usize)) + t, 0); + t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h); } /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ diff --git a/src/math/powf.rs b/src/math/powf.rs index f3cf76f9..68d2083b 100644 --- a/src/math/powf.rs +++ b/src/math/powf.rs @@ -238,8 +238,8 @@ pub fn powf(x: f32, y: f32) -> f32 { ax = f32::from_bits(ix as u32); /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ - u = ax - BP[k as usize]; /* bp[0]=1.0, bp[1]=1.5 */ - v = 
1.0 / (ax + BP[k as usize]); + u = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */ + v = 1.0 / (ax + i!(BP, k as usize)); s = u * v; s_h = s; is = s_h.to_bits() as i32; @@ -247,7 +247,7 @@ pub fn powf(x: f32, y: f32) -> f32 { /* t_h=ax+bp[k] High */ is = (((ix as u32 >> 1) & 0xfffff000) | 0x20000000) as i32; t_h = f32::from_bits(is as u32 + 0x00400000 + ((k as u32) << 21)); - t_l = ax - (t_h - BP[k as usize]); + t_l = ax - (t_h - i!(BP, k as usize)); s_l = v * ((u - s_h * t_h) - s_h * t_l); /* compute log(ax) */ s2 = s * s; @@ -267,13 +267,13 @@ pub fn powf(x: f32, y: f32) -> f32 { p_h = f32::from_bits(is as u32 & 0xfffff000); p_l = v - (p_h - u); z_h = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */ - z_l = CP_L * p_h + p_l * CP + DP_L[k as usize]; + z_l = CP_L * p_h + p_l * CP + i!(DP_L, k as usize); /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */ t = n as f32; - t1 = ((z_h + z_l) + DP_H[k as usize]) + t; + t1 = ((z_h + z_l) + i!(DP_H, k as usize)) + t; is = t1.to_bits() as i32; t1 = f32::from_bits(is as u32 & 0xfffff000); - t2 = z_l - (((t1 - t) - DP_H[k as usize]) - z_h); + t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h); }; /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ diff --git a/src/math/rem_pio2.rs b/src/math/rem_pio2.rs index 6b7dbd34..46f7c38f 100644 --- a/src/math/rem_pio2.rs +++ b/src/math/rem_pio2.rs @@ -167,21 +167,21 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { let mut z = f64::from_bits(ui); let mut tx = [0.0; 3]; for i in 0..2 { - tx[i] = z as i32 as f64; - z = (z - tx[i]) * x1p24; + i!(tx,i, =, z as i32 as f64); + z = (z - i!(tx, i)) * x1p24; } - tx[2] = z; + i!(tx,2, =, z); /* skip zero terms, first term is non-zero */ let mut i = 2; - while i != 0 && tx[i] == 0.0 { + while i != 0 && i!(tx, i) == 0.0 { i -= 1; } let mut ty = [0.0; 3]; let n = rem_pio2_large(&tx[..=i], &mut ty, ((ix as i32) >> 20) - (0x3ff + 23), 1); if sign != 0 { - return (-n, -ty[0], -ty[1]); + return (-n, -i!(ty, 0), -i!(ty, 1)); } - (n, ty[0], 
ty[1]) + (n, i!(ty, 0), i!(ty, 1)) } #[cfg(test)] diff --git a/src/math/rem_pio2_large.rs b/src/math/rem_pio2_large.rs index 002ce2e2..65473f0a 100644 --- a/src/math/rem_pio2_large.rs +++ b/src/math/rem_pio2_large.rs @@ -242,12 +242,12 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> let mut iq: [i32; 20] = [0; 20]; /* initialize jk*/ - let jk = INIT_JK[prec]; + let jk = i!(INIT_JK, prec); let jp = jk; /* determine jx,jv,q0, note that 3>q0 */ let jx = nx - 1; - let mut jv = (e0 - 3) / 24; + let mut jv = div!(e0 - 3, 24); if jv < 0 { jv = 0; }