From 4f4a5a7349da2326980bffd0f5e30cf418f2ab2b Mon Sep 17 00:00:00 2001
From: Alex Crichton <alex@alexcrichton.com>
Date: Fri, 29 May 2020 11:54:12 -0700
Subject: [PATCH] Use macros for more division/array checks

This commit moves more array accesses over to the `i!` macro to avoid
bounds checks when debug assertions are disabled. This was surfaced by
rust-lang/compiler-builtins#360, where recent changes in codegen units
have caused some bounds checks to not get elided in release mode. This
also adds a `div!` macro to work around rust-lang/rust#72751.
---
 src/math/atanf.rs          |  8 ++++----
 src/math/exp.rs            |  2 +-
 src/math/exp2.rs           |  6 +++---
 src/math/exp2f.rs          |  2 +-
 src/math/expf.rs           |  2 +-
 src/math/mod.rs            | 14 ++++++++++++++
 src/math/pow.rs            | 12 ++++++------
 src/math/powf.rs           | 12 ++++++------
 src/math/rem_pio2.rs       | 12 ++++++------
 src/math/rem_pio2_large.rs |  4 ++--
 10 files changed, 44 insertions(+), 30 deletions(-)

diff --git a/src/math/atanf.rs b/src/math/atanf.rs
index 73f3352e..d042b3bc 100644
--- a/src/math/atanf.rs
+++ b/src/math/atanf.rs
@@ -56,7 +56,7 @@ pub fn atanf(mut x: f32) -> f32 {
         if x.is_nan() {
             return x;
         }
-        z = ATAN_HI[3] + x1p_120;
+        z = i!(ATAN_HI, 3) + x1p_120;
         return if sign { -z } else { z };
     }
     let id = if ix < 0x3ee00000 {
@@ -97,13 +97,13 @@ pub fn atanf(mut x: f32) -> f32 {
     z = x * x;
     let w = z * z;
     /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
-    let s1 = z * (A_T[0] + w * (A_T[2] + w * A_T[4]));
-    let s2 = w * (A_T[1] + w * A_T[3]);
+    let s1 = z * (i!(A_T, 0) + w * (i!(A_T, 2) + w * i!(A_T, 4)));
+    let s2 = w * (i!(A_T, 1) + w * i!(A_T, 3));
     if id < 0 {
         return x - x * (s1 + s2);
     }
     let id = id as usize;
-    let z = ATAN_HI[id] - ((x * (s1 + s2) - ATAN_LO[id]) - x);
+    let z = i!(ATAN_HI, id) - ((x * (s1 + s2) - i!(ATAN_LO, id)) - x);
     if sign {
         -z
     } else {
diff --git a/src/math/exp.rs b/src/math/exp.rs
index 5b163f95..d4994277 100644
--- a/src/math/exp.rs
+++ b/src/math/exp.rs
@@ -124,7 +124,7 @@ pub fn exp(mut x: f64) -> f64 {
         /* if |x| > 0.5 ln2 */
         if hx >= 0x3ff0a2b2 {
             /* if |x| >= 1.5 ln2 */
-            k = (INVLN2 * x + HALF[sign as usize]) as i32;
+            k = (INVLN2 * x + i!(HALF, sign as usize)) as i32;
         } else {
             k = 1 - sign - sign;
         }
diff --git a/src/math/exp2.rs b/src/math/exp2.rs
index 8ea434dc..e0e385df 100644
--- a/src/math/exp2.rs
+++ b/src/math/exp2.rs
@@ -374,14 +374,14 @@ pub fn exp2(mut x: f64) -> f64 {
     let mut i0 = ui as u32;
     i0 = i0.wrapping_add(TBLSIZE as u32 / 2);
     let ku = i0 / TBLSIZE as u32 * TBLSIZE as u32;
-    let ki = ku as i32 / TBLSIZE as i32;
+    let ki = div!(ku as i32, TBLSIZE as i32);
     i0 %= TBLSIZE as u32;
     let uf = f64::from_bits(ui) - redux;
     let mut z = x - uf;
 
     /* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */
-    let t = f64::from_bits(TBL[2 * i0 as usize]); /* exp2t[i0] */
-    z -= f64::from_bits(TBL[2 * i0 as usize + 1]); /* eps[i0]   */
+    let t = f64::from_bits(i!(TBL, 2 * i0 as usize)); /* exp2t[i0] */
+    z -= f64::from_bits(i!(TBL, 2 * i0 as usize + 1)); /* eps[i0]   */
     let r = t + t * z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * p5))));
 
     scalbn(r, ki)
diff --git a/src/math/exp2f.rs b/src/math/exp2f.rs
index 8a890b83..f4867b80 100644
--- a/src/math/exp2f.rs
+++ b/src/math/exp2f.rs
@@ -126,7 +126,7 @@ pub fn exp2f(mut x: f32) -> f32 {
     uf -= redux;
     let z: f64 = (x - uf) as f64;
     /* Compute r = exp2(y) = exp2ft[i0] * p(z). */
-    let r: f64 = f64::from_bits(EXP2FT[i0 as usize]);
+    let r: f64 = f64::from_bits(i!(EXP2FT, i0 as usize));
     let t: f64 = r as f64 * z;
     let r: f64 = r + t * (p1 as f64 + z * p2 as f64) + t * (z * z) * (p3 as f64 + z * p4 as f64);
 
diff --git a/src/math/expf.rs b/src/math/expf.rs
index 47c1b2c4..a53aa90a 100644
--- a/src/math/expf.rs
+++ b/src/math/expf.rs
@@ -70,7 +70,7 @@ pub fn expf(mut x: f32) -> f32 {
         /* if |x| > 0.5 ln2 */
         if hx > 0x3f851592 {
             /* if |x| > 1.5 ln2 */
-            k = (INV_LN2 * x + HALF[sign as usize]) as i32;
+            k = (INV_LN2 * x + i!(HALF, sign as usize)) as i32;
         } else {
             k = 1 - sign - sign;
         }
diff --git a/src/math/mod.rs b/src/math/mod.rs
index c8d7bd81..9b64677c 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -58,6 +58,20 @@ macro_rules! i {
     };
 }
 
+#[cfg(any(debug_assertions, not(feature = "unstable")))]
+macro_rules! div {
+    ($a:expr, $b:expr) => {
+        $a / $b
+    };
+}
+
+#[cfg(all(not(debug_assertions), feature = "unstable"))]
+macro_rules! div {
+    ($a:expr, $b:expr) => {
+        unsafe { core::intrinsics::unchecked_div($a, $b) }
+    };
+}
+
 macro_rules! llvm_intrinsically_optimized {
     (#[cfg($($clause:tt)*)] $e:expr) => {
         #[cfg(all(feature = "unstable", $($clause)*))]
diff --git a/src/math/pow.rs b/src/math/pow.rs
index ce8e83ee..c7fd0dfa 100644
--- a/src/math/pow.rs
+++ b/src/math/pow.rs
@@ -299,8 +299,8 @@ pub fn pow(x: f64, y: f64) -> f64 {
         ax = with_set_high_word(ax, ix as u32);
 
         /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
-        let u: f64 = ax - BP[k as usize]; /* bp[0]=1.0, bp[1]=1.5 */
-        let v: f64 = 1.0 / (ax + BP[k as usize]);
+        let u: f64 = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */
+        let v: f64 = 1.0 / (ax + i!(BP, k as usize));
         let ss: f64 = u * v;
         let s_h = with_set_low_word(ss, 0);
 
@@ -309,7 +309,7 @@ pub fn pow(x: f64, y: f64) -> f64 {
             0.0,
             ((ix as u32 >> 1) | 0x20000000) + 0x00080000 + ((k as u32) << 18),
         );
-        let t_l: f64 = ax - (t_h - BP[k as usize]);
+        let t_l: f64 = ax - (t_h - i!(BP, k as usize));
         let s_l: f64 = v * ((u - s_h * t_h) - s_h * t_l);
 
         /* compute log(ax) */
@@ -328,12 +328,12 @@ pub fn pow(x: f64, y: f64) -> f64 {
         let p_h: f64 = with_set_low_word(u + v, 0);
         let p_l = v - (p_h - u);
         let z_h: f64 = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */
-        let z_l: f64 = CP_L * p_h + p_l * CP + DP_L[k as usize];
+        let z_l: f64 = CP_L * p_h + p_l * CP + i!(DP_L, k as usize);
 
         /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */
         let t: f64 = n as f64;
-        t1 = with_set_low_word(((z_h + z_l) + DP_H[k as usize]) + t, 0);
-        t2 = z_l - (((t1 - t) - DP_H[k as usize]) - z_h);
+        t1 = with_set_low_word(((z_h + z_l) + i!(DP_H, k as usize)) + t, 0);
+        t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h);
     }
 
     /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
diff --git a/src/math/powf.rs b/src/math/powf.rs
index f3cf76f9..68d2083b 100644
--- a/src/math/powf.rs
+++ b/src/math/powf.rs
@@ -238,8 +238,8 @@ pub fn powf(x: f32, y: f32) -> f32 {
         ax = f32::from_bits(ix as u32);
 
         /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
-        u = ax - BP[k as usize]; /* bp[0]=1.0, bp[1]=1.5 */
-        v = 1.0 / (ax + BP[k as usize]);
+        u = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */
+        v = 1.0 / (ax + i!(BP, k as usize));
         s = u * v;
         s_h = s;
         is = s_h.to_bits() as i32;
@@ -247,7 +247,7 @@ pub fn powf(x: f32, y: f32) -> f32 {
         /* t_h=ax+bp[k] High */
         is = (((ix as u32 >> 1) & 0xfffff000) | 0x20000000) as i32;
         t_h = f32::from_bits(is as u32 + 0x00400000 + ((k as u32) << 21));
-        t_l = ax - (t_h - BP[k as usize]);
+        t_l = ax - (t_h - i!(BP, k as usize));
         s_l = v * ((u - s_h * t_h) - s_h * t_l);
         /* compute log(ax) */
         s2 = s * s;
@@ -267,13 +267,13 @@ pub fn powf(x: f32, y: f32) -> f32 {
         p_h = f32::from_bits(is as u32 & 0xfffff000);
         p_l = v - (p_h - u);
         z_h = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */
-        z_l = CP_L * p_h + p_l * CP + DP_L[k as usize];
+        z_l = CP_L * p_h + p_l * CP + i!(DP_L, k as usize);
         /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
         t = n as f32;
-        t1 = ((z_h + z_l) + DP_H[k as usize]) + t;
+        t1 = ((z_h + z_l) + i!(DP_H, k as usize)) + t;
         is = t1.to_bits() as i32;
         t1 = f32::from_bits(is as u32 & 0xfffff000);
-        t2 = z_l - (((t1 - t) - DP_H[k as usize]) - z_h);
+        t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h);
     };
 
     /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
diff --git a/src/math/rem_pio2.rs b/src/math/rem_pio2.rs
index 6b7dbd34..46f7c38f 100644
--- a/src/math/rem_pio2.rs
+++ b/src/math/rem_pio2.rs
@@ -167,21 +167,21 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) {
     let mut z = f64::from_bits(ui);
     let mut tx = [0.0; 3];
     for i in 0..2 {
-        tx[i] = z as i32 as f64;
-        z = (z - tx[i]) * x1p24;
+        i!(tx,i, =, z as i32 as f64);
+        z = (z - i!(tx, i)) * x1p24;
     }
-    tx[2] = z;
+    i!(tx,2, =, z);
     /* skip zero terms, first term is non-zero */
     let mut i = 2;
-    while i != 0 && tx[i] == 0.0 {
+    while i != 0 && i!(tx, i) == 0.0 {
         i -= 1;
     }
     let mut ty = [0.0; 3];
     let n = rem_pio2_large(&tx[..=i], &mut ty, ((ix as i32) >> 20) - (0x3ff + 23), 1);
     if sign != 0 {
-        return (-n, -ty[0], -ty[1]);
+        return (-n, -i!(ty, 0), -i!(ty, 1));
     }
-    (n, ty[0], ty[1])
+    (n, i!(ty, 0), i!(ty, 1))
 }
 
 #[cfg(test)]
diff --git a/src/math/rem_pio2_large.rs b/src/math/rem_pio2_large.rs
index 002ce2e2..65473f0a 100644
--- a/src/math/rem_pio2_large.rs
+++ b/src/math/rem_pio2_large.rs
@@ -242,12 +242,12 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) ->
     let mut iq: [i32; 20] = [0; 20];
 
     /* initialize jk*/
-    let jk = INIT_JK[prec];
+    let jk = i!(INIT_JK, prec);
     let jp = jk;
 
     /* determine jx,jv,q0, note that 3>q0 */
     let jx = nx - 1;
-    let mut jv = (e0 - 3) / 24;
+    let mut jv = div!(e0 - 3, 24);
     if jv < 0 {
         jv = 0;
     }