Skip to content

Commit

Permalink
Further Implement Power of Two Optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
NCGThompson committed Jan 23, 2024
1 parent 971e37f commit 9dccd5d
Show file tree
Hide file tree
Showing 3 changed files with 350 additions and 185 deletions.
259 changes: 186 additions & 73 deletions library/core/src/num/int_macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -901,26 +901,59 @@ macro_rules! int_impl {
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
#[must_use = "this returns the result of the operation, \
without modifying the original"]
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
#[inline]
pub const fn checked_pow(self, mut exp: u32) -> Option<Self> {
if exp == 0 {
return Some(1);
}
let mut base = self;
let mut acc: Self = 1;
// SAFETY: This path has the same behavior as the other.
if unsafe { intrinsics::is_val_statically_known(self) }
&& self.unsigned_abs().is_power_of_two()
{
if self == 1 { // Avoid divide by zero
return Some(1);
}
if self == -1 { // Avoid divide by zero
return Some(if exp & 1 != 0 { -1 } else { 1 });
}
// SAFETY: We just checked this is a power of two. and above zero.
let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
if exp > Self::BITS / power_used { return None; } // Division of constants is free

// SAFETY: exp <= Self::BITS / power_used
let res = unsafe { intrinsics::unchecked_shl(
1 as Self,
intrinsics::unchecked_mul(power_used, exp) as Self
)};
// LLVM doesn't always optimize out the checks
// at the ir level.

let sign = self.is_negative() && exp & 1 != 0;
if !sign && res == Self::MIN {
None
} else if sign {
Some(res.wrapping_neg())
} else {
Some(res)
}
} else {
if exp == 0 {
return Some(1);
}
let mut base = self;
let mut acc: Self = 1;

while exp > 1 {
if (exp & 1) == 1 {
acc = try_opt!(acc.checked_mul(base));
while exp > 1 {
if (exp & 1) == 1 {
acc = try_opt!(acc.checked_mul(base));
}
exp /= 2;
base = try_opt!(base.checked_mul(base));
}
exp /= 2;
base = try_opt!(base.checked_mul(base));
// since exp!=0, finally the exp must be 1.
// Deal with the final bit of the exponent separately, since
// squaring the base afterwards is not necessary and may cause a
// needless overflow.
acc.checked_mul(base)
}
// since exp!=0, finally the exp must be 1.
// Deal with the final bit of the exponent separately, since
// squaring the base afterwards is not necessary and may cause a
// needless overflow.
acc.checked_mul(base)
}

/// Returns the square root of the number, rounded down.
Expand Down Expand Up @@ -1537,27 +1570,58 @@ macro_rules! int_impl {
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
#[must_use = "this returns the result of the operation, \
without modifying the original"]
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
#[inline]
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
if exp == 0 {
return 1;
}
let mut base = self;
let mut acc: Self = 1;
// SAFETY: This path has the same behavior as the other.
if unsafe { intrinsics::is_val_statically_known(self) }
&& self.unsigned_abs().is_power_of_two()
{
if self == 1 { // Avoid divide by zero
return 1;
}
if self == -1 { // Avoid divide by zero
return if exp & 1 != 0 { -1 } else { 1 };
}
// SAFETY: We just checked this is a power of two. and above zero.
let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
if exp > Self::BITS / power_used { return 0; } // Division of constants is free

// SAFETY: exp <= Self::BITS / power_used
let res = unsafe { intrinsics::unchecked_shl(
1 as Self,
intrinsics::unchecked_mul(power_used, exp) as Self
)};
// LLVM doesn't always optimize out the checks
// at the ir level.

let sign = self.is_negative() && exp & 1 != 0;
if sign {
res.wrapping_neg()
} else {
res
}
} else {
if exp == 0 {
return 1;
}
let mut base = self;
let mut acc: Self = 1;

while exp > 1 {
if (exp & 1) == 1 {
acc = acc.wrapping_mul(base);
while exp > 1 {
if (exp & 1) == 1 {
acc = acc.wrapping_mul(base);
}
exp /= 2;
base = base.wrapping_mul(base);
}
exp /= 2;
base = base.wrapping_mul(base);
}

// since exp!=0, finally the exp must be 1.
// Deal with the final bit of the exponent separately, since
// squaring the base afterwards is not necessary and may cause a
// needless overflow.
acc.wrapping_mul(base)
// since exp!=0, finally the exp must be 1.
// Deal with the final bit of the exponent separately, since
// squaring the base afterwards is not necessary and may cause a
// needless overflow.
acc.wrapping_mul(base)
}
}

/// Calculates `self` + `rhs`
Expand Down Expand Up @@ -2039,36 +2103,68 @@ macro_rules! int_impl {
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
#[must_use = "this returns the result of the operation, \
without modifying the original"]
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
#[inline]
pub const fn overflowing_pow(self, mut exp: u32) -> (Self, bool) {
if exp == 0 {
return (1,false);
}
let mut base = self;
let mut acc: Self = 1;
let mut overflown = false;
// Scratch space for storing results of overflowing_mul.
let mut r;

while exp > 1 {
if (exp & 1) == 1 {
r = acc.overflowing_mul(base);
acc = r.0;
// SAFETY: This path has the same behavior as the other.
if unsafe { intrinsics::is_val_statically_known(self) }
&& self.unsigned_abs().is_power_of_two()
{
if self == 1 { // Avoid divide by zero
return (1, false);
}
if self == -1 { // Avoid divide by zero
return (if exp & 1 != 0 { -1 } else { 1 }, false);
}
// SAFETY: We just checked this is a power of two. and above zero.
let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
if exp > Self::BITS / power_used { return (0, true); } // Division of constants is free

// SAFETY: exp <= Self::BITS / power_used
let res = unsafe { intrinsics::unchecked_shl(
1 as Self,
intrinsics::unchecked_mul(power_used, exp) as Self
)};
// LLVM doesn't always optimize out the checks
// at the ir level.

let sign = self.is_negative() && exp & 1 != 0;
let overflow = res == Self::MIN;
if sign {
(res.wrapping_neg(), overflow)
} else {
(res, overflow)
}
} else {
if exp == 0 {
return (1,false);
}
let mut base = self;
let mut acc: Self = 1;
let mut overflown = false;
// Scratch space for storing results of overflowing_mul.
let mut r;

while exp > 1 {
if (exp & 1) == 1 {
r = acc.overflowing_mul(base);
acc = r.0;
overflown |= r.1;
}
exp /= 2;
r = base.overflowing_mul(base);
base = r.0;
overflown |= r.1;
}
exp /= 2;
r = base.overflowing_mul(base);
base = r.0;
overflown |= r.1;
}

// since exp!=0, finally the exp must be 1.
// Deal with the final bit of the exponent separately, since
// squaring the base afterwards is not necessary and may cause a
// needless overflow.
r = acc.overflowing_mul(base);
r.1 |= overflown;
r
// since exp!=0, finally the exp must be 1.
// Deal with the final bit of the exponent separately, since
// squaring the base afterwards is not necessary and may cause a
// needless overflow.
r = acc.overflowing_mul(base);
r.1 |= overflown;
r
}
}

/// Raises self to the power of `exp`, using exponentiation by squaring.
Expand All @@ -2086,30 +2182,47 @@ macro_rules! int_impl {
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
#[must_use = "this returns the result of the operation, \
without modifying the original"]
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
#[inline]
#[rustc_inherit_overflow_checks]
#[rustc_allow_const_fn_unstable(is_val_statically_known)]
#[track_caller] // Hides the hackish overflow check for powers of two.
pub const fn pow(self, mut exp: u32) -> Self {
// SAFETY: This path has the same behavior as the other.
if unsafe { intrinsics::is_val_statically_known(self) }
&& self > 0
&& (self & (self - 1) == 0)
&& self.unsigned_abs().is_power_of_two()
{
let power_used = match self.checked_ilog2() {
Some(v) => v,
// SAFETY: We just checked this is a power of two. and above zero.
None => unsafe { core::hint::unreachable_unchecked() },
};
// So it panics. Have to use `overflowing_mul` to efficiently set the
// result to 0 if not.
#[cfg(debug_assertions)]
{
_ = power_used * exp;
if self == 1 { // Avoid divide by zero
return 1;
}
if self == -1 { // Avoid divide by zero
return if exp & 1 != 0 { -1 } else { 1 };
}
// SAFETY: We just checked this is a power of two. and above zero.
let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
if exp > Self::BITS / power_used { // Division of constants is free
#[allow(arithmetic_overflow)]
return Self::MAX * Self::MAX * 0;
}

// SAFETY: exp <= Self::BITS / power_used
let res = unsafe { intrinsics::unchecked_shl(
1 as Self,
intrinsics::unchecked_mul(power_used, exp) as Self
)};
// LLVM doesn't always optimize out the checks
// at the ir level.

let sign = self.is_negative() && exp & 1 != 0;
#[allow(arithmetic_overflow)]
if !sign && res == Self::MIN {
// So it panics.
_ = Self::MAX * Self::MAX;
}
if sign {
res.wrapping_neg()
} else {
res
}
let (num_shl, overflowed) = power_used.overflowing_mul(exp);
let fine = !overflowed
& (num_shl < (mem::size_of::<Self>() * 8) as u32);
(1 << num_shl) * fine as Self
} else {
if exp == 0 {
return 1;
Expand Down
Loading

0 comments on commit 9dccd5d

Please sign in to comment.