Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster parsing for lower numbers for radix up to 16 (cont.) #95399

Merged
merged 15 commits into from
Apr 12, 2022
Merged
102 changes: 65 additions & 37 deletions library/core/src/num/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use crate::ascii;
use crate::intrinsics;
use crate::mem;
use crate::ops::{Add, Mul, Sub};
use crate::str::FromStr;

// Used because the `?` operator is not allowed in a const context.
Expand Down Expand Up @@ -969,9 +970,10 @@ pub enum FpCategory {
}

#[doc(hidden)]
trait FromStrRadixHelper: PartialOrd + Copy {
fn min_value() -> Self;
fn max_value() -> Self;
trait FromStrRadixHelper:
PartialOrd + Copy + Add<Output = Self> + Sub<Output = Self> + Mul<Output = Self>
gilescope marked this conversation as resolved.
Show resolved Hide resolved
{
const MIN: Self;
fn from_u32(u: u32) -> Self;
fn checked_mul(&self, other: u32) -> Option<Self>;
fn checked_sub(&self, other: u32) -> Option<Self>;
Expand All @@ -991,12 +993,9 @@ macro_rules! from_str_radix_int_impl {
}
from_str_radix_int_impl! { isize i8 i16 i32 i64 i128 usize u8 u16 u32 u64 u128 }

macro_rules! doit {
macro_rules! impl_helper_for {
($($t:ty)*) => ($(impl FromStrRadixHelper for $t {
#[inline]
fn min_value() -> Self { Self::MIN }
#[inline]
fn max_value() -> Self { Self::MAX }
const MIN: Self = Self::MIN;
#[inline]
fn from_u32(u: u32) -> Self { u as Self }
#[inline]
Expand All @@ -1013,7 +1012,18 @@ macro_rules! doit {
}
})*)
}
doit! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize }
impl_helper_for! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize }

/// Determins if a string of text of that length of that radix could be guaranteed to be
/// stored in the given type T.
/// Note that if the radix is known to the compiler, it is just the check of digits.len that
/// is done at runtime.
#[doc(hidden)]
#[inline(always)]
#[unstable(issue = "none", feature = "std_internals")]
pub fn can_not_overflow<T>(radix: u32, is_signed_ty: bool, digits: &[u8]) -> bool {
radix <= 16 && digits.len() <= mem::size_of::<T>() * 2 - is_signed_ty as usize
}

fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, ParseIntError> {
use self::IntErrorKind::*;
Expand All @@ -1029,7 +1039,7 @@ fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, Par
return Err(PIE { kind: Empty });
}

let is_signed_ty = T::from_u32(0) > T::min_value();
let is_signed_ty = T::from_u32(0) > T::MIN;

// all valid digits are ascii, so we will just iterate over the utf8 bytes
// and cast them to chars. .to_digit() will safely return None for anything
Expand All @@ -1047,38 +1057,56 @@ fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, Par
};

let mut result = T::from_u32(0);
if is_positive {
// The number is positive
for &c in digits {
let x = match (c as char).to_digit(radix) {
Some(x) => x,
None => return Err(PIE { kind: InvalidDigit }),
};
result = match result.checked_mul(radix) {
Some(result) => result,
None => return Err(PIE { kind: PosOverflow }),
};
result = match result.checked_add(x) {
Some(result) => result,
None => return Err(PIE { kind: PosOverflow }),

if can_not_overflow::<T>(radix, is_signed_ty, digits) {
// If the len of the str is short compared to the range of the type
// we are parsing into, then we can be certain that an overflow will not occur.
// This bound is when `radix.pow(digits.len()) - 1 <= T::MAX` but the condition
// above is a faster (conservative) approximation of this.
//
// Consider radix 16 as it has the highest information density per digit and will thus overflow the earliest:
// `u8::MAX` is `ff` - any str of len 2 is guaranteed to not overflow.
// `i8::MAX` is `7f` - only a str of len 1 is guaranteed to not overflow.
macro_rules! run_unchecked_loop {
($unchecked_additive_op:expr) => {
for &c in digits {
result = result * T::from_u32(radix);
let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?;
result = $unchecked_additive_op(result, T::from_u32(x));
}
};
}
if is_positive {
run_unchecked_loop!(<T as core::ops::Add>::add)
} else {
run_unchecked_loop!(<T as core::ops::Sub>::sub)
};
} else {
// The number is negative
for &c in digits {
let x = match (c as char).to_digit(radix) {
Some(x) => x,
None => return Err(PIE { kind: InvalidDigit }),
};
result = match result.checked_mul(radix) {
Some(result) => result,
None => return Err(PIE { kind: NegOverflow }),
};
result = match result.checked_sub(x) {
Some(result) => result,
None => return Err(PIE { kind: NegOverflow }),
macro_rules! run_checked_loop {
($checked_additive_op:ident, $overflow_err:expr) => {
for &c in digits {
// When `radix` is passed in as a literal, rather than doing a slow `imul`
// the compiler can use shifts if `radix` can be expressed as a
// sum of powers of 2 (x*10 can be written as x*8 + x*2).
// When the compiler can't use these optimisations,
// the latency of the multiplication can be hidden by issuing it
// before the result is needed to improve performance on
// modern out-of-order CPU as multiplication here is slower
// than the other instructions, we can get the end result faster
// doing multiplication first and let the CPU spends other cycles
// doing other computation and get multiplication result later.
let mul = result.checked_mul(radix);
let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?;
result = mul.ok_or_else($overflow_err)?;
result = T::$checked_additive_op(&result, x).ok_or_else($overflow_err)?;
}
};
}
if is_positive {
run_checked_loop!(checked_add, || PIE { kind: PosOverflow })
gilescope marked this conversation as resolved.
Show resolved Hide resolved
} else {
run_checked_loop!(checked_sub, || PIE { kind: NegOverflow })
};
}
Ok(result)
}
1 change: 1 addition & 0 deletions library/core/tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#![feature(numfmt)]
#![feature(step_trait)]
#![feature(str_internals)]
#![feature(std_internals)]
#![feature(test)]
#![feature(trusted_len)]
#![feature(try_blocks)]
Expand Down
77 changes: 76 additions & 1 deletion library/core/tests/num/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use core::cmp::PartialEq;
use core::convert::{TryFrom, TryInto};
use core::fmt::Debug;
use core::marker::Copy;
use core::num::{IntErrorKind, ParseIntError, TryFromIntError};
use core::num::{can_not_overflow, IntErrorKind, ParseIntError, TryFromIntError};
use core::ops::{Add, Div, Mul, Rem, Sub};
use core::option::Option;
use core::option::Option::None;
Expand Down Expand Up @@ -120,6 +120,81 @@ fn test_int_from_str_overflow() {
test_parse::<i64>("-9223372036854775809", Err(IntErrorKind::NegOverflow));
}

#[test]
fn test_can_not_overflow() {
fn can_overflow<T>(radix: u32, input: &str) -> bool
where
T: Default
+ core::ops::Sub<Output = T>
+ std::convert::From<bool>
+ std::cmp::PartialOrd
+ Copy,
{
let one = true.into();
let zero = <T>::default();
!can_not_overflow::<T>(radix, zero - one < zero, input.as_bytes())
}

// Positive tests:
assert!(!can_overflow::<i8>(16, "F"));
assert!(!can_overflow::<u8>(16, "FF"));

assert!(!can_overflow::<i8>(10, "9"));
assert!(!can_overflow::<u8>(10, "99"));

// Negative tests:

// Not currently in std lib (issue: #27728)
fn format_radix<T>(mut x: T, radix: T) -> String
where
T: std::ops::Rem<Output = T>,
T: std::ops::Div<Output = T>,
T: std::cmp::PartialEq,
T: std::default::Default,
T: Copy,
T: Default,
u32: TryFrom<T>,
{
let mut result = vec![];

loop {
let m = x % radix;
x = x / radix;
result.push(
std::char::from_digit(m.try_into().ok().unwrap(), radix.try_into().ok().unwrap())
.unwrap(),
);
if x == T::default() {
break;
}
}
result.into_iter().rev().collect()
}

macro_rules! check {
($($t:ty)*) => ($(
for base in 2..=36 {
let num = (<$t>::MAX as u128) + 1;

// Calcutate the string length for the smallest overflowing number:
let max_len_string = format_radix(num, base as u128);
// Ensure that that string length is deemed to potentially overflow:
assert!(can_overflow::<$t>(base, &max_len_string));
}
)*)
}

check! { i8 i16 i32 i64 i128 isize usize u8 u16 u32 u64 }

// Check u128 separately:
for base in 2..=36 {
let num = u128::MAX as u128;
let max_len_string = format_radix(num, base as u128);
// base 16 fits perfectly for u128 and won't overflow:
assert_eq!(can_overflow::<u128>(base, &max_len_string), base != 16);
}
}

#[test]
fn test_leading_plus() {
test_parse::<u8>("+127", Ok(127));
Expand Down