Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(parser): faster parsing decimal numbers #4257

Merged
merged 1 commit into from
Jul 15, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 73 additions & 7 deletions crates/oxc_parser/src/lexer/number.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
//! Parsing utilities for converting Javascript numbers to Rust f64
//! code copied from [jsparagus](https://github.com/mozilla-spidermonkey/jsparagus/blob/master/crates/parser/src/numeric_value.rs)
//! Parsing utilities for converting Javascript numbers to Rust f64.
//! Code copied originally from
//! [jsparagus](https://github.com/mozilla-spidermonkey/jsparagus/blob/master/crates/parser/src/numeric_value.rs)
//! but iterated on since.

use std::borrow::Cow;

use num_bigint::BigInt;
use num_traits::Num as _;
use num_traits::Num;

use super::kind::Kind;

Expand All @@ -26,7 +28,7 @@ pub fn parse_float(s: &str, has_sep: bool) -> Result<f64, &'static str> {
/// Parsing will fail if this assumption is violated.
fn parse_int_without_underscores(s: &str, kind: Kind) -> Result<f64, &'static str> {
match kind {
Kind::Decimal => parse_float_without_underscores(s),
Kind::Decimal => Ok(parse_decimal(s)),
Kind::Binary => Ok(parse_binary(&s[2..])),
Kind::Octal => {
let s = if s.starts_with("0o") || s.starts_with("0O") {
Expand All @@ -47,12 +49,65 @@ fn parse_float_without_underscores(s: &str) -> Result<f64, &'static str> {
s.parse::<f64>().map_err(|_| "invalid float")
}

// ==================================== DECIMAL ====================================

/// Convert an ASCII decimal digit byte to the number it represents.
///
/// `b'0'..=b'9'` are `0x30..=0x39`, so the low 4 bits of the byte are already
/// the digit's value and a single mask (`c & 15`) performs the conversion.
/// This produces more compact assembly than `c - b'0'`.
///
/// Debug builds assert that `c` really is an ASCII decimal digit.
///
/// <https://godbolt.org/z/WMarz15sq>
#[inline]
const fn decimal_byte_to_value(c: u8) -> u8 {
    debug_assert!(c.is_ascii_digit());
    // Keep only the low nibble: 0x30 -> 0, ..., 0x39 -> 9.
    c & 0x0F
}

/// Parse a string of ASCII decimal digits into an `f64`.
///
/// Strings of at most 19 chars are accumulated digit by digit in a `u64`, which is
/// converted to `f64` at the end. Longer strings are handed to `parse_decimal_slow`.
///
/// `s` must not be empty; this is only checked in debug builds.
#[allow(clippy::cast_precision_loss)]
fn parse_decimal(s: &str) -> f64 {
    /// Longest numeric string which cannot overflow a `u64`.
    /// `u64::MAX + 1` in decimal is 18446744073709551616 (20 chars),
    /// so anything of 20 chars or more has the chance to overflow.
    const MAX_FAST_DECIMAL_LEN: usize = 19;

    debug_assert!(!s.is_empty());
    if s.len() > MAX_FAST_DECIMAL_LEN {
        return parse_decimal_slow(s);
    }

    // Each step multiplies the accumulator first and adds the new digit afterwards.
    // The multiplication is slower than the other instructions here, so issuing it
    // first lets a modern out-of-order CPU spend the waiting cycles on the rest of
    // the work and pick up the product when it is ready.
    let value = s
        .bytes()
        .fold(0_u64, |acc, byte| acc * 10 + u64::from(decimal_byte_to_value(byte)));
    value as f64
}

/// Fallback for decimal strings too long for the `u64` fast path.
///
/// Delegates to the standard library's `f64` parser and panics if `s` is not
/// something that parser accepts.
#[cold]
#[inline(never)]
fn parse_decimal_slow(s: &str) -> f64 {
    // NB: The `mul_add` loop method that `parse_binary_slow` etc use cannot be used here,
    // as it produces an imprecise result for decimal.
    // For the others it's fine, presumably because multiplying by a power of 2
    // just increments the f64's exponent. Multiplying by 10 is more complex.
    let parsed: Result<f64, _> = s.parse();
    parsed.unwrap()
}

// ==================================== BINARY ====================================

/// b'0' is 0x30 and b'1' is 0x31.
///
/// So we can convert from binary digit to its value with `c & 1`.
/// This produces more compact assembly than `c - b'0'`.
///
/// <https://godbolt.org/z/1vvrK78jf>
#[inline]
const fn binary_byte_to_value(c: u8) -> u8 {
debug_assert!(c == b'0' || c == b'1');
c & 1
Expand Down Expand Up @@ -113,6 +168,7 @@ fn parse_binary_slow(s: &str) -> f64 {
/// This produces more compact assembly than `c - b'0'`.
///
/// <https://godbolt.org/z/9rYTsMoMM>
#[inline]
const fn octal_byte_to_value(c: u8) -> u8 {
debug_assert!(c >= b'0' && c <= b'7');
c & 7
Expand Down Expand Up @@ -166,6 +222,7 @@ fn parse_octal_slow(s: &str) -> f64 {
/// but only because compiler unrolls the loop.
///
/// <https://godbolt.org/z/5fsdv8rGo>
#[inline]
const fn hex_byte_to_value(c: u8) -> u8 {
debug_assert!((c >= b'0' && c <= b'9') || (c >= b'A' && c <= b'F') || (c >= b'a' && c <= b'f'));
if c < b'A' {
Expand Down Expand Up @@ -208,6 +265,8 @@ fn parse_hex_slow(s: &str) -> f64 {
result
}

// ==================================== BIGINT ====================================

pub fn parse_big_int(s: &str, kind: Kind, has_sep: bool) -> Result<BigInt, &'static str> {
let s = if has_sep { Cow::Owned(s.replace('_', "")) } else { Cow::Borrowed(s) };
debug_assert!(!s.contains('_'));
Expand Down Expand Up @@ -238,9 +297,7 @@ fn parse_big_int_without_underscores(s: &str, kind: Kind) -> Result<BigInt, &'st
#[cfg(test)]
#[allow(clippy::unreadable_literal, clippy::mixed_case_hex_literals)]
mod test {
use super::{
binary_byte_to_value, hex_byte_to_value, octal_byte_to_value, parse_float, parse_int, Kind,
};
use super::*;

#[allow(clippy::cast_precision_loss)]
fn assert_all_ints_eq<I>(test_cases: I, kind: Kind, has_sep: bool)
Expand Down Expand Up @@ -270,6 +327,10 @@ mod test {
}
}

// decimal
static_assertions::const_assert_eq!(decimal_byte_to_value(b'0'), 0);
static_assertions::const_assert_eq!(decimal_byte_to_value(b'9'), 9);

// binary
static_assertions::const_assert_eq!(binary_byte_to_value(b'0'), 0);
static_assertions::const_assert_eq!(binary_byte_to_value(b'1'), 1);
Expand All @@ -294,6 +355,11 @@ mod test {
parse_int("18446744073709551616", Kind::Decimal, false),
Ok(18446744073709551616_i128 as f64)
);
// This tests for the imprecision which would result from using a `mul_add` loop
assert_eq!(
parse_int("12300000000000000000000000", Kind::Decimal, false),
Ok(12300000000000000000000000_i128 as f64)
);
assert_eq!(
// 0x10000000000000000 = 1 << 64
parse_int("0x10000000000000000", Kind::Hex, false),
Expand Down
Loading