From b1edc7d13f72880fd0ac569403a409e5f7961d5f Mon Sep 17 00:00:00 2001
From: Alisa Sireneva
Date: Sat, 27 Jul 2024 00:22:11 +0300
Subject: [PATCH] Optimize position search in error path

Translating index into a line/column pair takes considerable time.
Notably, the JSON benchmark modified to run on malformed data spends
around 50% of the CPU time generating the error object.

While it is generally assumed that the cold path is quite slow, such a
drastic pessimization may be unexpected, especially when a faster
implementation exists.

Using vectorized routines provided by the memchr crate increases
performance of the failure path by 2x on average.

Old implementation:
                                DOM         STRUCT
    data/canada.json            122 MB/s    168 MB/s
    data/citm_catalog.json      135 MB/s    195 MB/s
    data/twitter.json           142 MB/s    226 MB/s

New implementation:
                                DOM         STRUCT
    data/canada.json            216 MB/s    376 MB/s
    data/citm_catalog.json      238 MB/s    736 MB/s
    data/twitter.json           210 MB/s    492 MB/s

In comparison, the performance of the happy path is:
                                DOM         STRUCT
    data/canada.json            283 MB/s    416 MB/s
    data/citm_catalog.json      429 MB/s    864 MB/s
    data/twitter.json           275 MB/s    541 MB/s

While this introduces a new dependency, memchr is much faster to compile
than serde, so compile time does not increase significantly.
Additionally, memchr provides a more efficient SWAR-based implementation
of both the memchr and count routines even without std, providing
benefits for embedded uses as well.
---
 Cargo.toml  |  3 ++-
 src/read.rs | 19 +++++++------------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 6a8e74526..8f8b45582 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,6 +14,7 @@ rust-version = "1.56"
 [dependencies]
 indexmap = { version = "2.2.3", optional = true }
 itoa = "1.0"
+memchr = { version = "2", default-features = false }
 ryu = "1.0"
 serde = { version = "1.0.194", default-features = false }
 
@@ -45,7 +46,7 @@ features = ["raw_value"]
 
 [features]
 default = ["std"]
-std = ["serde/std"]
+std = ["memchr/std", "serde/std"]
 
 # Provide integration for heap-allocated collections without depending on the
 # rest of the Rust standard library.
diff --git a/src/read.rs b/src/read.rs
index a426911c7..e03e13f28 100644
--- a/src/read.rs
+++ b/src/read.rs
@@ -415,19 +415,14 @@ impl<'a> SliceRead<'a> {
     }
 
     fn position_of_index(&self, i: usize) -> Position {
-        let mut position = Position { line: 1, column: 0 };
-        for ch in &self.slice[..i] {
-            match *ch {
-                b'\n' => {
-                    position.line += 1;
-                    position.column = 0;
-                }
-                _ => {
-                    position.column += 1;
-                }
-            }
+        let start_of_line = match memchr::memrchr(b'\n', &self.slice[..i]) {
+            Some(position) => position + 1,
+            None => 0,
+        };
+        Position {
+            line: 1 + memchr::memchr_iter(b'\n', &self.slice[..start_of_line]).count(),
+            column: i - start_of_line,
         }
-        position
     }
 
     /// The big optimization here over IoRead is that if the string contains no