From 0e154618a1e89331044803b0730543e23ff80c37 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sun, 8 Oct 2023 16:30:03 -0700 Subject: [PATCH 1/4] Make FileInfo mut in source_text to allow amortization of char indices --- src/fallback.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/fallback.rs b/src/fallback.rs index 70deb4b..ff7f935 100644 --- a/src/fallback.rs +++ b/src/fallback.rs @@ -362,7 +362,7 @@ impl FileInfo { span.lo >= self.span.lo && span.hi <= self.span.hi } - fn source_text(&self, span: Span) -> String { + fn source_text(&mut self, span: Span) -> String { let lo = (span.lo - self.span.lo) as usize; let trunc_lo = match self.source_text.char_indices().nth(lo) { Some((offset, _ch)) => &self.source_text[offset..], @@ -448,6 +448,15 @@ impl SourceMap { } unreachable!("Invalid span with no related FileInfo!"); } + + fn fileinfo_mut(&mut self, span: Span) -> &mut FileInfo { + for file in &mut self.files { + if file.span_within(span) { + return file; + } + } + unreachable!("Invalid span with no related FileInfo!"); + } } #[derive(Clone, Copy, PartialEq, Eq)] @@ -572,7 +581,7 @@ impl Span { if self.is_call_site() { None } else { - Some(SOURCE_MAP.with(|cm| cm.borrow().fileinfo(*self).source_text(*self))) + Some(SOURCE_MAP.with(|cm| cm.borrow_mut().fileinfo_mut(*self).source_text(*self))) } } } From 31b14c30f290d0e46942b25a24fe2a1ce59f9a82 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sun, 8 Oct 2023 16:32:02 -0700 Subject: [PATCH 2/4] Cache byte offsets computed from a char index --- src/fallback.rs | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/src/fallback.rs b/src/fallback.rs index ff7f935..6631dc8 100644 --- a/src/fallback.rs +++ b/src/fallback.rs @@ -4,6 +4,8 @@ use crate::parse::{self, Cursor}; use crate::rcvec::{RcVec, RcVecBuilder, RcVecIntoIter, RcVecMut}; use crate::{Delimiter, Spacing, TokenTree}; #[cfg(all(span_locations, not(fuzzing)))] +use 
alloc::collections::BTreeMap; +#[cfg(all(span_locations, not(fuzzing)))] use core::cell::RefCell; #[cfg(span_locations)] use core::cmp; @@ -327,6 +329,7 @@ thread_local! { source_text: String::new(), span: Span { lo: 0, hi: 0 }, lines: vec![0], + char_index_to_byte_offset: BTreeMap::new(), }], }); } @@ -336,6 +339,7 @@ struct FileInfo { source_text: String, span: Span, lines: Vec<usize>, + char_index_to_byte_offset: BTreeMap<usize, usize>, } #[cfg(all(span_locations, not(fuzzing)))] @@ -363,11 +367,27 @@ impl FileInfo { } fn source_text(&mut self, span: Span) -> String { - let lo = (span.lo - self.span.lo) as usize; - let trunc_lo = match self.source_text.char_indices().nth(lo) { - Some((offset, _ch)) => &self.source_text[offset..], - None => return String::new(), + let lo_char = (span.lo - self.span.lo) as usize; + let (&last_char_index, &last_byte_offset) = self + .char_index_to_byte_offset + .range(..=lo_char) + .next_back() + .unwrap_or((&0, &0)); + let lo_byte = if last_char_index == lo_char { + last_byte_offset + } else { + let total_byte_offset = match self.source_text[last_byte_offset..] 
+ .char_indices() + .nth(lo_char - last_char_index) + { + Some((additional_offset, _ch)) => last_byte_offset + additional_offset, + None => self.source_text.len(), + }; + self.char_index_to_byte_offset + .insert(lo_char, total_byte_offset); + total_byte_offset }; + let trunc_lo = &self.source_text[lo_byte..]; let char_len = (span.hi - span.lo) as usize; let source_text = match trunc_lo.char_indices().nth(char_len) { Some((offset, _ch)) => &trunc_lo[..offset], @@ -421,6 +441,7 @@ impl SourceMap { source_text: src.to_owned(), span, lines, + char_index_to_byte_offset: BTreeMap::new(), }); span From c4c3251c57747263f48a38ac8fb5a76a17f8fa8c Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sun, 8 Oct 2023 17:53:54 -0700 Subject: [PATCH 3/4] Explain source_text implementation approach --- src/fallback.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/fallback.rs b/src/fallback.rs index 6631dc8..7f559cf 100644 --- a/src/fallback.rs +++ b/src/fallback.rs @@ -368,11 +368,16 @@ impl FileInfo { fn source_text(&mut self, span: Span) -> String { let lo_char = (span.lo - self.span.lo) as usize; + + // Look up offset of the largest already-computed char index that is + // less than or equal to the current requested one. We resume counting + // chars from that point. let (&last_char_index, &last_byte_offset) = self .char_index_to_byte_offset .range(..=lo_char) .next_back() .unwrap_or((&0, &0)); + let lo_byte = if last_char_index == lo_char { last_byte_offset } else { @@ -387,6 +392,7 @@ impl FileInfo { .insert(lo_char, total_byte_offset); total_byte_offset }; + let trunc_lo = &self.source_text[lo_byte..]; let char_len = (span.hi - span.lo) as usize; let source_text = match trunc_lo.char_indices().nth(char_len) { @@ -441,6 +447,7 @@ impl SourceMap { source_text: src.to_owned(), span, lines, + // Populated lazily by source_text(). 
char_index_to_byte_offset: BTreeMap::new(), }); From 6461c2dd607aabec87d88144a7ffe7e00a2b2991 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sun, 8 Oct 2023 17:55:27 -0700 Subject: [PATCH 4/4] Add out-of-order call to source_text test --- tests/test.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/test.rs b/tests/test.rs index 1916a85..b75cd55 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -328,17 +328,19 @@ fn literal_span() { #[cfg(span_locations)] #[test] fn source_text() { - let input = " 𓀕 c "; + let input = " 𓀕 a z "; let mut tokens = input .parse::<proc_macro2::TokenStream>() .unwrap() .into_iter(); - let ident = tokens.next().unwrap(); - assert_eq!("𓀕", ident.span().source_text().unwrap()); + let first = tokens.next().unwrap(); + assert_eq!("𓀕", first.span().source_text().unwrap()); - let ident = tokens.next().unwrap(); - assert_eq!("c", ident.span().source_text().unwrap()); + let second = tokens.next().unwrap(); + let third = tokens.next().unwrap(); + assert_eq!("z", third.span().source_text().unwrap()); + assert_eq!("a", second.span().source_text().unwrap()); } #[test]