From fe98215490e0489fdf126b9adda4e2504af85e33 Mon Sep 17 00:00:00 2001 From: Ryan Cumming Date: Sun, 5 Jan 2020 11:39:45 +1100 Subject: [PATCH 1/2] Make source type generic **THIS IS A BREAKING CHANGE** When porting Arret from `codespan` 0.3 (see etaoins/arret#160) one of the missing features was being able use non-`String` types for storing source. Arret has two potential uses for this: 1. When loading native libraries exposing Arret functions (known as RFI libraries) their Arret type is specified as a per-function `&'static str`. An example can be seen here: https://github.com/etaoins/arret/blob/dfc41bed1c992cc57023359743f68c639d667f25/stdlib/rust/list.rs#L8 With `codespan` 0.3 we used `Cow<'static str>` to avoid allocating a new `String` for every RFI function by pointing directly inside the loaded library's binary. 2. Arret is multithreaded but needs to maintain a global `Files` instance to allocate `FileId`s. It uses a read-write lock for thread safety and is sensitive to contention on that lock in some circumstances. Because we need a `FileId` in our AST we can't parse the source to AST until we've added it to `Files`. However, because `Files` takes ownership of the source we need to hold a read lock on `Files` to use `files.source()`. The effectively serialises loading source files across all threads. Previously we used `Arc` to drop the lock on `CodeMap` and then parse the `FileMap` locklessly. The API doesn't allow this anymore but we could use `Source = Arc` to similar effect. Unfortunately the above two uses are mutually exclusive using standard library types. However, either one is an improvement over `String` and a crate type like `supercow` could potentially satisfy both uses. This is a breaking change due to adding a new generic parameter to `Files`. I initially attempted to use a default of `String` but this doesn't work for `Files::new` due to rust-lang/rust#27336. See rust-lang/wg-allocators#1 for an analogous case to this. --- codespan-reporting/src/term/mod.rs | 12 ++- .../src/term/views/diagnostic.rs | 31 ++++-- .../src/term/views/source_snippet/mod.rs | 16 ++- codespan/src/file.rs | 100 ++++++++++++------ 4 files changed, 108 insertions(+), 51 deletions(-) diff --git a/codespan-reporting/src/term/mod.rs b/codespan-reporting/src/term/mod.rs index 4174e14c..19578c0d 100644 --- a/codespan-reporting/src/term/mod.rs +++ b/codespan-reporting/src/term/mod.rs @@ -15,12 +15,15 @@ pub use termcolor; pub use self::config::{Chars, Config, DisplayStyle, Styles}; /// Emit a diagnostic using the given writer, context, config, and files. -pub fn emit( +pub fn emit( writer: &mut (impl WriteColor + ?Sized), config: &Config, - files: &Files, + files: &Files, diagnostic: &Diagnostic, -) -> io::Result<()> { +) -> io::Result<()> +where + Source: AsRef, +{ use self::views::{RichDiagnostic, ShortDiagnostic}; match config.display_style { @@ -97,7 +100,8 @@ mod tests { #[test] fn unsized_emit() { - let mut files = Files::new(); + let mut files = Files::<&'static str>::new(); + let id = files.add("test", ""); emit( &mut termcolor::NoColor::new(Vec::::new()) as &mut dyn WriteColor, diff --git a/codespan-reporting/src/term/views/diagnostic.rs b/codespan-reporting/src/term/views/diagnostic.rs index 3209b703..b8f5a6d3 100644 --- a/codespan-reporting/src/term/views/diagnostic.rs +++ b/codespan-reporting/src/term/views/diagnostic.rs @@ -8,13 +8,19 @@ use crate::term::Config; use super::{Header, Locus, NewLine, SourceSnippet}; /// Output a richly formatted diagnostic, with source code previews. -pub struct RichDiagnostic<'a> { - files: &'a Files, +pub struct RichDiagnostic<'a, Source> +where + Source: AsRef, +{ + files: &'a Files, diagnostic: &'a Diagnostic, } -impl<'a> RichDiagnostic<'a> { - pub fn new(files: &'a Files, diagnostic: &'a Diagnostic) -> RichDiagnostic<'a> { +impl<'a, Source> RichDiagnostic<'a, Source> +where + Source: AsRef, +{ + pub fn new(files: &'a Files, diagnostic: &'a Diagnostic) -> Self { RichDiagnostic { files, diagnostic } } @@ -63,13 +69,22 @@ impl<'a> RichDiagnostic<'a> { } /// Output a short diagnostic, with a line number, severity, and message. -pub struct ShortDiagnostic<'a> { - files: &'a Files, +pub struct ShortDiagnostic<'a, Source> +where + Source: AsRef, +{ + files: &'a Files, diagnostic: &'a Diagnostic, } -impl<'a> ShortDiagnostic<'a> { - pub fn new(files: &'a Files, diagnostic: &'a Diagnostic) -> ShortDiagnostic<'a> { +impl<'a, Source> ShortDiagnostic<'a, Source> +where + Source: AsRef, +{ + pub fn new( + files: &'a Files, + diagnostic: &'a Diagnostic, + ) -> ShortDiagnostic<'a, Source> { ShortDiagnostic { files, diagnostic } } diff --git a/codespan-reporting/src/term/views/source_snippet/mod.rs b/codespan-reporting/src/term/views/source_snippet/mod.rs index c115af6e..572d2346 100644 --- a/codespan-reporting/src/term/views/source_snippet/mod.rs +++ b/codespan-reporting/src/term/views/source_snippet/mod.rs @@ -30,20 +30,26 @@ pub use self::underline::MarkStyle; /// = expected type `Int` /// found type `String` /// ``` -pub struct SourceSnippet<'a> { - files: &'a Files, +pub struct SourceSnippet<'a, Source> +where + Source: AsRef, +{ + files: &'a Files, file_id: FileId, spans: Vec<(&'a Label, MarkStyle)>, notes: &'a [String], } -impl<'a> SourceSnippet<'a> { +impl<'a, Source> SourceSnippet<'a, Source> +where + Source: AsRef, +{ pub fn new( - files: &'a Files, + files: &'a Files, file_id: FileId, spans: Vec<(&'a Label, MarkStyle)>, notes: &'a [String], - ) -> SourceSnippet<'a> { + ) -> Self { SourceSnippet { files, file_id, diff --git a/codespan/src/file.rs b/codespan/src/file.rs index a519fcb5..0191a861 100644 --- a/codespan/src/file.rs +++ b/codespan/src/file.rs @@ -40,7 +40,7 @@ impl fmt::Display for LocationError { ), LocationError::InvalidCharBoundary { given } => { write!(f, "Byte index within character boundary - given: {}", given) - }, + } } } } @@ -68,22 +68,44 @@ impl fmt::Display for SpanOutOfBoundsError { #[cfg_attr(feature = "serialization", derive(Deserialize, Serialize))] #[cfg_attr(feature = "memory_usage", derive(heapsize_derive::HeapSizeOf))] pub struct FileId(u32); - /// A database of source files. -#[derive(Clone, Debug, Default)] -pub struct Files { - files: Vec, +/// +/// The `Source` generic parameter determines how source text is stored. Using [`String`] will have +/// `Files` take ownership of all source text. Smart pointer types such as [`Cow<'_, str>`], +/// [`Rc`] or [`Arc`] can be used to share the source text with the rest of the program. +/// +/// [`Cow<'_, str>`]: std::borrow::Cow +/// [`Rc`]: std::rc::Rc +/// [`Arc`]: std::sync::Arc +#[derive(Clone, Debug)] +pub struct Files +where + Source: AsRef, +{ + files: Vec>, } -impl Files { +impl Default for Files +where + Source: AsRef, +{ + fn default() -> Self { + Self { files: vec![] } + } +} + +impl Files +where + Source: AsRef, +{ /// Create a new, empty database of files. - pub fn new() -> Files { - Files::default() + pub fn new() -> Self { + Files::::default() } /// Add a file to the database, returning the handle that can be used to /// refer to it again. - pub fn add(&mut self, name: impl Into, source: impl Into) -> FileId { + pub fn add(&mut self, name: impl Into, source: impl Into) -> FileId { let file_id = FileId(self.files.len() as u32); self.files.push(File::new(name.into(), source.into())); file_id @@ -93,19 +115,19 @@ impl Files { /// /// This will mean that any outstanding byte indexes will now point to /// invalid locations. - pub fn update(&mut self, file_id: FileId, source: impl Into) { + pub fn update(&mut self, file_id: FileId, source: impl Into) { self.get_mut(file_id).update(source.into()) } /// Get a the source file using the file id. // FIXME: return an option or result? - fn get(&self, file_id: FileId) -> &File { + fn get(&self, file_id: FileId) -> &File { &self.files[file_id.0 as usize] } /// Get a the source file using the file id. // FIXME: return an option or result? - fn get_mut(&mut self, file_id: FileId) -> &mut File { + fn get_mut(&mut self, file_id: FileId) -> &mut File { &mut self.files[file_id.0 as usize] } @@ -116,7 +138,7 @@ impl Files { /// /// let name = "test"; /// - /// let mut files = Files::new(); + /// let mut files = Files::::new(); /// let file_id = files.add(name, "hello world!"); /// /// assert_eq!(files.name(file_id), name); @@ -130,7 +152,7 @@ impl Files { /// ```rust /// use codespan::{Files, LineIndex, LineIndexOutOfBoundsError, Span}; /// - /// let mut files = Files::new(); + /// let mut files = Files::::new(); /// let file_id = files.add("test", "foo\nbar\r\n\nbaz"); /// /// let line_sources = (0..5) @@ -164,7 +186,7 @@ impl Files { /// ```rust /// use codespan::{ByteIndex, Files, Location, LocationError, Span}; /// - /// let mut files = Files::new(); + /// let mut files = Files::::new(); /// let file_id = files.add("test", "foo\nbar\r\n\nbaz"); /// /// assert_eq!(files.location(file_id, 0), Ok(Location::new(0, 0))); @@ -194,12 +216,12 @@ impl Files { /// /// let source = "hello world!"; /// - /// let mut files = Files::new(); + /// let mut files = Files::::new(); /// let file_id = files.add("test", source); /// /// assert_eq!(files.source(file_id), source); /// ``` - pub fn source(&self, file_id: FileId) -> &str { + pub fn source(&self, file_id: FileId) -> &Source { self.get(file_id).source() } @@ -210,7 +232,7 @@ impl Files { /// /// let source = "hello world!"; /// - /// let mut files = Files::new(); + /// let mut files = Files::::new(); /// let file_id = files.add("test", source); /// /// assert_eq!(files.source_span(file_id), Span::from_str(source)); @@ -224,7 +246,7 @@ impl Files { /// ```rust /// use codespan::{Files, Span}; /// - /// let mut files = Files::new(); + /// let mut files = Files::::new(); /// let file_id = files.add("test", "hello world!"); /// /// assert_eq!(files.source_slice(file_id, Span::new(0, 5)), Ok("hello")); @@ -243,11 +265,14 @@ impl Files { #[derive(Debug, Clone)] #[cfg_attr(feature = "serialization", derive(Deserialize, Serialize))] #[cfg_attr(feature = "memory_usage", derive(heapsize_derive::HeapSizeOf))] -struct File { +struct File +where + Source: AsRef, +{ /// The name of the file. name: String, /// The source code of the file. - source: String, + source: Source, /// The starting byte indices in the source code. line_starts: Vec, } @@ -260,9 +285,12 @@ fn compute_line_starts(source: &str) -> Vec { .collect() } -impl File { - fn new(name: String, source: String) -> File { - let line_starts = compute_line_starts(&source); +impl File +where + Source: AsRef, +{ + fn new(name: String, source: Source) -> Self { + let line_starts = compute_line_starts(source.as_ref()); File { name, @@ -271,8 +299,8 @@ impl File { } } - fn update(&mut self, source: String) { - let line_starts = compute_line_starts(&source); + fn update(&mut self, source: Source) { + let line_starts = compute_line_starts(source.as_ref()); self.source = source; self.line_starts = line_starts; } @@ -321,7 +349,8 @@ impl File { span: self.source_span(), })?; let line_src = self - .source() + .source + .as_ref() .get(line_start_index.to_usize()..byte_index.to_usize()) .ok_or_else(|| { let given = byte_index; @@ -337,24 +366,24 @@ impl File { line: line_index, column: ColumnIndex::from(line_src.graphemes(true).count() as u32), }) - }, + } } } - fn source(&self) -> &str { + fn source(&self) -> &Source { &self.source } fn source_span(&self) -> Span { - Span::from_str(self.source()) + Span::from_str(self.source.as_ref()) } fn source_slice(&self, span: Span) -> Result<&str, SpanOutOfBoundsError> { let start = span.start().to_usize(); let end = span.end().to_usize(); - self.source.get(start..end).ok_or_else(|| { - let span = Span::from_str(self.source()); + self.source.as_ref().get(start..end).ok_or_else(|| { + let span = Span::from_str(self.source.as_ref()); SpanOutOfBoundsError { given: span, span } }) } @@ -370,7 +399,7 @@ mod test { #[test] fn line_starts() { - let mut files = Files::new(); + let mut files = Files::::new(); let file_id = files.add("test", TEST_SOURCE); assert_eq!( @@ -386,7 +415,10 @@ mod test { #[test] fn line_span_sources() { - let mut files = Files::new(); + // Also make sure we can use `Arc` for source + use std::sync::Arc; + + let mut files = Files::>::new(); let file_id = files.add("test", TEST_SOURCE); let line_sources = (0..4) From b0d957464089f4ddc3e18d1b6e395830a13958f5 Mon Sep 17 00:00:00 2001 From: Ryan Cumming Date: Sun, 5 Jan 2020 12:20:03 +1100 Subject: [PATCH 2/2] Update reporting & lsp crates for generic source This ports `codespan-reporting` and `codespan-lsp` to use `Files`'s new `Source` generic parameter. --- codespan-lsp/src/lib.rs | 28 ++++++++++++++----------- codespan-reporting/examples/term.rs | 2 +- codespan-reporting/src/term/mod.rs | 7 ++----- codespan-reporting/tests/support/mod.rs | 2 +- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/codespan-lsp/src/lib.rs b/codespan-lsp/src/lib.rs index 6b812d86..cdba46a7 100644 --- a/codespan-lsp/src/lib.rs +++ b/codespan-lsp/src/lib.rs @@ -90,8 +90,8 @@ fn location_to_position( } } -pub fn byte_index_to_position( - files: &Files, +pub fn byte_index_to_position>( + files: &Files, file_id: FileId, byte_index: ByteIndex, ) -> Result { @@ -103,7 +103,11 @@ pub fn byte_index_to_position( location_to_position(line_str, location.line, column, byte_index) } -pub fn byte_span_to_range(files: &Files, file_id: FileId, span: Span) -> Result { +pub fn byte_span_to_range>( + files: &Files, + file_id: FileId, + span: Span, +) -> Result { Ok(lsp::Range { start: byte_index_to_position(files, file_id, span.start())?, end: byte_index_to_position(files, file_id, span.end())?, @@ -137,8 +141,8 @@ pub fn character_to_line_offset(line: &str, character: u64) -> Result>( + files: &Files, file_id: FileId, position: &lsp::Position, ) -> Result { @@ -149,8 +153,8 @@ pub fn position_to_byte_index( Ok(line_span.start() + byte_offset) } -pub fn range_to_byte_span( - files: &Files, +pub fn range_to_byte_span>( + files: &Files, file_id: FileId, range: &lsp::Range, ) -> Result { @@ -176,8 +180,8 @@ pub fn make_lsp_severity(severity: Severity) -> lsp::DiagnosticSeverity { /// `correlate_file_url` is necessary to resolve codespan `FileName`s /// /// `code` and `file` are left empty by this function -pub fn make_lsp_diagnostic( - files: &Files, +pub fn make_lsp_diagnostic>( + files: &Files, source: impl Into>, diagnostic: Diagnostic, mut correlate_file_url: impl FnMut(FileId) -> Result, @@ -256,7 +260,7 @@ let test = 2 let test1 = "" test "#; - let mut files = Files::new(); + let mut files = Files::<&'static str>::new(); let file_id = files.add("test", text); let pos = position_to_byte_index( &files, @@ -276,7 +280,7 @@ test #[test] fn unicode_get_byte_index() { - let mut files = Files::new(); + let mut files = Files::<&'static str>::new(); let file_id = files.add("unicode", UNICODE); let result = position_to_byte_index( @@ -302,7 +306,7 @@ test #[test] fn unicode_get_position() { - let mut files = Files::new(); + let mut files = Files::<&'static str>::new(); let file_id = files.add("unicode", UNICODE); let result = byte_index_to_position(&files, file_id, ByteIndex::from(5)); diff --git a/codespan-reporting/examples/term.rs b/codespan-reporting/examples/term.rs index debb8ba3..5c6d2eec 100644 --- a/codespan-reporting/examples/term.rs +++ b/codespan-reporting/examples/term.rs @@ -21,7 +21,7 @@ pub struct Opts { fn main() { let opts = Opts::from_args(); - let mut files = Files::new(); + let mut files = Files::::new(); let file_id1 = files.add( "Data/Nat.fun", diff --git a/codespan-reporting/src/term/mod.rs b/codespan-reporting/src/term/mod.rs index 19578c0d..41ea5d1c 100644 --- a/codespan-reporting/src/term/mod.rs +++ b/codespan-reporting/src/term/mod.rs @@ -15,15 +15,12 @@ pub use termcolor; pub use self::config::{Chars, Config, DisplayStyle, Styles}; /// Emit a diagnostic using the given writer, context, config, and files. -pub fn emit( +pub fn emit>( writer: &mut (impl WriteColor + ?Sized), config: &Config, files: &Files, diagnostic: &Diagnostic, -) -> io::Result<()> -where - Source: AsRef, -{ +) -> io::Result<()> { use self::views::{RichDiagnostic, ShortDiagnostic}; match config.display_style { diff --git a/codespan-reporting/tests/support/mod.rs b/codespan-reporting/tests/support/mod.rs index ddca5d76..47c9dcb3 100644 --- a/codespan-reporting/tests/support/mod.rs +++ b/codespan-reporting/tests/support/mod.rs @@ -8,7 +8,7 @@ mod color_buffer; use self::color_buffer::ColorBuffer; pub struct TestData { - pub files: Files, + pub files: Files, pub diagnostics: Vec, }