perf(rome_js_formatter): Reduce FormatElement size

This PR reduces the size of a `FormatElement` from 56 to 32 bytes by: * Using a `Box<str>` to store a dynamic token's string as it doesn't need to be mutable (safes 8 bytes for the `capacity`) * Only storing the start position for a `DynamicToken` because the length isn't used by the printer * Change the semantics of `verbatim_ranges` to store the ranges in the **formatted** string. The ranges in the formatted output should be sufficient for debugging and it allows to resolve the `String` rather than having to store it on the `FormatElement` (range can be computed in the printer). This improves the formatter's performance by about 10%
rome · Apr 16, 2022 · 384026a · 384026a
1 parent 03c946e
commit 384026a
Show file tree

Hide file tree

Showing 9 changed files with 96 additions and 86 deletions.
diff --git a/crates/rome_formatter/src/format_element.rs b/crates/rome_formatter/src/format_element.rs
@@ -1,6 +1,6 @@
-use crate::format_elements;
 use crate::intersperse::{Intersperse, IntersperseFn};
-use rome_rowan::{Language, SyntaxNode, SyntaxToken, SyntaxTriviaPieceComments, TextRange};
+use crate::{format_elements, TextSize};
+use rome_rowan::{Language, SyntaxNode, SyntaxToken, SyntaxTriviaPieceComments};
 use std::borrow::Cow;
 use std::fmt::{self, Debug, Formatter};
 use std::ops::Deref;
@@ -823,46 +823,28 @@ where
     L: Language,
 {
     /// Get the number of line breaks between two consecutive SyntaxNodes in the tree
-    fn get_lines_between_nodes<L: Language>(
-        prev_node: &SyntaxNode<L>,
-        next_node: &SyntaxNode<L>,
-    ) -> usize {
-        // Ensure the two nodes are actually siblings on debug
-        debug_assert_eq!(prev_node.next_sibling().as_ref(), Some(next_node));
-        debug_assert_eq!(next_node.prev_sibling().as_ref(), Some(prev_node));
-
-        // Count the lines separating the two statements,
-        // starting with the trailing trivia of the previous node
-        let mut line_count = prev_node
-            .last_trailing_trivia()
-            .and_then(|prev_token| {
-                // Newline pieces can only come last in trailing trivias, skip to it directly
-                prev_token.pieces().next_back()?.as_newline()
-            })
-            .is_some() as usize;
-
-        // Then add the newlines in the leading trivia of the next node
+    fn get_lines_before<L: Language>(next_node: &SyntaxNode<L>) -> usize {
+        // Count the newlines in the leading trivia of the next node
         if let Some(leading_trivia) = next_node.first_leading_trivia() {
-            for piece in leading_trivia.pieces() {
-                if piece.is_newline() {
-                    line_count += 1;
-                } else if piece.is_comments() {
-                    // Stop at the first comment piece, the comment printer
-                    // will handle newlines between the comment and the node
-                    break;
-                }
-            }
+            leading_trivia
+                .pieces()
+                .take_while(|piece|
+                // Stop at the first comment piece, the comment printer
+                // will handle newlines between the comment and the node
+                !piece.is_comments())
+                .filter(|piece| piece.is_newline())
+                .count()
+        } else {
+            0
         }
-
-        line_count
     }
 
     concat_elements(IntersperseFn::new(
         elements.into_iter(),
-        |prev_node, next_node, next_elem| {
+        |_, next_node, next_elem| {
             if next_elem.is_empty() {
                 empty_element()
-            } else if get_lines_between_nodes(prev_node, next_node) > 1 {
+            } else if get_lines_before(next_node) > 1 {
                 empty_line()
             } else {
                 separator()
@@ -928,10 +910,8 @@ pub enum VerbatimKind {
     Unknown,
     Suppressed,
     Verbatim {
-        /// the range that belongs to the node/token formatted verbatim
-        range: TextRange,
-        /// the text that belongs to the node/token formatted verbatim
-        text: String,
+        /// the length of the formatted node
+        length: TextSize,
     },
 }
 
@@ -945,10 +925,10 @@ pub struct Verbatim {
 }
 
 impl Verbatim {
-    pub fn new_verbatim(element: FormatElement, text: String, range: TextRange) -> Self {
+    pub fn new_verbatim(element: FormatElement, length: TextSize) -> Self {
         Self {
             element: Box::new(element),
-            kind: VerbatimKind::Verbatim { range, text },
+            kind: VerbatimKind::Verbatim { length },
         }
     }
 
@@ -1139,16 +1119,21 @@ pub enum Token {
     Static { text: &'static str },
     /// Token constructed from the input source as a dynamics
     /// string and a range of the input source
-    Dynamic { text: String, source: TextRange },
+    Dynamic {
+        // There's no need for the text to be mutable, using `Box<str>` safes 8 bytes over `String`.
+        text: Box<str>,
+        // The position of the dynamic token in the unformatted source code
+        source_position: TextSize,
+    },
 }
 
 impl Debug for Token {
     fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
         // This does not use debug_tuple so the tokens are
         // written on a single line even when pretty-printing
         match self {
-            Token::Static { text } => write!(fmt, "Token({:?})", text),
-            Token::Dynamic { text, source } => write!(fmt, "Token({:?}, {:?})", text, source),
+            Token::Static { text } => write!(fmt, "StaticToken({:?})", text),
+            Token::Dynamic { text, .. } => write!(fmt, "DynamicToken({:?})", text),
         }
     }
 }
@@ -1160,17 +1145,22 @@ impl Token {
     }
 
     /// Create a token from a dynamic string and a range of the input source
-    pub fn new_dynamic(text: String, source: TextRange) -> Self {
+    pub fn new_dynamic(text: String, position: TextSize) -> Self {
         debug_assert!(!text.contains('\r'), "The content '{}' contains an unsupported '\\r' line terminator character but string tokens must only use line feeds '\\n' as line separator. Use '\\n' instead of '\\r' and '\\r\\n' to insert a line break in strings.", text);
-        Self::Dynamic { text, source }
+        Self::Dynamic {
+            text: text.into_boxed_str(),
+            source_position: position,
+        }
     }
 
     /// Get the range of the input source covered by this token,
     /// or None if the token was synthesized by the formatter
-    pub fn source(&self) -> Option<&TextRange> {
+    pub fn source(&self) -> Option<&TextSize> {
         match self {
             Token::Static { .. } => None,
-            Token::Dynamic { source, .. } => Some(source),
+            Token::Dynamic {
+                source_position, ..
+            } => Some(source_position),
         }
     }
 }
@@ -1190,7 +1180,10 @@ impl<L: Language> From<SyntaxToken<L>> for Token {
 
 impl<'a, L: Language> From<&'a SyntaxToken<L>> for Token {
     fn from(token: &'a SyntaxToken<L>) -> Self {
-        Self::new_dynamic(token.text_trimmed().into(), token.text_trimmed_range())
+        Self::new_dynamic(
+            token.text_trimmed().into(),
+            token.text_trimmed_range().start(),
+        )
     }
 }
 
@@ -1230,7 +1223,7 @@ impl<L: Language> From<SyntaxTriviaPieceComments<L>> for Token {
     fn from(trivia: SyntaxTriviaPieceComments<L>) -> Self {
         Self::new_dynamic(
             normalize_newlines(trivia.text().trim(), LINE_TERMINATORS).into_owned(),
-            trivia.text_range(),
+            trivia.text_range().start(),
         )
     }
 }
@@ -1398,9 +1391,10 @@ impl From<Option<FormatElement>> for FormatElement {
 mod tests {
 
     use crate::format_element::{
-        empty_element, join_elements, normalize_newlines, List, LINE_TERMINATORS,
+        empty_element, join_elements, normalize_newlines, ConditionalGroupContent, List,
+        VerbatimKind, LINE_TERMINATORS,
     };
-    use crate::{concat_elements, space_token, token, FormatElement};
+    use crate::{concat_elements, space_token, token, FormatElement, TextRange, Token, Verbatim};
 
     #[test]
     fn concat_elements_returns_a_list_token_containing_the_passed_in_elements() {
@@ -1516,4 +1510,19 @@ mod tests {
         assert_eq!(normalize_newlines("a\u{2028}b", LINE_TERMINATORS), "a\nb");
         assert_eq!(normalize_newlines("a\u{2029}b", LINE_TERMINATORS), "a\nb");
     }
+
+    #[test]
+    fn sizes() {
+        assert_eq!(8, std::mem::size_of::<TextRange>());
+        assert_eq!(8, std::mem::size_of::<VerbatimKind>());
+        assert_eq!(16, std::mem::size_of::<Verbatim>());
+        assert_eq!(24, std::mem::size_of::<Token>());
+        assert_eq!(16, std::mem::size_of::<ConditionalGroupContent>());
+        assert_eq!(24, std::mem::size_of::<List>());
+        // Increasing the size of FormatElement has serious consequences on runtime performance and memory footprint.
+        // Is there a more efficient way to encode the data to avoid increasing its size? Can the information
+        // be recomputed at a later point in time?
+        // You reduced the size of a format element? Excellent work!
+        assert_eq!(32, std::mem::size_of::<FormatElement>());
+    }
 }
diff --git a/crates/rome_formatter/src/lib.rs b/crates/rome_formatter/src/lib.rs
@@ -203,21 +203,21 @@ pub struct Formatted {
     code: String,
     range: Option<TextRange>,
     sourcemap: Vec<SourceMarker>,
-    verbatim_source: Vec<(String, TextRange)>,
+    verbatim_ranges: Vec<TextRange>,
 }
 
 impl Formatted {
     pub fn new(
         code: String,
         range: Option<TextRange>,
         sourcemap: Vec<SourceMarker>,
-        verbatim_source: Vec<(String, TextRange)>,
+        verbatim_source: Vec<TextRange>,
     ) -> Self {
         Self {
             code,
             range,
             sourcemap,
-            verbatim_source,
+            verbatim_ranges: verbatim_source,
         }
     }
 
@@ -227,7 +227,7 @@ impl Formatted {
             code: String::new(),
             range: None,
             sourcemap: Vec::new(),
-            verbatim_source: Vec::new(),
+            verbatim_ranges: Vec::new(),
         }
     }
 
@@ -259,12 +259,16 @@ impl Formatted {
         self.code
     }
 
-    pub fn verbatim(&self) -> &[(String, TextRange)] {
-        &self.verbatim_source
+    /// The text in the formatted code that has been formatted as verbatim.
+    pub fn verbatim(&self) -> impl Iterator<Item = (TextRange, &str)> {
+        self.verbatim_ranges
+            .iter()
+            .map(|range| (*range, &self.code[*range]))
     }
 
-    pub fn into_verbatim(self) -> Vec<(String, TextRange)> {
-        self.verbatim_source
+    /// Ranges of the formatted code that have been formatted as verbatim.
+    pub fn verbatim_ranges(&self) -> &[TextRange] {
+        &self.verbatim_ranges
     }
 }
 

diff --git a/crates/rome_formatter/src/printer.rs b/crates/rome_formatter/src/printer.rs
@@ -171,9 +171,9 @@ impl<'a> Printer<'a> {
                     self.state.pending_space = false;
                 }
 
-                if let Some(range) = token.source() {
+                if let Some(source) = token.source() {
                     self.state.source_markers.push(SourceMarker {
-                        source: range.start(),
+                        source: *source,
                         dest: TextSize::from(self.state.buffer.len() as u32),
                     });
                 }
@@ -271,8 +271,11 @@ impl<'a> Printer<'a> {
             }
 
             FormatElement::Verbatim(verbatim) => {
-                if let VerbatimKind::Verbatim { range, text } = &verbatim.kind {
-                    self.state.verbatim_markers.push((text.clone(), *range));
+                if let VerbatimKind::Verbatim { length } = &verbatim.kind {
+                    self.state.verbatim_markers.push(TextRange::at(
+                        TextSize::from(self.state.buffer.len() as u32),
+                        *length,
+                    ));
                 }
 
                 queue.enqueue(PrintElementCall::new(&verbatim.element, args));
@@ -499,9 +502,8 @@ struct PrinterState<'a> {
     generated_column: usize,
     line_width: usize,
     has_empty_line: bool,
-    // mappings: Mapping[];
     line_suffixes: Vec<PrintElementCall<'a>>,
-    verbatim_markers: Vec<(String, TextRange)>,
+    verbatim_markers: Vec<TextRange>,
 }
 
 impl<'a> PrinterState<'a> {

diff --git a/crates/rome_js_formatter/src/formatter.rs b/crates/rome_js_formatter/src/formatter.rs
@@ -433,7 +433,7 @@ impl Formatter {
         let text = &token.text()[relative_skipped_range];
         elements.push(FormatElement::from(Token::new_dynamic(
             text.to_string(),
-            skipped_trivia_range,
+            skipped_trivia_range.start(),
         )));
 
         // `print_trailing_trivia_pieces` and `format_leading_trivia_pieces` remove any whitespace except
@@ -604,17 +604,13 @@ impl Formatter {
     /// "mess up" the developers, yet incomplete, work or accidentally introduce new syntax errors.
     ///
     /// You may be inclined to call `node.text` directly. However, using `text` doesn't track the nodes
-    ///nor its children source mapping information, resulting in incorrect source maps for this subtree.
+    /// nor its children source mapping information, resulting in incorrect source maps for this subtree.
     ///
     /// These nodes and tokens get tracked as [FormatElement::Verbatim], useful to understand
     /// if these nodes still need to have their own implementation.
     pub fn format_verbatim(&self, node: &JsSyntaxNode) -> FormatElement {
         let verbatim = self.format_verbatim_node_or_token(node);
-        FormatElement::Verbatim(Verbatim::new_verbatim(
-            verbatim,
-            node.to_string(),
-            node.text_range(),
-        ))
+        FormatElement::Verbatim(Verbatim::new_verbatim(verbatim, node.text_range().len()))
     }
 
     /// Formats unknown nodes. The difference between this method  and `format_verbatim` is that this method
@@ -653,7 +649,7 @@ impl Formatter {
         fn trivia_token<L: Language>(piece: SyntaxTriviaPiece<L>) -> Token {
             Token::new_dynamic(
                 normalize_newlines(piece.text(), LINE_TERMINATORS).into_owned(),
-                piece.text_range(),
+                piece.text_range().start(),
             )
         }
 
@@ -666,7 +662,7 @@ impl Formatter {
 
         let content = Token::new_dynamic(
             normalize_newlines(&node.text_trimmed().to_string(), LINE_TERMINATORS).into_owned(),
-            node.text_trimmed_range(),
+            node.text_trimmed_range().start(),
         );
 
         // Clippy false positive: SkipWhile does not implement DoubleEndedIterator

diff --git a/crates/rome_js_formatter/src/lib.rs b/crates/rome_js_formatter/src/lib.rs
@@ -222,13 +222,13 @@ pub fn format_range(
     let input_range = TextRange::new(start_source, end_source);
     let output_range = TextRange::new(start_dest, end_dest);
     let sourcemap = Vec::from(formatted.sourcemap());
-    let verbatim = Vec::from(formatted.verbatim());
+    let verbatim_ranges = Vec::from(formatted.verbatim_ranges());
     let code = &formatted.into_code()[output_range];
     Ok(Formatted::new(
         code.into(),
         Some(input_range),
         sourcemap,
-        verbatim,
+        verbatim_ranges,
     ))
 }
 
@@ -294,12 +294,12 @@ pub fn format_node(options: FormatOptions, root: &JsSyntaxNode) -> FormatResult<
     let element = Formatter::new(options).format_root(root)?;
     let formatted = Printer::new(options).print_with_indent(&element, initial_indent);
     let sourcemap = Vec::from(formatted.sourcemap());
-    let verbatim = Vec::from(formatted.verbatim());
+    let verbatim_ranges = Vec::from(formatted.verbatim_ranges());
     Ok(Formatted::new(
         formatted.into_code(),
         Some(root.text_range()),
         sourcemap,
-        verbatim,
+        verbatim_ranges,
     ))
 }
 

diff --git a/crates/rome_js_formatter/src/ts/types/intersection_type.rs b/crates/rome_js_formatter/src/ts/types/intersection_type.rs
@@ -21,7 +21,7 @@ impl ToFormatElement for TsIntersectionType {
                 // if_group_breaks block to avoid removing comments when the
                 // group does not break
                 let replaced =
-                    if_group_breaks(format_elements![Token::from(token.clone()), space_token()]);
+                    if_group_breaks(format_elements![Token::from(&token), space_token()]);
                 formatter.format_replaced(&token, replaced)
             }
             None => if_group_breaks(format_elements![token("&"), space_token()]),

diff --git a/crates/rome_js_formatter/src/ts/types/union_type.rs b/crates/rome_js_formatter/src/ts/types/union_type.rs
@@ -21,7 +21,7 @@ impl ToFormatElement for TsUnionType {
                 // if_group_breaks block to avoid removing comments when the
                 // group does not break
                 let replaced =
-                    if_group_breaks(format_elements![Token::from(token.clone()), space_token()]);
+                    if_group_breaks(format_elements![Token::from(&token), space_token()]);
                 formatter.format_replaced(&token, replaced)
             }
             None => if_group_breaks(format_elements![token("|"), space_token()]),