Skip to content
This repository has been archived by the owner on Aug 31, 2023. It is now read-only.

Commit

Permalink
perf(rome_js_formatter): Reduce the String allocations for Tokens
Browse files Browse the repository at this point in the history
This PR reduces the number of `String` allocations necessary for `FormatElement::Token`s by
making use of the observation that most tokens match the text of a `SyntaxToken`. For example,
identifiers and punctuation tokens are kept by the formatter as is. This is even true for string literal tokens if they already use the right quotes.

This is implemented by introducing a new `SyntaxTokenText` that is `Send + Sync` and allows referencing a slice in a `SyntaxToken` without worrying about the `&str`'s lifetime. The PR further extends `FormatElement::Token` to make use of the newly introduced `SyntaxTokenText`.

This change reduces overall memory consumption and improves performance:

```
group                                    format-element                         token
-----                                    --------------                         -----
formatter/checker.ts                     1.04    250.2±5.03ms    10.4 MB/sec    1.00    239.5±1.76ms    10.9 MB/sec
formatter/compiler.js                    1.07    145.6±1.30ms     7.2 MB/sec    1.00    136.5±1.43ms     7.7 MB/sec
formatter/d3.min.js                      1.07    117.4±3.70ms     2.2 MB/sec    1.00    109.6±1.24ms     2.4 MB/sec
formatter/dojo.js                        1.03      7.4±0.15ms     9.2 MB/sec    1.00      7.2±0.03ms     9.5 MB/sec
formatter/ios.d.ts                       1.05    181.2±1.95ms    10.3 MB/sec    1.00    172.8±2.23ms    10.8 MB/sec
formatter/jquery.min.js                  1.02     29.1±0.55ms     2.8 MB/sec    1.00     28.5±0.07ms     2.9 MB/sec
formatter/math.js                        1.05    233.1±4.69ms     2.8 MB/sec    1.00    222.8±1.79ms     2.9 MB/sec
formatter/parser.ts                      1.03      5.3±0.15ms     9.2 MB/sec    1.00      5.1±0.01ms     9.5 MB/sec
formatter/pixi.min.js                    1.10    131.0±7.11ms     3.3 MB/sec    1.00    119.3±2.12ms     3.7 MB/sec
formatter/react-dom.production.min.js    1.07     37.0±0.82ms     3.1 MB/sec    1.00     34.5±0.21ms     3.3 MB/sec
formatter/react.production.min.js        1.08  1825.1±57.85µs     3.4 MB/sec    1.00  1683.8±30.49µs     3.7 MB/sec
formatter/router.ts                      1.02      3.7±0.09ms    16.2 MB/sec    1.00      3.6±0.01ms    16.6 MB/sec
formatter/tex-chtml-full.js              1.05    288.3±5.19ms     3.2 MB/sec    1.00    273.4±1.29ms     3.3 MB/sec
formatter/three.min.js                   1.11    155.7±3.79ms     3.8 MB/sec    1.00    139.7±1.76ms     4.2 MB/sec
formatter/typescript.js                  1.04    945.2±6.64ms    10.1 MB/sec    1.00    909.3±7.16ms    10.4 MB/sec
formatter/vue.global.prod.js             1.07     49.1±1.49ms     2.5 MB/sec    1.00     45.8±0.20ms     2.6 MB/sec

```
  • Loading branch information
MichaReiser committed Apr 16, 2022
1 parent 384026a commit e10d648
Show file tree
Hide file tree
Showing 16 changed files with 271 additions and 80 deletions.
2 changes: 1 addition & 1 deletion crates/rome_css_syntax/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ mod syntax_node;

pub use self::generated::*;
pub use rome_rowan::{
SyntaxText, TextLen, TextRange, TextSize, TokenAtOffset, TriviaPieceKind, WalkEvent,
SyntaxNodeText, TextLen, TextRange, TextSize, TokenAtOffset, TriviaPieceKind, WalkEvent,
};
pub use syntax_node::*;

Expand Down
90 changes: 78 additions & 12 deletions crates/rome_formatter/src/format_element.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use crate::intersperse::{Intersperse, IntersperseFn};
use crate::{format_elements, TextSize};
use rome_rowan::{Language, SyntaxNode, SyntaxToken, SyntaxTriviaPieceComments};
use crate::{format_elements, TextRange, TextSize};
use rome_rowan::{
Language, SyntaxNode, SyntaxToken, SyntaxTokenText, SyntaxTriviaPieceComments, TextLen,
};
use std::borrow::Cow;
use std::fmt::{self, Debug, Formatter};
use std::ops::Deref;
Expand Down Expand Up @@ -1057,7 +1059,7 @@ impl List {
}

impl Deref for List {
type Target = Vec<FormatElement>;
type Target = [FormatElement];

fn deref(&self) -> &Self::Target {
&self.content
Expand Down Expand Up @@ -1125,6 +1127,13 @@ pub enum Token {
// The position of the dynamic token in the unformatted source code
source_position: TextSize,
},
// A token that is taken 1:1 from the source code
SyntaxTokenSlice {
/// The start position of the token in the unformatted source code
source_position: TextSize,
/// The token text
slice: SyntaxTokenText,
},
}

impl Debug for Token {
Expand All @@ -1134,6 +1143,11 @@ impl Debug for Token {
match self {
Token::Static { text } => write!(fmt, "StaticToken({:?})", text),
Token::Dynamic { text, .. } => write!(fmt, "DynamicToken({:?})", text),
Token::SyntaxTokenSlice {
slice: token_text, ..
} => {
write!(fmt, "SyntaxTokenSlice({:?})", token_text)
}
}
}
}
Expand All @@ -1146,21 +1160,69 @@ impl Token {

/// Create a token from a dynamic string and its position in the input source
pub fn new_dynamic(text: String, position: TextSize) -> Self {
debug_assert!(!text.contains('\r'), "The content '{}' contains an unsupported '\\r' line terminator character but string tokens must only use line feeds '\\n' as line separator. Use '\\n' instead of '\\r' and '\\r\\n' to insert a line break in strings.", text);
Self::assert_no_newlines(&text);
Self::Dynamic {
text: text.into_boxed_str(),
source_position: position,
}
}

/// Builds a token from a [Cow] holding (a possibly rewritten copy of) a sub-slice of a
/// syntax token's text.
///
/// `start` is the absolute position in the source text at which `text` begins.
///
/// ## Returns
/// * [Token::Dynamic] when `text` is [Cow::Owned], i.e. the text had to be rewritten and
///   therefore no longer matches the syntax token's text.
/// * [Token::SyntaxTokenSlice] when `text` is [Cow::Borrowed]; the token then refers to the
///   syntax token's text instead of allocating a new string.
///
/// ## Panics
/// In debug builds, if `text` contains a `'\r'` or if a borrowed `text` is not equal to the
/// token's text at the range derived from `start` and the length of `text`.
pub fn from_syntax_token_cow_slice<L: Language>(
    text: Cow<str>,
    token: &SyntaxToken<L>,
    start: TextSize,
) -> Self {
    Self::assert_no_newlines(&text);

    // An owned string was modified by the caller, so it has to be stored as is.
    let borrowed = match text {
        Cow::Owned(owned) => return Self::new_dynamic(owned, start),
        Cow::Borrowed(borrowed) => borrowed,
    };

    // Absolute range covered by the borrowed text; shifted by the token's start below to
    // index into the token's own text.
    let source_range = TextRange::at(start, borrowed.text_len());
    debug_assert_eq!(
        borrowed,
        &token.text()[source_range - token.text_range().start()],
        "The borrowed string doesn't match the specified token substring"
    );

    Self::new_syntax_token_slice(token, source_range)
}

/// Creates a [Token::SyntaxTokenSlice] whose text is a slice of the text of `token`.
///
/// `range` is given in absolute source coordinates; it is made relative to the start of
/// `token` before slicing. The start of `range` is recorded as the token's source position.
///
/// In debug builds, panics if the selected slice contains a `'\r'`.
pub fn new_syntax_token_slice<L: Language>(token: &SyntaxToken<L>, range: TextRange) -> Self {
    let token_start = token.text_range().start();
    let slice = token.token_text().slice(range - token_start);

    Self::assert_no_newlines(&slice);

    let source_position = range.start();
    Self::SyntaxTokenSlice {
        source_position,
        slice,
    }
}

/// Debug-only check that `text` contains no carriage return (`'\r'`).
///
/// Despite the name, line feeds (`'\n'`) are accepted; only `'\r'` is rejected.
fn assert_no_newlines(text: &str) {
    // '\r' is a single ASCII byte, so scanning the bytes is equivalent to a char search.
    debug_assert!(
        text.bytes().all(|byte| byte != b'\r'),
        "The content '{}' contains an unsupported '\\r' line terminator character but string tokens must only use line feeds '\\n' as line separator. Use '\\n' instead of '\\r' and '\\r\\n' to insert a line break in strings.",
        text
    );
}

/// Get the start position of this token in the input source,
/// or None if the token was synthesized by the formatter
pub fn source(&self) -> Option<&TextSize> {
pub fn source_position(&self) -> Option<&TextSize> {
match self {
Token::Static { .. } => None,
Token::Dynamic {
source_position, ..
} => Some(source_position),
Token::SyntaxTokenSlice {
source_position, ..
} => Some(source_position),
}
}
}
Expand All @@ -1180,10 +1242,9 @@ impl<L: Language> From<SyntaxToken<L>> for Token {

impl<'a, L: Language> From<&'a SyntaxToken<L>> for Token {
fn from(token: &'a SyntaxToken<L>) -> Self {
Self::new_dynamic(
token.text_trimmed().into(),
token.text_trimmed_range().start(),
)
let trimmed_range = token.text_trimmed_range();

Self::new_syntax_token_slice(token, trimmed_range)
}
}

Expand Down Expand Up @@ -1221,9 +1282,11 @@ pub fn normalize_newlines<const N: usize>(text: &str, terminators: [char; N]) ->

impl<L: Language> From<SyntaxTriviaPieceComments<L>> for Token {
fn from(trivia: SyntaxTriviaPieceComments<L>) -> Self {
Self::new_dynamic(
normalize_newlines(trivia.text().trim(), LINE_TERMINATORS).into_owned(),
trivia.text_range().start(),
let range = trivia.text_range();
Token::from_syntax_token_cow_slice(
normalize_newlines(trivia.text().trim(), LINE_TERMINATORS),
&trivia.as_piece().token(),
range.start(),
)
}
}
Expand All @@ -1234,6 +1297,9 @@ impl Deref for Token {
match self {
Token::Static { text } => text,
Token::Dynamic { text, .. } => text,
Token::SyntaxTokenSlice {
slice: token_text, ..
} => token_text.deref(),
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion crates/rome_formatter/src/printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ impl<'a> Printer<'a> {
self.state.pending_space = false;
}

if let Some(source) = token.source() {
if let Some(source) = token.source_position() {
self.state.source_markers.push(SourceMarker {
source: *source,
dest: TextSize::from(self.state.buffer.len() as u32),
Expand Down
14 changes: 6 additions & 8 deletions crates/rome_js_formatter/src/formatter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -428,12 +428,9 @@ impl Formatter {
let skipped_trivia_range = skipped_trivia_range.expect("Only call this method for leading trivia containing at least one skipped token trivia.");

// Format the skipped token trivia range
// Compute the offsets relative to the tokens text
let relative_skipped_range = skipped_trivia_range - token.text_range().start();
let text = &token.text()[relative_skipped_range];
elements.push(FormatElement::from(Token::new_dynamic(
text.to_string(),
skipped_trivia_range.start(),
elements.push(FormatElement::from(Token::new_syntax_token_slice(
token,
skipped_trivia_range,
)));

// `print_trailing_trivia_pieces` and `format_leading_trivia_pieces` remove any whitespace except
Expand Down Expand Up @@ -647,8 +644,9 @@ impl Formatter {
}

fn trivia_token<L: Language>(piece: SyntaxTriviaPiece<L>) -> Token {
Token::new_dynamic(
normalize_newlines(piece.text(), LINE_TERMINATORS).into_owned(),
Token::from_syntax_token_cow_slice(
normalize_newlines(piece.text(), LINE_TERMINATORS),
&piece.token(),
piece.text_range().start(),
)
}
Expand Down
21 changes: 7 additions & 14 deletions crates/rome_js_formatter/src/utils/binary_like_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,23 +210,16 @@ fn format_with_or_without_parenthesis(
};

let result = if operation_is_higher {
let formatted = if node.contains_comments() {
let (leading, content, trailing) = formatted_node.split_trivia();
format_elements![
leading,
group_elements(format_elements![
token("("),
soft_block_indent(format_elements![content, trailing]),
token(")")
])
]
} else {
let (leading, content, trailing) = formatted_node.split_trivia();
let formatted = format_elements![
leading,
group_elements(format_elements![
token("("),
soft_block_indent(formatted_node),
token(")"),
soft_block_indent(format_elements![content, trailing]),
token(")")
])
};
];

(formatted, true)
} else {
(formatted_node, false)
Expand Down
15 changes: 7 additions & 8 deletions crates/rome_js_formatter/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -388,8 +388,9 @@ pub(crate) fn format_template_chunk(
// In template literals, the '\r' and '\r\n' line terminators are normalized to '\n'
Ok(formatter.format_replaced(
&chunk,
FormatElement::from(Token::new_dynamic(
normalize_newlines(chunk.text_trimmed(), ['\r']).into_owned(),
FormatElement::from(Token::from_syntax_token_cow_slice(
normalize_newlines(chunk.text_trimmed(), ['\r']),
&chunk,
chunk.text_trimmed_range().start(),
)),
))
Expand Down Expand Up @@ -615,16 +616,14 @@ pub(crate) fn format_string_literal_token(
if quoted.starts_with(secondary_quote_char) && !quoted.contains(primary_quote_char) {
let s = &quoted[1..quoted.len() - 1];
let s = format!("{}{}{}", primary_quote_char, s, primary_quote_char);
match normalize_newlines(&s, ['\r']) {
Cow::Borrowed(_) => s,
Cow::Owned(s) => s,
}
Cow::Owned(normalize_newlines(&s, ['\r']).into_owned())
} else {
normalize_newlines(quoted, ['\r']).into_owned()
normalize_newlines(quoted, ['\r'])
};

formatter.format_replaced(
&token,
Token::new_dynamic(content, token.text_trimmed_range().start()).into(),
Token::from_syntax_token_cow_slice(content, &token, token.text_trimmed_range().start())
.into(),
)
}
4 changes: 2 additions & 2 deletions crates/rome_js_syntax/src/expr_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::{
};
use crate::{JsPreUpdateExpression, JsSyntaxKind::*};
use rome_rowan::{
AstNode, AstSeparatedList, NodeOrToken, SyntaxResult, SyntaxText, TextRange, TextSize,
AstNode, AstSeparatedList, NodeOrToken, SyntaxNodeText, SyntaxResult, TextRange, TextSize,
};
use std::cmp::Ordering;

Expand Down Expand Up @@ -371,7 +371,7 @@ impl JsNumberLiteralExpression {

impl JsStringLiteralExpression {
/// Get the inner text of a string not including the quotes
pub fn inner_string_text(&self) -> SyntaxText {
pub fn inner_string_text(&self) -> SyntaxNodeText {
let start = self.syntax().text_range().start() + TextSize::from(1);
let end_char = self
.syntax()
Expand Down
2 changes: 1 addition & 1 deletion crates/rome_js_syntax/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pub use self::generated::*;
pub use expr_ext::*;
pub use modifier_ext::*;
pub use rome_rowan::{
SyntaxText, TextLen, TextRange, TextSize, TokenAtOffset, TriviaPieceKind, WalkEvent,
SyntaxNodeText, TextLen, TextRange, TextSize, TokenAtOffset, TriviaPieceKind, WalkEvent,
};
pub use stmt_ext::*;
pub use syntax_node::*;
Expand Down
10 changes: 5 additions & 5 deletions crates/rome_rowan/src/cursor/node.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::cursor::{free, GreenElement, NodeData, SyntaxElement, SyntaxToken, SyntaxTrivia};
use crate::green::{Child, Children, Slot};
use crate::{
Direction, GreenNode, GreenNodeData, NodeOrToken, RawSyntaxKind, SyntaxText, TokenAtOffset,
Direction, GreenNode, GreenNodeData, NodeOrToken, RawSyntaxKind, SyntaxNodeText, TokenAtOffset,
WalkEvent,
};
use std::borrow::Cow;
Expand Down Expand Up @@ -173,13 +173,13 @@ impl SyntaxNode {
}

#[inline]
pub fn text(&self) -> SyntaxText {
SyntaxText::new(self.clone())
pub fn text(&self) -> SyntaxNodeText {
SyntaxNodeText::new(self.clone())
}

#[inline]
pub fn text_trimmed(&self) -> SyntaxText {
SyntaxText::with_range(self.clone(), self.text_trimmed_range())
pub fn text_trimmed(&self) -> SyntaxNodeText {
SyntaxNodeText::with_range(self.clone(), self.text_trimmed_range())
}

#[inline]
Expand Down
7 changes: 6 additions & 1 deletion crates/rome_rowan/src/cursor/token.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::cursor::{free, GreenElement, NodeData, SyntaxElement, SyntaxNode, SyntaxTrivia};
use crate::{Direction, GreenTokenData, RawSyntaxKind};
use crate::{Direction, GreenTokenData, RawSyntaxKind, SyntaxTokenText};
use std::hash::{Hash, Hasher};
use std::{fmt, iter, ptr};
use text_size::{TextRange, TextSize};
Expand Down Expand Up @@ -88,6 +88,11 @@ impl SyntaxToken {
self.green().text()
}

/// Returns the text of this token as a [SyntaxTokenText], built from an owned copy of the
/// token's green data.
#[inline]
pub fn token_text(&self) -> SyntaxTokenText {
    let green = self.green().to_owned();
    SyntaxTokenText::new(green)
}

#[inline]
pub fn text_trimmed(&self) -> &str {
self.green().text_trimmed()
Expand Down
6 changes: 4 additions & 2 deletions crates/rome_rowan/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub mod cursor;
mod green;

pub mod syntax;
mod syntax_text;
mod syntax_node_text;
mod utility_types;

#[allow(unsafe_code)]
Expand All @@ -27,6 +27,7 @@ mod serde_impls;
#[allow(unsafe_code)]
mod sll;
mod syntax_factory;
mod syntax_token_text;
mod tree_builder;

pub use text_size::{TextLen, TextRange, TextSize};
Expand All @@ -40,7 +41,8 @@ pub use crate::{
TriviaPieceKind,
},
syntax_factory::*,
syntax_text::SyntaxText,
syntax_node_text::SyntaxNodeText,
syntax_token_text::SyntaxTokenText,
tree_builder::{Checkpoint, TreeBuilder},
utility_types::{Direction, NodeOrToken, TokenAtOffset, WalkEvent},
};
Expand Down
6 changes: 3 additions & 3 deletions crates/rome_rowan/src/syntax/node.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::syntax::element::SyntaxElement;
use crate::syntax::SyntaxTrivia;
use crate::{
cursor, Direction, GreenNode, Language, NodeOrToken, SyntaxKind, SyntaxList, SyntaxText,
cursor, Direction, GreenNode, Language, NodeOrToken, SyntaxKind, SyntaxList, SyntaxNodeText,
SyntaxToken, TokenAtOffset, WalkEvent,
};
use std::fmt;
Expand Down Expand Up @@ -56,7 +56,7 @@ impl<L: Language> SyntaxNode<L> {
/// });
/// assert_eq!("\n\t let \t\ta; \t\t", node.text());
/// ```
pub fn text(&self) -> SyntaxText {
pub fn text(&self) -> SyntaxNodeText {
self.raw.text()
}

Expand Down Expand Up @@ -84,7 +84,7 @@ impl<L: Language> SyntaxNode<L> {
/// });
/// assert_eq!("let \t\ta;", node.text_trimmed());
/// ```
pub fn text_trimmed(&self) -> SyntaxText {
pub fn text_trimmed(&self) -> SyntaxNodeText {
self.raw.text_trimmed()
}

Expand Down
Loading

0 comments on commit e10d648

Please sign in to comment.