chore(parser): reduce intermediate string allocations (#820)
* chore(parser): reduce intermediate allocations

By threading the appropriate input lifetime through lexer and parser
methods, we can avoid copying strings in a few places.

Especially nice is that we can avoid copying the same strings multiple
times when peeking at tokens, and we can avoid an eager copy that is
almost always just thrown away in the implementation of `.expect()`.

* It's significantly faster to use references here
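
To illustrate the pattern, here is a minimal sketch with simplified, hypothetical types (not the real apollo-parser API): when a token's `data` borrows from the original input rather than from the parser itself, a method like `peek_data` can return `Option<&'input str>` instead of `Option<String>`, so callers can match on keywords without allocating.

```rust
// Minimal sketch of the lifetime-threading idea; the names here are
// simplified and hypothetical, not the actual apollo-parser types.
struct Token<'input> {
    data: &'input str,
}

struct Parser<'input> {
    current_token: Option<Token<'input>>,
}

impl<'input> Parser<'input> {
    // Returning `Option<&'input str>` ties the borrow to the source text,
    // not to `&mut self`, so no intermediate `String` is needed and the
    // parser is free to keep advancing afterwards.
    fn peek_data(&mut self) -> Option<&'input str> {
        self.current_token.as_ref().map(|token| token.data)
    }
}

fn main() {
    let source = String::from("directive");
    let mut parser = Parser {
        current_token: Some(Token { data: &source }),
    };
    // The returned slice borrows `source`, not `parser`, so matching a
    // keyword performs no allocation.
    if let Some("directive") = parser.peek_data() {
        println!("matched the `directive` keyword without copying");
    }
}
```

This is the shape the diff below applies to the real `Parser`: `peek_data` and `peek_data_n` now return `Option<&'input str>`, and call sites drop their `.as_deref()` / `.as_str()` conversions.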
goto-bus-stop authored Feb 1, 2024
1 parent 12db7e8 commit 0fa07a8
Showing 15 changed files with 42 additions and 45 deletions.
6 changes: 3 additions & 3 deletions crates/apollo-parser/src/lexer/token.rs
@@ -11,17 +11,17 @@ pub struct Token<'a> {
 }
 
 impl<'a> Token<'a> {
-    /// Get a reference to the token's kind.
+    /// Returns the kind of token.
     pub fn kind(&self) -> TokenKind {
         self.kind
     }
 
-    /// Get a reference to the token's data.
+    /// Returns the source text for this token.
     pub fn data(&self) -> &'a str {
         self.data
     }
 
-    /// Get a reference to the token's loc.
+    /// Returns the byte offset of this token in the source text.
    pub fn index(&self) -> usize {
        self.index
    }
8 changes: 4 additions & 4 deletions crates/apollo-parser/src/parser/grammar/directive.rs
@@ -13,7 +13,7 @@ pub(crate) fn directive_definition(p: &mut Parser) {
         description::description(p);
     }
 
-    if let Some("directive") = p.peek_data().as_deref() {
+    if let Some("directive") = p.peek_data() {
         p.bump(SyntaxKind::directive_KW);
     }
 
@@ -38,13 +38,13 @@ pub(crate) fn directive_definition(p: &mut Parser) {
     }
 
     if let Some(node) = p.peek_data() {
-        if node.as_str() == "repeatable" {
+        if node == "repeatable" {
             p.bump(SyntaxKind::repeatable_KW);
         }
     }
 
     if let Some(node) = p.peek_data() {
-        match node.as_str() {
+        match node {
             "on" => p.bump(SyntaxKind::on_KW),
             _ => p.err("expected Directive Locations"),
         }
@@ -74,7 +74,7 @@ pub(crate) fn directive_locations(p: &mut Parser, is_location: bool) {
 
     if let Some(TokenKind::Name) = p.peek() {
         let loc = p.peek_data().unwrap();
-        match loc.as_str() {
+        match loc {
            "QUERY" => {
                let _g = p.start_node(SyntaxKind::DIRECTIVE_LOCATION);
                p.bump(SyntaxKind::QUERY_KW);
4 changes: 2 additions & 2 deletions crates/apollo-parser/src/parser/grammar/document.rs
@@ -42,8 +42,8 @@ pub(crate) fn document(p: &mut Parser) {
     doc.finish_node();
 }
 
-fn select_definition(def: String, p: &mut Parser) {
-    match def.as_str() {
+fn select_definition(def: &str, p: &mut Parser) {
+    match def {
        "directive" => directive::directive_definition(p),
        "enum" => enum_::enum_type_definition(p),
        "extend" => extensions::extensions(p),
2 changes: 1 addition & 1 deletion crates/apollo-parser/src/parser/grammar/enum_.rs
@@ -15,7 +15,7 @@ pub(crate) fn enum_type_definition(p: &mut Parser) {
         description::description(p);
     }
 
-    if let Some("enum") = p.peek_data().as_deref() {
+    if let Some("enum") = p.peek_data() {
         p.bump(SyntaxKind::enum_KW);
     }
 
2 changes: 1 addition & 1 deletion crates/apollo-parser/src/parser/grammar/extensions.rs
@@ -6,7 +6,7 @@ use crate::{
 pub(crate) fn extensions(p: &mut Parser) {
     // we already know the next node is 'extend', check for the node after that
     // to figure out which type system extension to apply.
-    match p.peek_data_n(2).as_deref() {
+    match p.peek_data_n(2) {
        Some("schema") => schema::schema_extension(p),
        Some("scalar") => scalar::scalar_type_extension(p),
        Some("type") => object::object_type_extension(p),
2 changes: 1 addition & 1 deletion crates/apollo-parser/src/parser/grammar/input.rs
@@ -14,7 +14,7 @@ pub(crate) fn input_object_type_definition(p: &mut Parser) {
         description::description(p);
     }
 
-    if let Some("input") = p.peek_data().as_deref() {
+    if let Some("input") = p.peek_data() {
         p.bump(SyntaxKind::input_KW);
     }
 
6 changes: 3 additions & 3 deletions crates/apollo-parser/src/parser/grammar/interface.rs
@@ -13,7 +13,7 @@ pub(crate) fn interface_type_definition(p: &mut Parser) {
         description::description(p);
     }
 
-    if let Some("interface") = p.peek_data().as_deref() {
+    if let Some("interface") = p.peek_data() {
         p.bump(SyntaxKind::interface_KW);
     }
 
@@ -22,7 +22,7 @@ pub(crate) fn interface_type_definition(p: &mut Parser) {
         _ => p.err("expected a Name"),
     }
 
-    if let Some("implements") = p.peek_data().as_deref() {
+    if let Some("implements") = p.peek_data() {
         object::implements_interfaces(p);
     }
 
@@ -53,7 +53,7 @@ pub(crate) fn interface_type_extension(p: &mut Parser) {
         _ => p.err("expected a Name"),
     }
 
-    if let Some("implements") = p.peek_data().as_deref() {
+    if let Some("implements") = p.peek_data() {
        meets_requirements = true;
        object::implements_interfaces(p);
    }
2 changes: 1 addition & 1 deletion crates/apollo-parser/src/parser/grammar/name.rs
@@ -8,7 +8,7 @@ pub(crate) fn name(p: &mut Parser) {
     match p.peek() {
         Some(TokenKind::Name) => {
             let _g = p.start_node(SyntaxKind::NAME);
-            validate_name(&p.peek_data().unwrap(), p);
+            validate_name(p.peek_data().unwrap(), p);
            p.bump(SyntaxKind::IDENT);
        }
        _ => p.err("expected a Name"),
4 changes: 2 additions & 2 deletions crates/apollo-parser/src/parser/grammar/object.rs
@@ -15,7 +15,7 @@ pub(crate) fn object_type_definition(p: &mut Parser) {
         description::description(p);
     }
 
-    if let Some("type") = p.peek_data().as_deref() {
+    if let Some("type") = p.peek_data() {
         p.bump(SyntaxKind::type_KW);
     }
 
@@ -61,7 +61,7 @@ pub(crate) fn object_type_extension(p: &mut Parser) {
         _ => p.err("expected a Name"),
     }
 
-    if let Some("implements") = p.peek_data().as_deref() {
+    if let Some("implements") = p.peek_data() {
        meets_requirements = true;
        implements_interfaces(p);
    }
2 changes: 1 addition & 1 deletion crates/apollo-parser/src/parser/grammar/operation.rs
@@ -88,7 +88,7 @@ pub(crate) fn operation_definition(p: &mut Parser) {
 pub(crate) fn operation_type(p: &mut Parser) {
     if let Some(node) = p.peek_data() {
         let _g = p.start_node(SyntaxKind::OPERATION_TYPE);
-        match node.as_str() {
+        match node {
            "query" => p.bump(SyntaxKind::query_KW),
            "subscription" => p.bump(SyntaxKind::subscription_KW),
            "mutation" => p.bump(SyntaxKind::mutation_KW),
2 changes: 1 addition & 1 deletion crates/apollo-parser/src/parser/grammar/scalar.rs
@@ -13,7 +13,7 @@ pub(crate) fn scalar_type_definition(p: &mut Parser) {
         description::description(p);
     }
 
-    if let Some("scalar") = p.peek_data().as_deref() {
+    if let Some("scalar") = p.peek_data() {
         p.bump(SyntaxKind::scalar_KW);
     }
 
2 changes: 1 addition & 1 deletion crates/apollo-parser/src/parser/grammar/schema.rs
@@ -13,7 +13,7 @@ pub(crate) fn schema_definition(p: &mut Parser) {
         description::description(p);
     }
 
-    if let Some("schema") = p.peek_data().as_deref() {
+    if let Some("schema") = p.peek_data() {
         p.bump(SyntaxKind::schema_KW);
     }
 
2 changes: 1 addition & 1 deletion crates/apollo-parser/src/parser/grammar/union_.rs
@@ -15,7 +15,7 @@ pub(crate) fn union_type_definition(p: &mut Parser) {
         description::description(p);
     }
 
-    if let Some("union") = p.peek_data().as_deref() {
+    if let Some("union") = p.peek_data() {
         p.bump(SyntaxKind::union_KW);
     }
 
4 changes: 2 additions & 2 deletions crates/apollo-parser/src/parser/grammar/value.rs
@@ -49,7 +49,7 @@ pub(crate) fn value(p: &mut Parser, constness: Constness, pop_on_error: bool) {
         }
         Some(TokenKind::Name) => {
             let node = p.peek_data().unwrap();
-            match node.as_str() {
+            match node {
                 "true" => {
                     let _g = p.start_node(SyntaxKind::BOOLEAN_VALUE);
                     p.bump(SyntaxKind::true_KW);
@@ -85,7 +85,7 @@ pub(crate) fn enum_value(p: &mut Parser) {
     let _g = p.start_node(SyntaxKind::ENUM_VALUE);
     let name = p.peek_data().unwrap();
 
-    if matches!(name.as_str(), "true" | "false" | "null") {
+    if matches!(name, "true" | "false" | "null") {
        p.err("unexpected Enum Value");
    }
 
39 changes: 18 additions & 21 deletions crates/apollo-parser/src/parser/mod.rs
@@ -74,14 +74,14 @@ pub(crate) use token_text::TokenText;
 /// let document = cst.document();
 /// ```
 #[derive(Debug)]
-pub struct Parser<'a> {
-    lexer: Lexer<'a>,
+pub struct Parser<'input> {
+    lexer: Lexer<'input>,
     /// Store one lookahead token so we don't need to reparse things as much.
-    current_token: Option<Token<'a>>,
+    current_token: Option<Token<'input>>,
     /// The in-progress tree.
     builder: Rc<RefCell<SyntaxTreeBuilder>>,
     /// Ignored tokens that should be added to the tree.
-    ignored: Vec<Token<'a>>,
+    ignored: Vec<Token<'input>>,
     /// The list of syntax errors we've accumulated so far.
     errors: Vec<crate::Error>,
     /// The limit to apply to parsing.
@@ -103,9 +103,9 @@ pub struct Parser<'a> {
 /// Defaulting to around a quarter of that, to keep a comfortable safety margin.
 const DEFAULT_RECURSION_LIMIT: usize = 500;
 
-impl<'a> Parser<'a> {
+impl<'input> Parser<'input> {
     /// Create a new instance of a parser given an input string.
-    pub fn new(input: &'a str) -> Self {
+    pub fn new(input: &'input str) -> Self {
         let lexer = Lexer::new(input);
 
         Self {
@@ -240,7 +240,7 @@ impl<'a> Parser<'a> {
     }
 
     /// Get current token's data.
-    pub(crate) fn current(&mut self) -> Option<&Token> {
+    pub(crate) fn current(&mut self) -> Option<&Token<'input>> {
         self.peek_token()
     }
 
@@ -329,10 +329,7 @@ impl<'a> Parser<'a> {
             return;
         };
         let is_eof = current.kind == TokenKind::Eof;
-        // TODO(@goto-bus-stop) this allocation is only required if we have an
-        // error, but has to be done eagerly here as the &str reference gets
-        // invalidated by `self.at()`. Can we avoid that?
-        let data = current.data().to_string();
+        let data = current.data();
         let index = current.index();
 
         if self.at(token) {
@@ -345,7 +342,7 @@ impl<'a> Parser<'a> {
             Error::eof(message, index)
         } else {
             let message = format!("expected {kind:?}, got {data}");
-            Error::with_loc(message, data, index)
+            Error::with_loc(message, data.to_string(), index)
         };
 
         self.push_err(err);
@@ -366,7 +363,7 @@ impl<'a> Parser<'a> {
     }
 
     /// Gets the next token from the lexer.
-    fn next_token(&mut self) -> Option<Token<'a>> {
+    fn next_token(&mut self) -> Option<Token<'input>> {
         for res in &mut self.lexer {
             match res {
                 Err(err) => {
@@ -385,7 +382,7 @@ impl<'a> Parser<'a> {
     }
 
     /// Consume a token from the lexer.
-    pub(crate) fn pop(&mut self) -> Token<'a> {
+    pub(crate) fn pop(&mut self) -> Token<'input> {
         if let Some(token) = self.current_token.take() {
             return token;
         }
@@ -432,15 +429,15 @@ impl<'a> Parser<'a> {
     }
 
     /// Peek the next Token and return it.
-    pub(crate) fn peek_token(&mut self) -> Option<&Token> {
+    pub(crate) fn peek_token(&mut self) -> Option<&Token<'input>> {
         if self.current_token.is_none() {
             self.current_token = self.next_token();
         }
         self.current_token.as_ref()
     }
 
     /// Peek Token `n` and return it.
-    pub(crate) fn peek_token_n(&self, n: usize) -> Option<Token> {
+    pub(crate) fn peek_token_n(&self, n: usize) -> Option<Token<'input>> {
         self.peek_n_inner(n)
     }
 
@@ -449,7 +446,7 @@ impl<'a> Parser<'a> {
         self.peek_n_inner(n).map(|token| token.kind())
     }
 
-    fn peek_n_inner(&self, n: usize) -> Option<Token> {
+    fn peek_n_inner(&self, n: usize) -> Option<Token<'input>> {
         self.current_token
             .iter()
             .cloned()
@@ -461,13 +458,13 @@ impl<'a> Parser<'a> {
     }
 
     /// Peek next Token's `data` property.
-    pub(crate) fn peek_data(&mut self) -> Option<String> {
-        self.peek_token().map(|token| token.data().to_string())
+    pub(crate) fn peek_data(&mut self) -> Option<&'input str> {
+        self.peek_token().map(|token| token.data())
     }
 
     /// Peek `n` Token's `data` property.
-    pub(crate) fn peek_data_n(&self, n: usize) -> Option<String> {
-        self.peek_token_n(n).map(|token| token.data().to_string())
+    pub(crate) fn peek_data_n(&self, n: usize) -> Option<&'input str> {
+        self.peek_token_n(n).map(|token| token.data())
     }
 }
 
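
The `expect` change in `parser/mod.rs` above is the "eager copy that is almost always just thrown away" mentioned in the commit message: the token text now stays borrowed and is only turned into a `String` when an error is actually constructed. A rough sketch of that shape, with hypothetical names and signature rather than the real `Parser::expect`:

```rust
// Rough sketch of the "allocate only on the error path" shape described in
// the commit message; the names and signature here are hypothetical.
struct Error {
    message: String,
    data: String,
    index: usize,
}

fn expect(found: &str, index: usize, matches: bool, errors: &mut Vec<Error>) {
    if matches {
        // Happy path: the borrowed `&str` is never copied.
        return;
    }
    // Only the error path pays for the `String` allocation.
    errors.push(Error {
        message: format!("expected a keyword, got {found}"),
        data: found.to_string(),
        index,
    });
}

fn main() {
    let mut errors = Vec::new();
    expect("directive", 0, true, &mut errors); // no allocation
    expect("oops", 10, false, &mut errors); // allocates only here
    for err in &errors {
        println!("error at {}: {} (found `{}`)", err.index, err.message, err.data);
    }
}
```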
