From bc1dfc443f7c173e7404579e8bc26eb1f4668c4b Mon Sep 17 00:00:00 2001
From: Geoffroy Couprie
Date: Thu, 18 Nov 2021 17:09:56 +0100
Subject: [PATCH] store the next token to avoid reparsing when peeking

The parser frequently looks at the next token to make a decision. When we
stored the whole list of tokens, that operation was essentially free, but
when streaming, we end up reparsing the next token multiple times. By
storing it in advance, we avoid that cost.
---
 crates/apollo-parser/src/lexer/mod.rs  | 10 ----
 crates/apollo-parser/src/parser/mod.rs | 68 +++++++++++++++++---------
 2 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/crates/apollo-parser/src/lexer/mod.rs b/crates/apollo-parser/src/lexer/mod.rs
index b9509c1ff..8187dccd5 100644
--- a/crates/apollo-parser/src/lexer/mod.rs
+++ b/crates/apollo-parser/src/lexer/mod.rs
@@ -84,16 +84,6 @@ impl<'a> LexerIterator<'a> {
             finished: false,
         }
     }
-
-    pub fn peek_token(&self) -> Option<Token> {
-        let it = self.clone();
-
-        it.filter_map(|res| match res {
-            LexerResult::Error(_) => None,
-            LexerResult::Token(token) => Some(token),
-        })
-        .next()
-    }
 }
 
 impl<'a> Iterator for LexerIterator<'a> {
diff --git a/crates/apollo-parser/src/parser/mod.rs b/crates/apollo-parser/src/parser/mod.rs
index 46dba99a1..024fd7681 100644
--- a/crates/apollo-parser/src/parser/mod.rs
+++ b/crates/apollo-parser/src/parser/mod.rs
@@ -79,6 +79,7 @@ pub struct Parser<'a> {
     builder: Rc<RefCell<SyntaxTreeBuilder>>,
     /// The list of syntax errors we've accumulated so far.
     errors: Vec<Error>,
+    current_token: Option<Token>,
 }
 
 impl<'a> Parser<'a> {
@@ -90,6 +91,7 @@ impl<'a> Parser<'a> {
             lexer,
             builder: Rc::new(RefCell::new(SyntaxTreeBuilder::new())),
             errors: Vec::new(),
+            current_token: None,
         }
     }
 
@@ -138,26 +140,15 @@ impl<'a> Parser<'a> {
     }
 
     /// Get current token's data.
-    pub(crate) fn current(&mut self) -> Token {
+    pub(crate) fn current(&mut self) -> &Token {
         self.peek_token()
             .expect("Could not peek at the current token")
     }
 
     /// Consume a token from the lexer and add it to the AST.
     fn eat(&mut self, kind: SyntaxKind) {
-        loop {
-            match self
-                .lexer
-                .next()
-                .expect("Could not eat a token from the AST")
-            {
-                LexerResult::Error(e) => self.errors.push(e),
-                LexerResult::Token(token) => {
-                    self.builder.borrow_mut().token(kind, token.data());
-                    break;
-                }
-            }
-        }
+        let token = self.pop();
+        self.builder.borrow_mut().token(kind, token.data());
     }
 
     /// Create a parser error and push it into the error vector.
@@ -206,6 +197,10 @@ impl<'a> Parser<'a> {
 
     /// Consume a token from the lexer.
     pub(crate) fn pop(&mut self) -> Token {
+        if let Some(token) = self.current_token.take() {
+            return token;
+        }
+
         loop {
             match self
                 .lexer
@@ -238,43 +233,68 @@ impl<'a> Parser<'a> {
     }
 
     /// Peek the next Token and return its TokenKind.
-    pub(crate) fn peek(&self) -> Option<TokenKind> {
-        self.lexer.peek_token().map(|token| token.kind())
+    pub(crate) fn peek(&mut self) -> Option<TokenKind> {
+        self.peek_token().map(|token| token.kind())
     }
 
     /// Peek the next Token and return it.
-    pub(crate) fn peek_token(&self) -> Option<Token> {
-        self.lexer.peek_token()
+    pub(crate) fn peek_token(&mut self) -> Option<&Token> {
+        if self.current_token.is_none() {
+            loop {
+                match self
+                    .lexer
+                    .next()
+                    .expect("Could not pop a token from the AST")
+                {
+                    LexerResult::Error(e) => self.errors.push(e),
+                    LexerResult::Token(token) => {
+                        self.current_token = Some(token);
+                        break;
+                    }
+                }
+            }
+        }
+        self.current_token.as_ref()
     }
 
     /// Peek Token `n` and return its TokenKind.
     pub(crate) fn peek_n(&self, n: usize) -> Option<TokenKind> {
+        let index = if self.current_token.is_none() {
+            n - 1
+        } else {
+            n - 2
+        };
+
         let it = self.lexer.clone();
         it.filter_map(|res| match res {
             LexerResult::Error(_) => None,
             LexerResult::Token(token) => Some(token),
         })
         .filter(|token| !matches!(token.kind(), TokenKind::Whitespace | TokenKind::Comment))
-        .nth(n - 1)
+        .nth(index)
         .map(|token| token.kind())
     }
 
     /// Peek next Token's `data` property.
-    pub(crate) fn peek_data(&self) -> Option<String> {
-        self.lexer
-            .peek_token()
-            .map(|token| token.data().to_string())
+    pub(crate) fn peek_data(&mut self) -> Option<String> {
+        self.peek_token().map(|token| token.data().to_string())
     }
 
     /// Peek `n` Token's `data` property.
     pub(crate) fn peek_data_n(&self, n: usize) -> Option<String> {
+        let index = if self.current_token.is_none() {
+            n - 1
+        } else {
+            n - 2
+        };
+
         let it = self.lexer.clone();
         it.filter_map(|res| match res {
             LexerResult::Error(_) => None,
             LexerResult::Token(token) => Some(token),
        })
         .filter(|token| !matches!(token.kind(), TokenKind::Whitespace | TokenKind::Comment))
-        .nth(n - 1)
+        .nth(index)
         .map(|token| token.data().to_string())
     }
 }
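
The patch above replaces clone-and-relex peeking with a one-token lookahead
buffer: peek_token() fills current_token from the lexer at most once, and
pop() drains the buffer before touching the lexer again. Below is a minimal,
self-contained sketch of the same pattern; Token, LexerResult, and the
Vec-backed "lexer" are hypothetical stand-ins for apollo-parser's streaming
lexer types, and unlike the patch, pop() here returns Option instead of
panicking when input runs out.

    // Standalone sketch of the single-token lookahead cache: peek fills the
    // buffer lazily, pop drains it, and lexing errors are accumulated.
    #[derive(Debug, Clone, PartialEq)]
    struct Token(String);

    enum LexerResult {
        Token(Token),
        Error(String),
    }

    struct Parser {
        lexer: std::vec::IntoIter<LexerResult>,
        errors: Vec<String>,
        /// The buffered lookahead token, filled lazily by peek_token().
        current_token: Option<Token>,
    }

    impl Parser {
        fn new(results: Vec<LexerResult>) -> Self {
            Parser {
                lexer: results.into_iter(),
                errors: Vec::new(),
                current_token: None,
            }
        }

        /// Look at the next token without consuming it. Only the first call
        /// after a pop() advances the lexer; repeated peeks hit the buffer.
        fn peek_token(&mut self) -> Option<&Token> {
            if self.current_token.is_none() {
                // Record lexing errors and keep going until a token shows up.
                while let Some(res) = self.lexer.next() {
                    match res {
                        LexerResult::Error(e) => self.errors.push(e),
                        LexerResult::Token(token) => {
                            self.current_token = Some(token);
                            break;
                        }
                    }
                }
            }
            self.current_token.as_ref()
        }

        /// Consume the next token, draining the buffer first if it is full.
        fn pop(&mut self) -> Option<Token> {
            if let Some(token) = self.current_token.take() {
                return Some(token);
            }
            while let Some(res) = self.lexer.next() {
                match res {
                    LexerResult::Error(e) => self.errors.push(e),
                    LexerResult::Token(token) => return Some(token),
                }
            }
            None
        }
    }

    fn main() {
        let mut parser = Parser::new(vec![
            LexerResult::Error("unexpected byte".into()),
            LexerResult::Token(Token("query".into())),
            LexerResult::Token(Token("{".into())),
        ]);

        // Two peeks, one lexer advance: the second peek is free.
        assert_eq!(parser.peek_token(), Some(&Token("query".into())));
        assert_eq!(parser.peek_token(), Some(&Token("query".into())));

        // pop() hands back the buffered token without re-lexing it.
        assert_eq!(parser.pop(), Some(Token("query".into())));
        assert_eq!(parser.pop(), Some(Token("{".into())));
        assert_eq!(parser.pop(), None);
        assert_eq!(parser.errors.len(), 1);
    }

One subtlety in peek_n() and peek_data_n(): a buffered token never reaches
the cloned lexer iterator, which is why the index shifts from n - 1 to n - 2
when the buffer is full. As written, that arithmetic appears to assume
callers only ask for n >= 2 while a token is buffered, since n = 1 would
underflow the usize subtraction.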