From e743184085414ca1a080ed06573f1c45ee3d31a8 Mon Sep 17 00:00:00 2001
From: Geoffroy Couprie
Date: Thu, 18 Nov 2021 17:09:56 +0100
Subject: [PATCH] store the next token to avoid reparsing when peeking

The parser frequently looks at the next token to make a decision. When we
stored the whole list of tokens, that operation was essentially free, but in
streaming mode we end up reparsing the next token multiple times. By storing
it in advance, we avoid that cost.
---
 crates/apollo-parser/src/lexer/mod.rs  | 10 ----
 crates/apollo-parser/src/parser/mod.rs | 68 +++++++++++++++++---------
 2 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/crates/apollo-parser/src/lexer/mod.rs b/crates/apollo-parser/src/lexer/mod.rs
index fd391f12a..6dc386d32 100644
--- a/crates/apollo-parser/src/lexer/mod.rs
+++ b/crates/apollo-parser/src/lexer/mod.rs
@@ -84,16 +84,6 @@ impl<'a> LexerIterator<'a> {
             finished: false,
         }
     }
-
-    pub fn peek_token(&self) -> Option<Token> {
-        let it = self.clone();
-
-        it.filter_map(|res| match res {
-            LexerResult::Error(_) => None,
-            LexerResult::Token(token) => Some(token),
-        })
-        .next()
-    }
 }
 
 impl<'a> Iterator for LexerIterator<'a> {
diff --git a/crates/apollo-parser/src/parser/mod.rs b/crates/apollo-parser/src/parser/mod.rs
index 15cd39f59..46817f94f 100644
--- a/crates/apollo-parser/src/parser/mod.rs
+++ b/crates/apollo-parser/src/parser/mod.rs
@@ -28,6 +28,7 @@ pub struct Parser<'a> {
     builder: Rc<RefCell<SyntaxTreeBuilder>>,
     /// The list of syntax errors we've accumulated so far.
     errors: Vec<Error>,
+    current_token: Option<Token>,
 }
 
 impl<'a> Parser<'a> {
@@ -39,6 +40,7 @@ impl<'a> Parser<'a> {
             lexer,
             builder: Rc::new(RefCell::new(SyntaxTreeBuilder::new())),
             errors: Vec::new(),
+            current_token: None,
         }
     }
 
@@ -87,26 +89,15 @@ impl<'a> Parser<'a> {
     }
 
     /// Get current token's data.
-    pub(crate) fn current(&mut self) -> Token {
+    pub(crate) fn current(&mut self) -> &Token {
         self.peek_token()
             .expect("Could not peek at the current token")
     }
 
     /// Consume a token from the lexer and add it to the AST.
     fn eat(&mut self, kind: SyntaxKind) {
-        loop {
-            match self
-                .lexer
-                .next()
-                .expect("Could not eat a token from the AST")
-            {
-                LexerResult::Error(e) => self.errors.push(e),
-                LexerResult::Token(token) => {
-                    self.builder.borrow_mut().token(kind, token.data());
-                    break;
-                }
-            }
-        }
+        let token = self.pop();
+        self.builder.borrow_mut().token(kind, token.data());
     }
 
     /// Create a parser error and push it into the error vector.
@@ -155,6 +146,10 @@ impl<'a> Parser<'a> {
 
     /// Consume a token from the lexer.
    pub(crate) fn pop(&mut self) -> Token {
+        if let Some(token) = self.current_token.take() {
+            return token;
+        }
+
         loop {
             match self
                 .lexer
@@ -187,43 +182,68 @@ impl<'a> Parser<'a> {
     }
 
     /// Peek the next Token and return its TokenKind.
-    pub(crate) fn peek(&self) -> Option<TokenKind> {
-        self.lexer.peek_token().map(|token| token.kind())
+    pub(crate) fn peek(&mut self) -> Option<TokenKind> {
+        self.peek_token().map(|token| token.kind())
     }
 
     /// Peek the next Token and return it.
-    pub(crate) fn peek_token(&self) -> Option<Token> {
-        self.lexer.peek_token()
+    pub(crate) fn peek_token(&mut self) -> Option<&Token> {
+        if self.current_token.is_none() {
+            loop {
+                match self
+                    .lexer
+                    .next()
+                    .expect("Could not pop a token from the AST")
+                {
+                    LexerResult::Error(e) => self.errors.push(e),
+                    LexerResult::Token(token) => {
+                        self.current_token = Some(token);
+                        break;
+                    }
+                }
+            }
+        }
+        self.current_token.as_ref()
     }
 
     /// Peek Token `n` and return its TokenKind.
     pub(crate) fn peek_n(&self, n: usize) -> Option<TokenKind> {
+        let index = if self.current_token.is_none() {
+            n - 1
+        } else {
+            n - 2
+        };
+
         let it = self.lexer.clone();
         it.filter_map(|res| match res {
             LexerResult::Error(_) => None,
             LexerResult::Token(token) => Some(token),
         })
         .filter(|token| !matches!(token.kind(), TokenKind::Whitespace | TokenKind::Comment))
-        .nth(n - 1)
+        .nth(index)
         .map(|token| token.kind())
     }
 
     /// Peek next Token's `data` property.
-    pub(crate) fn peek_data(&self) -> Option<String> {
-        self.lexer
-            .peek_token()
-            .map(|token| token.data().to_string())
+    pub(crate) fn peek_data(&mut self) -> Option<String> {
+        self.peek_token().map(|token| token.data().to_string())
     }
 
     /// Peek `n` Token's `data` property.
     pub(crate) fn peek_data_n(&self, n: usize) -> Option<String> {
+        let index = if self.current_token.is_none() {
+            n - 1
+        } else {
+            n - 2
+        };
+
         let it = self.lexer.clone();
         it.filter_map(|res| match res {
             LexerResult::Error(_) => None,
             LexerResult::Token(token) => Some(token),
        })
         .filter(|token| !matches!(token.kind(), TokenKind::Whitespace | TokenKind::Comment))
-        .nth(n - 1)
+        .nth(index)
         .map(|token| token.data().to_string())
     }
 }
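
Note (outside the patch): the change above is a single-token lookahead cache.
Below is a minimal, self-contained sketch of the same pattern, using
hypothetical names (`Lookahead`, `Token`) rather than apollo-parser's actual
API; error accumulation and whitespace/comment skipping are omitted.

    /// Wraps any token iterator and caches the next token, so repeated
    /// peeks cost at most one pull from the underlying (streaming) lexer.
    struct Lookahead<I: Iterator<Item = Token>> {
        tokens: I,
        current: Option<Token>, // plays the role of `current_token` above
    }

    #[derive(Debug, Clone, PartialEq)]
    struct Token(String);

    impl<I: Iterator<Item = Token>> Lookahead<I> {
        fn new(tokens: I) -> Self {
            Self { tokens, current: None }
        }

        /// Pull a token only if none is cached; mirrors `peek_token`.
        fn peek(&mut self) -> Option<&Token> {
            if self.current.is_none() {
                self.current = self.tokens.next();
            }
            self.current.as_ref()
        }

        /// Drain the cache if it is full, otherwise pull; mirrors `pop`.
        fn pop(&mut self) -> Option<Token> {
            self.current.take().or_else(|| self.tokens.next())
        }
    }

    fn main() {
        let mut la = Lookahead::new(
            ["query", "{", "a", "}"].iter().map(|s| Token(s.to_string())),
        );
        assert_eq!(la.peek(), Some(&Token("query".into()))); // pulls once
        assert_eq!(la.peek(), Some(&Token("query".into()))); // cache hit
        assert_eq!(la.pop(), Some(Token("query".into())));   // drains cache
        assert_eq!(la.peek(), Some(&Token("{".into())));     // pulls next
    }

The cache is also why `peek_n`/`peek_data_n` switch between `n - 1` and
`n - 2`: when a token is cached it has already been taken out of the cloned
lexer stream, so the n-th lookahead token sits one position earlier there.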