parse tokens in streaming
This is currently twice as slow as accumulating all tokens and then parsing,
because we regularly call the peek_* methods. They are basically free when
the tokens are accumulated in advance, but in streaming mode we recalculate
them all the time.
Geoffroy Couprie committed Nov 18, 2021
1 parent 57647fe commit 81f94f4
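
The commit message above points at the cost of peeking in a streaming lexer. As a minimal, self-contained sketch (toy types, not the apollo-parser API): peeking into a pre-collected Vec of tokens is a constant-time index, while peeking into a streaming lexer means cloning it and re-lexing from the current position, so repeated peeks redo the same work.

#[derive(Clone, Debug, PartialEq)]
struct Token(char);

// Toy streaming "lexer": yields one token per non-whitespace character.
#[derive(Clone)]
struct Stream<'a> {
    rest: &'a str,
}

impl<'a> Iterator for Stream<'a> {
    type Item = Token;
    fn next(&mut self) -> Option<Token> {
        loop {
            let c = self.rest.chars().next()?;
            self.rest = &self.rest[c.len_utf8()..];
            if !c.is_whitespace() {
                return Some(Token(c));
            }
        }
    }
}

impl<'a> Stream<'a> {
    // Peeking clones the stream and lexes again from the current position,
    // so every call repeats the work of lexing the first n tokens.
    fn peek_n(&self, n: usize) -> Option<Token> {
        self.clone().nth(n - 1)
    }
}

fn main() {
    let input = "a b c";

    // Accumulate-then-parse: peeking is an O(1) index into the Vec.
    let all: Vec<Token> = Stream { rest: input }.collect();
    assert_eq!(all.get(2), Some(&Token('c')));

    // Streaming: each peek_n(3) re-lexes three tokens from `rest`.
    let stream = Stream { rest: input };
    assert_eq!(stream.peek_n(3), Some(Token('c')));
    assert_eq!(stream.peek_n(3), Some(Token('c'))); // same work done again
}
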
Showing 2 changed files with 126 additions and 45 deletions.
crates/apollo-parser/src/lexer/mod.rs (69 additions, 0 deletions)
@@ -64,6 +64,75 @@ impl Lexer {
}
}

#[derive(Clone, Debug)]
pub struct LexerIterator<'a> {
input: &'a str,
index: usize,
finished: bool,
}

pub enum LexerResult {
Token(Token),
Error(Error),
}

impl<'a> LexerIterator<'a> {
pub fn new(input: &'a str) -> Self {
Self {
input,
index: 0,
finished: false,
}
}

pub fn peek_token(&self) -> Option<Token> {
let it = self.clone();

it.filter_map(|res| match res {
LexerResult::Error(_) => None,
LexerResult::Token(token) => Some(token),
})
.next()
}
}

impl<'a> Iterator for LexerIterator<'a> {
type Item = LexerResult;

fn next(&mut self) -> Option<Self::Item> {
if self.finished {
return None;
}
if self.input.is_empty() {
let mut eof = Token::new(TokenKind::Eof, String::from("EOF"));
eof.index = self.index;

self.finished = true;
return Some(LexerResult::Token(eof));
}

let mut c = Cursor::new(self.input);
let r = c.advance();

match r {
Ok(mut token) => {
token.index = self.index;
self.index += token.data.len();

self.input = &self.input[token.data.len()..];
Some(LexerResult::Token(token))
}
Err(mut err) => {
err.index = self.index;
self.index += err.data.len();

self.input = &self.input[err.data.len()..];
Some(LexerResult::Error(err))
}
}
}
}

impl Cursor<'_> {
fn advance(&mut self) -> Result<Token, Error> {
let first_char = self.bump().unwrap();
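
For context, here is a hypothetical in-module test, not part of this commit, showing how the new LexerIterator is meant to be consumed: tokens and errors arrive interleaved as LexerResult values, an error does not stop the stream, and the stream ends with an explicit EOF token. It assumes Token and TokenKind are in scope in the lexer module (they are referenced above) and that the sample input lexes without errors.

#[cfg(test)]
mod streaming_lexer_usage {
    use super::*;

    #[test]
    fn yields_tokens_until_eof() {
        let lexer = LexerIterator::new("{ name }");

        // Split the stream into token kinds and an error count, mirroring
        // what Parser::eat and Parser::pop do on the parser side.
        let mut kinds = Vec::new();
        let mut errors = 0;
        for res in lexer {
            match res {
                LexerResult::Token(token) => kinds.push(token.kind()),
                LexerResult::Error(_) => errors += 1,
            }
        }

        assert_eq!(errors, 0);
        // The iterator finishes with an explicit EOF token.
        assert!(matches!(kinds.last(), Some(TokenKind::Eof)));
    }
}
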
crates/apollo-parser/src/parser/mod.rs (57 additions, 45 deletions)
@@ -7,7 +7,10 @@ pub(crate) mod grammar;

use std::{cell::RefCell, collections::VecDeque, rc::Rc};

-use crate::{lexer::Lexer, Error, Token, TokenKind};
+use crate::{
+lexer::{Lexer, LexerIterator, LexerResult},
+Error, Token, TokenKind,
+};

pub use generated::syntax_kind::SyntaxKind;
pub use language::{SyntaxElement, SyntaxElementChildren, SyntaxNodeChildren, SyntaxToken};
@@ -19,37 +22,23 @@ pub(crate) use token_text::TokenText;

/// Parse text into an AST.
#[derive(Debug)]
-pub struct Parser {
-/// Input tokens, including whitespace, in *reverse* order.
-tokens: VecDeque<Token>,
+pub struct Parser<'a> {
+lexer: LexerIterator<'a>,
/// The in-progress tree.
builder: Rc<RefCell<SyntaxTreeBuilder>>,
/// The list of syntax errors we've accumulated so far.
errors: Vec<crate::Error>,
}

-impl Parser {
+impl<'a> Parser<'a> {
/// Create a new instance of a parser given an input string.
-pub fn new(input: &str) -> Self {
-let lexer = Lexer::new(input);
-
-let mut tokens = VecDeque::new();
-let mut errors = Vec::new();
-
-for s in lexer.tokens().to_owned() {
-tokens.push_back(s);
-}
-
-for e in lexer.errors().to_owned() {
-errors.push(e);
-}
-
-errors.reverse();
+pub fn new(input: &'a str) -> Self {
+let lexer = LexerIterator::new(input);

Self {
-tokens,
+lexer,
builder: Rc::new(RefCell::new(SyntaxTreeBuilder::new())),
-errors,
+errors: Vec::new(),
}
}

@@ -98,18 +87,26 @@ impl Parser {
}

/// Get current token's data.
-pub(crate) fn current(&mut self) -> &Token {
+pub(crate) fn current(&mut self) -> Token {
self.peek_token()
.expect("Could not peek at the current token")
}

/// Consume a token from the lexer and add it to the AST.
fn eat(&mut self, kind: SyntaxKind) {
-let token = self
-.tokens
-.pop_front()
-.expect("Could not eat a token from the AST");
-self.builder.borrow_mut().token(kind, token.data());
+loop {
+match self
+.lexer
+.next()
+.expect("Could not eat a token from the AST")
+{
+LexerResult::Error(e) => self.errors.push(e),
+LexerResult::Token(token) => {
+self.builder.borrow_mut().token(kind, token.data());
+break;
+}
+}
+}
}

/// Create a parser error and push it into the error vector.
@@ -158,9 +155,16 @@

/// Consume a token from the lexer.
pub(crate) fn pop(&mut self) -> Token {
-self.tokens
-.pop_front()
-.expect("Could not pop a token from the AST")
+loop {
+match self
+.lexer
+.next()
+.expect("Could not pop a token from the AST")
+{
+LexerResult::Error(e) => self.errors.push(e),
+LexerResult::Token(token) => return token,
+}
+}
}

/// Insert a token into the AST.
Expand All @@ -184,35 +188,43 @@ impl Parser {

/// Peek the next Token and return its TokenKind.
pub(crate) fn peek(&self) -> Option<TokenKind> {
-self.tokens.front().map(|token| token.kind())
+self.lexer.peek_token().map(|token| token.kind())
}

/// Peek the next Token and return it.
-pub(crate) fn peek_token(&self) -> Option<&Token> {
-self.tokens.front()
+pub(crate) fn peek_token(&self) -> Option<Token> {
+self.lexer.peek_token()
}

/// Peek Token `n` and return its TokenKind.
pub(crate) fn peek_n(&self, n: usize) -> Option<TokenKind> {
-self.tokens
-.iter()
-.filter(|token| !matches!(token.kind(), TokenKind::Whitespace | TokenKind::Comment))
-.nth(n - 1)
-.map(|token| token.kind())
+let it = self.lexer.clone();
+it.filter_map(|res| match res {
+LexerResult::Error(_) => None,
+LexerResult::Token(token) => Some(token),
+})
+.filter(|token| !matches!(token.kind(), TokenKind::Whitespace | TokenKind::Comment))
+.nth(n - 1)
+.map(|token| token.kind())
}

/// Peek next Token's `data` property.
pub(crate) fn peek_data(&self) -> Option<String> {
-self.tokens.front().map(|token| token.data().to_string())
+self.lexer
+.peek_token()
+.map(|token| token.data().to_string())
}

/// Peek `n` Token's `data` property.
pub(crate) fn peek_data_n(&self, n: usize) -> Option<String> {
-self.tokens
-.iter()
-.filter(|token| !matches!(token.kind(), TokenKind::Whitespace | TokenKind::Comment))
-.nth(n - 1)
-.map(|token| token.data().to_string())
+let it = self.lexer.clone();
+it.filter_map(|res| match res {
+LexerResult::Error(_) => None,
+LexerResult::Token(token) => Some(token),
+})
+.filter(|token| !matches!(token.kind(), TokenKind::Whitespace | TokenKind::Comment))
+.nth(n - 1)
+.map(|token| token.data().to_string())
}
}


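Not part of this commit, but one common way to make streaming peeks cheap again, as hinted at in the commit message, is to buffer already-lexed items in front of the iterator so each token is produced at most once. The sketch below uses made-up names and a plain Iterator to show the idea; peek_n is 1-based like Parser::peek_n.

use std::collections::VecDeque;

// Wrap any iterator with a lookahead buffer so peeking never repeats work.
struct Buffered<I: Iterator> {
    inner: I,
    lookahead: VecDeque<I::Item>,
}

impl<I: Iterator> Buffered<I> {
    fn new(inner: I) -> Self {
        Self { inner, lookahead: VecDeque::new() }
    }

    // Pull items from `inner` only until the buffer holds n of them, then
    // hand out a reference; repeated peeks are served from the buffer.
    fn peek_n(&mut self, n: usize) -> Option<&I::Item> {
        while self.lookahead.len() < n {
            self.lookahead.push_back(self.inner.next()?);
        }
        self.lookahead.get(n - 1)
    }
}

impl<I: Iterator> Iterator for Buffered<I> {
    type Item = I::Item;
    fn next(&mut self) -> Option<I::Item> {
        // Consuming drains the buffer before touching the underlying iterator.
        self.lookahead.pop_front().or_else(|| self.inner.next())
    }
}

fn main() {
    let mut nums = Buffered::new(1..=5);
    assert_eq!(nums.peek_n(3), Some(&3)); // produces 1, 2, 3 once
    assert_eq!(nums.peek_n(3), Some(&3)); // served from the buffer
    assert_eq!(nums.next(), Some(1));     // buffer is drained first
}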