parse tokens in streaming
this is currently twice as slow as accumulating all tokens and then
parsing, because we regularly call the peek_* methods. They are
basically free if we accumulate the tokens in advance, but in
streaming we recalculate them all the time
Geoffroy Couprie committed Nov 24, 2021
1 parent c533eae commit 23e9d57
Showing 5 changed files with 172 additions and 53 deletions.
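The slowdown described in the commit message is easy to reproduce in miniature. The sketch below is a standalone model rather than apollo-parser code: the whitespace-splitting lexer and every name in it are invented for illustration. When tokens are accumulated up front, a peek is a slice index; with a streaming iterator, a peek clones the iterator and lexes again, and that work is repeated on every call.

#[derive(Clone, Debug)]
struct StreamingLexer<'a> {
    input: &'a str,
}

impl<'a> Iterator for StreamingLexer<'a> {
    type Item = &'a str;

    // Each "token" is re-derived from the remaining input on demand.
    fn next(&mut self) -> Option<&'a str> {
        let rest = self.input.trim_start();
        if rest.is_empty() {
            return None;
        }
        let end = rest.find(char::is_whitespace).unwrap_or(rest.len());
        self.input = &rest[end..];
        Some(&rest[..end])
    }
}

impl<'a> StreamingLexer<'a> {
    // Peeking clones the iterator and lexes again: O(peek distance) work
    // on every call, which is exactly the cost the commit message mentions.
    fn peek(&self) -> Option<&'a str> {
        self.clone().next()
    }
}

fn main() {
    let lexer = StreamingLexer { input: "query { me { id } }" };
    assert_eq!(lexer.peek(), Some("query"));
    assert_eq!(lexer.peek(), Some("query")); // recomputed, not cached
    assert_eq!(lexer.count(), 7);
}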
23 changes: 21 additions & 2 deletions crates/apollo-parser/benches/query.rs
@@ -1,4 +1,4 @@
-use apollo_parser::{ast, Lexer};
+use apollo_parser::{ast, Lexer, LexerIterator};
 use criterion::*;
 
 fn parse_query(query: &str) {
@@ -43,5 +43,24 @@ fn bench_query_lexer(c: &mut Criterion) {
     });
 }
 
-criterion_group!(benches, bench_query_lexer, bench_query_parser);
+fn bench_query_lexer_streaming(c: &mut Criterion) {
+    let query = "query ExampleQuery($topProductsFirst: Int) {\n me { \n id\n }\n topProducts(first: $topProductsFirst) {\n name\n price\n inStock\n weight\n test test test test test test test test test test test test }\n}";
+
+    c.bench_function("query_lexer_streaming", move |b| {
+        b.iter(|| {
+            let lexer = LexerIterator::new(query);
+
+            for token_res in lexer {
+                let _ = token_res;
+            }
+        })
+    });
+}
+
+criterion_group!(
+    benches,
+    bench_query_lexer,
+    bench_query_lexer_streaming,
+    bench_query_parser
+);
 criterion_main!(benches);
23 changes: 21 additions & 2 deletions crates/apollo-parser/benches/supergraph.rs
@@ -1,4 +1,4 @@
-use apollo_parser::{ast, Lexer};
+use apollo_parser::{ast, Lexer, LexerIterator};
 use criterion::*;
 
 fn parse_schema(schema: &str) {
@@ -46,5 +46,24 @@ fn bench_supergraph_lexer(c: &mut Criterion) {
     });
 }
 
-criterion_group!(benches, bench_supergraph_lexer, bench_supergraph_parser);
+fn bench_supergraph_lexer_streaming(c: &mut Criterion) {
+    let schema = include_str!("../test_data/parser/ok/0032_supergraph.graphql");
+
+    c.bench_function("supergraph_lexer_streaming", move |b| {
+        b.iter(|| {
+            let lexer = LexerIterator::new(schema);
+
+            for token_res in lexer {
+                let _ = token_res;
+            }
+        })
+    });
+}
+
+criterion_group!(
+    benches,
+    bench_supergraph_lexer,
+    bench_supergraph_lexer_streaming,
+    bench_supergraph_parser
+);
 criterion_main!(benches);
73 changes: 71 additions & 2 deletions crates/apollo-parser/src/lexer/mod.rs
@@ -54,16 +54,85 @@ impl Lexer {
     }
 
     /// Get a reference to the lexer's tokens.
-    pub(crate) fn tokens(&self) -> &[Token] {
+    pub fn tokens(&self) -> &[Token] {
         self.tokens.as_slice()
     }
 
     /// Get a reference to the lexer's errors.
-    pub(crate) fn errors(&self) -> &[Error] {
+    pub fn errors(&self) -> &[Error] {
         self.errors.as_slice()
     }
 }
 
+#[derive(Clone, Debug)]
+pub struct LexerIterator<'a> {
+    input: &'a str,
+    index: usize,
+    finished: bool,
+}
+
+pub enum LexerResult {
+    Token(Token),
+    Error(Error),
+}
+
+impl<'a> LexerIterator<'a> {
+    pub fn new(input: &'a str) -> Self {
+        Self {
+            input,
+            index: 0,
+            finished: false,
+        }
+    }
+
+    pub fn peek_token(&self) -> Option<Token> {
+        let it = self.clone();
+
+        it.filter_map(|res| match res {
+            LexerResult::Error(_) => None,
+            LexerResult::Token(token) => Some(token),
+        })
+        .next()
+    }
+}
+
+impl<'a> Iterator for LexerIterator<'a> {
+    type Item = LexerResult;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.finished {
+            return None;
+        }
+        if self.input.is_empty() {
+            let mut eof = Token::new(TokenKind::Eof, String::from("EOF"));
+            eof.index = self.index;
+
+            self.finished = true;
+            return Some(LexerResult::Token(eof));
+        }
+
+        let mut c = Cursor::new(self.input);
+        let r = c.advance();
+
+        match r {
+            Ok(mut token) => {
+                token.index = self.index;
+                self.index += token.data.len();
+
+                self.input = &self.input[token.data.len()..];
+                Some(LexerResult::Token(token))
+            }
+            Err(mut err) => {
+                err.index = self.index;
+                self.index += err.data.len();
+
+                self.input = &self.input[err.data.len()..];
+                Some(LexerResult::Error(err))
+            }
+        }
+    }
+}
+
 impl Cursor<'_> {
     fn advance(&mut self) -> Result<Token, Error> {
         let first_char = self.bump().unwrap();
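As a usage reference for the streaming API added above, here is a minimal crate-internal sketch. In this commit lib.rs re-exports LexerIterator but not LexerResult, so code outside the crate can only drive the iterator opaquely, as the benches do; the match below assumes the crate-internal path crate::lexer.

use crate::lexer::{LexerIterator, LexerResult};

// Count tokens and lexing errors without accumulating either up front.
fn count_results(input: &str) -> (usize, usize) {
    let (mut tokens, mut errors) = (0, 0);
    for res in LexerIterator::new(input) {
        match res {
            LexerResult::Token(_) => tokens += 1,
            LexerResult::Error(_) => errors += 1,
        }
    }
    (tokens, errors)
}

Because the iterator emits an explicit Eof token once the input is exhausted, the token count includes that final entry.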
2 changes: 1 addition & 1 deletion crates/apollo-parser/src/lib.rs
@@ -180,7 +180,7 @@ pub mod ast;
 mod error;
 mod parser;
 
-pub use crate::lexer::Lexer;
+pub use crate::lexer::{Lexer, LexerIterator};
 
 pub(crate) use crate::lexer::{Token, TokenKind};
 pub(crate) use crate::parser::{
104 changes: 58 additions & 46 deletions crates/apollo-parser/src/parser/mod.rs
Expand Up @@ -5,9 +5,12 @@ mod token_text;

pub(crate) mod grammar;

use std::{cell::RefCell, collections::VecDeque, rc::Rc};
use std::{cell::RefCell, rc::Rc};

use crate::{lexer::Lexer, Error, Token, TokenKind};
use crate::{
lexer::{LexerIterator, LexerResult},
Error, Token, TokenKind,
};

pub use generated::syntax_kind::SyntaxKind;
pub use language::{SyntaxElement, SyntaxNodeChildren, SyntaxToken};
@@ -70,37 +73,23 @@ pub(crate) use token_text::TokenText;
 /// let document = ast.document();
 /// ```
 #[derive(Debug)]
-pub struct Parser {
-    /// Input tokens, including whitespace, in *reverse* order.
-    tokens: VecDeque<Token>,
+pub struct Parser<'a> {
+    lexer: LexerIterator<'a>,
     /// The in-progress tree.
     builder: Rc<RefCell<SyntaxTreeBuilder>>,
     /// The list of syntax errors we've accumulated so far.
     errors: Vec<crate::Error>,
 }
 
-impl Parser {
+impl<'a> Parser<'a> {
     /// Create a new instance of a parser given an input string.
-    pub fn new(input: &str) -> Self {
-        let lexer = Lexer::new(input);
-
-        let mut tokens = VecDeque::new();
-        let mut errors = Vec::new();
-
-        for s in lexer.tokens().to_owned() {
-            tokens.push_back(s);
-        }
-
-        for e in lexer.errors().to_owned() {
-            errors.push(e);
-        }
-
-        errors.reverse();
+    pub fn new(input: &'a str) -> Self {
+        let lexer = LexerIterator::new(input);
 
         Self {
-            tokens,
+            lexer,
             builder: Rc::new(RefCell::new(SyntaxTreeBuilder::new())),
-            errors,
+            errors: Vec::new(),
         }
     }
 
@@ -149,18 +138,26 @@ impl Parser {
     }
 
     /// Get current token's data.
-    pub(crate) fn current(&mut self) -> &Token {
+    pub(crate) fn current(&mut self) -> Token {
         self.peek_token()
             .expect("Could not peek at the current token")
     }
 
     /// Consume a token from the lexer and add it to the AST.
     fn eat(&mut self, kind: SyntaxKind) {
-        let token = self
-            .tokens
-            .pop_front()
-            .expect("Could not eat a token from the AST");
-        self.builder.borrow_mut().token(kind, token.data());
+        loop {
+            match self
+                .lexer
+                .next()
+                .expect("Could not eat a token from the AST")
+            {
+                LexerResult::Error(e) => self.errors.push(e),
+                LexerResult::Token(token) => {
+                    self.builder.borrow_mut().token(kind, token.data());
+                    break;
+                }
+            }
+        }
     }
 
     /// Create a parser error and push it into the error vector.
@@ -209,9 +206,16 @@
 
     /// Consume a token from the lexer.
     pub(crate) fn pop(&mut self) -> Token {
-        self.tokens
-            .pop_front()
-            .expect("Could not pop a token from the AST")
+        loop {
+            match self
+                .lexer
+                .next()
+                .expect("Could not pop a token from the AST")
+            {
+                LexerResult::Error(e) => self.errors.push(e),
+                LexerResult::Token(token) => return token,
+            }
+        }
     }
 
     /// Insert a token into the AST.
@@ -235,35 +239,43 @@
 
     /// Peek the next Token and return its TokenKind.
     pub(crate) fn peek(&self) -> Option<TokenKind> {
-        self.tokens.front().map(|token| token.kind())
+        self.lexer.peek_token().map(|token| token.kind())
     }
 
     /// Peek the next Token and return it.
-    pub(crate) fn peek_token(&self) -> Option<&Token> {
-        self.tokens.front()
+    pub(crate) fn peek_token(&self) -> Option<Token> {
+        self.lexer.peek_token()
     }
 
     /// Peek Token `n` and return its TokenKind.
     pub(crate) fn peek_n(&self, n: usize) -> Option<TokenKind> {
-        self.tokens
-            .iter()
-            .filter(|token| !matches!(token.kind(), TokenKind::Whitespace | TokenKind::Comment))
-            .nth(n - 1)
-            .map(|token| token.kind())
+        let it = self.lexer.clone();
+        it.filter_map(|res| match res {
+            LexerResult::Error(_) => None,
+            LexerResult::Token(token) => Some(token),
+        })
+        .filter(|token| !matches!(token.kind(), TokenKind::Whitespace | TokenKind::Comment))
+        .nth(n - 1)
+        .map(|token| token.kind())
     }
 
     /// Peek next Token's `data` property.
     pub(crate) fn peek_data(&self) -> Option<String> {
-        self.tokens.front().map(|token| token.data().to_string())
+        self.lexer
+            .peek_token()
+            .map(|token| token.data().to_string())
     }
 
     /// Peek `n` Token's `data` property.
     pub(crate) fn peek_data_n(&self, n: usize) -> Option<String> {
-        self.tokens
-            .iter()
-            .filter(|token| !matches!(token.kind(), TokenKind::Whitespace | TokenKind::Comment))
-            .nth(n - 1)
-            .map(|token| token.data().to_string())
+        let it = self.lexer.clone();
+        it.filter_map(|res| match res {
+            LexerResult::Error(_) => None,
+            LexerResult::Token(token) => Some(token),
+        })
+        .filter(|token| !matches!(token.kind(), TokenKind::Whitespace | TokenKind::Comment))
+        .nth(n - 1)
+        .map(|token| token.data().to_string())
     }
 }
 
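The peek_n and peek_data_n bodies above are where the cost noted in the commit message concentrates: every lookahead clones the streaming lexer and re-lexes from the current position, so peeking token n does O(n) lexing work per call, where the old VecDeque version paid a simple indexed scan over tokens that already existed. Here is the pattern in isolation, with invented names (peek_nth and the Result-based item type stand in for the crate's LexerResult):

// Clone the iterator, drop errors and trivia, take the n-th remaining item.
// The clone restarts lexing from the current position; the original lexer
// is left untouched, which is what makes this a "peek".
fn peek_nth<I, T, E>(lexer: &I, n: usize, is_trivia: impl Fn(&T) -> bool) -> Option<T>
where
    I: Iterator<Item = Result<T, E>> + Clone,
{
    lexer
        .clone()
        .filter_map(|res| res.ok()) // errors do not count toward lookahead
        .filter(|tok| !is_trivia(tok)) // skip whitespace and comments
        .nth(n - 1) // n is 1-based, like peek_n
}

fn main() {
    let results: Vec<Result<&str, ()>> = vec![Ok("query"), Ok(" "), Err(()), Ok("{")];
    let lexer = results.into_iter();
    // Peeking the 2nd non-trivia token re-scans "query" and " " on every call.
    assert_eq!(peek_nth(&lexer, 2, |t: &&str| t.trim().is_empty()), Some("{"));
}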
