feat(parser): Stream tokens from the lexer (#115)
Co-authored-by: Iryna Shestak <[email protected]>
Co-authored-by: Renée Kooi <[email protected]>
3 people authored Nov 4, 2022
1 parent e72853c commit 87522b7
Showing 10 changed files with 233 additions and 130 deletions.
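In short: `Lexer::new` no longer lexes the whole input up front into internal `Vec<Token>` and `Vec<Error>` buffers. The lexer is now an iterator that yields one `Result<Token, Error>` at a time, and a new `lex()` method consumes it to collect both vectors. A minimal sketch of the new call site (the sample query string is made up; the API is taken from the diff below):

    use apollo_parser::Lexer;

    fn main() {
        let query = "{ animal }";

        // lex() consumes the iterator and collects tokens and errors,
        // mirroring what the old eager constructor did internally.
        let (tokens, errors) = Lexer::new(query).lex();
        assert!(errors.is_empty());
        println!("{} tokens", tokens.len());
    }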
2 changes: 1 addition & 1 deletion crates/apollo-compiler/src/tests.rs
@@ -97,7 +97,7 @@ fn collect_graphql_files(root_dir: &Path, paths: &[&str]) -> Vec<(PathBuf, Strin
 /// Collects paths to all `.graphql` files from `dir` in a sorted `Vec<PathBuf>`.
 fn graphql_files_in_dir(dir: &Path) -> Vec<PathBuf> {
     let mut acc = Vec::new();
-    for file in fs::read_dir(&dir).unwrap() {
+    for file in fs::read_dir(dir).unwrap() {
         let file = file.unwrap();
         let path = file.path();
         if path.extension().unwrap_or_default() == "graphql" {
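(The change above drops a needless borrow: `fs::read_dir` accepts any `impl AsRef<Path>`, and `dir` is already a `&Path`, so `&dir` only added an extra layer of reference.)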
9 changes: 7 additions & 2 deletions crates/apollo-parser/Cargo.toml
@@ -29,6 +29,11 @@ unindent = "0.1.7"
 criterion = "0.3.0"
 
 [[bench]]
-name = "benches"
-path = "benches/benches.rs"
+name = "query"
+path = "benches/query.rs"
 harness = false
+
+[[bench]]
+name = "supergraph"
+path = "benches/supergraph.rs"
+harness = false
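The old catch-all `benches` bench target is split into per-workload `query` and `supergraph` targets. `harness = false` disables the default libtest harness so that `criterion_main!` in each bench file can provide its own `main` function.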
crates/apollo-parser/benches/query.rs (renamed from benches/benches.rs, per the Cargo.toml change above)
@@ -1,4 +1,4 @@
-use apollo_parser::ast;
+use apollo_parser::{ast, Lexer};
 use criterion::*;
 
 fn parse_query(query: &str) {
@@ -24,11 +24,25 @@ fn parse_query(query: &str) {
     }
 }
 
-fn bench_parser_peek_n(c: &mut Criterion) {
+fn bench_query_parser(c: &mut Criterion) {
     let query = "query ExampleQuery($topProductsFirst: Int) {\n me { \n id\n }\n topProducts(first: $topProductsFirst) {\n name\n price\n inStock\n weight\n test test test test test test test test test test test test }\n}";
 
-    c.bench_function("parser_peek_n", move |b| b.iter(|| parse_query(query)));
+    c.bench_function("query_parser", move |b| b.iter(|| parse_query(query)));
 }
 
-criterion_group!(benches, bench_parser_peek_n);
+fn bench_query_lexer(c: &mut Criterion) {
+    let query = "query ExampleQuery($topProductsFirst: Int) {\n me { \n id\n }\n topProducts(first: $topProductsFirst) {\n name\n price\n inStock\n weight\n test test test test test test test test test test test test }\n}";
+
+    c.bench_function("query_lexer", move |b| {
+        b.iter(|| {
+            let lexer = Lexer::new(query);
+
+            for token_res in lexer {
+                let _ = token_res;
+            }
+        })
+    });
+}
+
+criterion_group!(benches, bench_query_lexer, bench_query_parser);
 criterion_main!(benches);
52 changes: 52 additions & 0 deletions crates/apollo-parser/benches/supergraph.rs
@@ -0,0 +1,52 @@
+use apollo_parser::{ast, Lexer};
+use criterion::*;
+
+fn parse_schema(schema: &str) {
+    let parser = apollo_parser::Parser::new(schema);
+    let tree = parser.parse();
+    let errors = tree.errors().collect::<Vec<_>>();
+
+    if !errors.is_empty() {
+        panic!("error parsing query: {:?}", errors);
+    }
+
+    let document = tree.document();
+
+    for definition in document.definitions() {
+        if let ast::Definition::OperationDefinition(operation) = definition {
+            let selection_set = operation
+                .selection_set()
+                .expect("the node SelectionSet is not optional in the spec; qed");
+            for selection in selection_set.selections() {
+                if let ast::Selection::Field(field) = selection {
+                    let _selection_set = field.selection_set();
+                }
+            }
+        }
+    }
+}
+
+fn bench_supergraph_parser(c: &mut Criterion) {
+    let schema = include_str!("../test_data/parser/ok/0032_supergraph.graphql");
+
+    c.bench_function("supergraph_parser", move |b| {
+        b.iter(|| parse_schema(schema))
+    });
+}
+
+fn bench_supergraph_lexer(c: &mut Criterion) {
+    let schema = include_str!("../test_data/parser/ok/0032_supergraph.graphql");
+
+    c.bench_function("supergraph_lexer", move |b| {
+        b.iter(|| {
+            let lexer = Lexer::new(schema);
+
+            for token_res in lexer {
+                let _ = token_res;
+            }
+        })
+    });
+}
+
+criterion_group!(benches, bench_supergraph_lexer, bench_supergraph_parser);
+criterion_main!(benches);
133 changes: 77 additions & 56 deletions crates/apollo-parser/src/lexer/mod.rs
@@ -2,13 +2,12 @@ mod cursor
 mod token;
 mod token_kind;
 
-use std::slice::Iter;
-
 use crate::{lexer::cursor::Cursor, Error};
 
 pub use token::Token;
 pub use token_kind::TokenKind;
-/// Parses tokens into text.
+
+/// Parses GraphQL source text into tokens.
 /// ```rust
 /// use apollo_parser::Lexer;
 ///
@@ -23,70 +22,92 @@ pub use token_kind::TokenKind;
 /// }
 /// }
 /// ";
-/// let lexer = Lexer::new(query);
-/// assert_eq!(lexer.errors().len(), 0);
 ///
-/// let tokens = lexer.tokens();
+/// let (tokens, errors) = Lexer::new(query).lex();
+/// assert_eq!(errors.len(), 0);
 /// ```
-pub struct Lexer {
-    tokens: Vec<Token>,
-    errors: Vec<Error>,
+#[derive(Clone, Debug)]
+pub struct Lexer<'a> {
+    input: &'a str,
+    index: usize,
+    finished: bool,
 }
 
-impl Lexer {
-    /// Create a new instance of `Lexer`.
-    pub fn new(mut input: &str) -> Self {
-        let mut tokens = Vec::new();
-        let mut errors = Vec::new();
-
-        let mut index = 0;
-
-        while !input.is_empty() {
-            let old_input = input;
-
-            if old_input.len() == input.len() {
-                let mut c = Cursor::new(input);
-                let r = c.advance();
-
-                match r {
-                    Ok(mut token) => {
-                        token.index = index;
-                        index += token.data.len();
-
-                        input = &input[token.data.len()..];
-                        tokens.push(token);
-                    }
-                    Err(mut err) => {
-                        err.index = index;
-                        index += err.data.len();
-
-                        input = &input[err.data.len()..];
-                        errors.push(err);
-                    }
-                }
-            }
-        }
-
-        let mut eof = Token::new(TokenKind::Eof, String::from("EOF"));
-        eof.index = index;
-        tokens.push(eof);
-
-        Self { tokens, errors }
-    }
-
-    /// Get a reference to the lexer's tokens.
-    pub fn tokens(&self) -> &[Token] {
-        self.tokens.as_slice()
-    }
-
-    /// Get a reference to the lexer's errors.
-    pub fn errors(&self) -> Iter<'_, Error> {
-        self.errors.iter()
-    }
-
-    /// Consume the lexer and return the tokens and errors.
-    pub fn into_parts(self) -> (Vec<Token>, Vec<Error>) {
-        (self.tokens, self.errors)
-    }
-}
+impl<'a> Lexer<'a> {
+    /// Create a lexer for a GraphQL source text.
+    ///
+    /// The Lexer is an iterator over tokens and errors:
+    /// ```rust
+    /// use apollo_parser::Lexer;
+    ///
+    /// let query = "# --- GraphQL here ---";
+    ///
+    /// let mut lexer = Lexer::new(query);
+    /// let mut tokens = vec![];
+    /// for token in lexer {
+    ///     match token {
+    ///         Ok(token) => tokens.push(token),
+    ///         Err(error) => panic!("{:?}", error),
+    ///     }
+    /// }
+    /// ```
+    pub fn new(input: &'a str) -> Self {
+        Self {
+            input,
+            index: 0,
+            finished: false,
+        }
+    }
+
+    /// Lex the full source text, consuming the lexer.
+    pub fn lex(self) -> (Vec<Token>, Vec<Error>) {
+        let mut tokens = vec![];
+        let mut errors = vec![];
+
+        for item in self {
+            match item {
+                Ok(token) => tokens.push(token),
+                Err(error) => errors.push(error),
+            }
+        }
+
+        (tokens, errors)
+    }
+}
+
+impl<'a> Iterator for Lexer<'a> {
+    type Item = Result<Token, Error>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.finished {
+            return None;
+        }
+        if self.input.is_empty() {
+            let mut eof = Token::new(TokenKind::Eof, String::from("EOF"));
+            eof.index = self.index;
+
+            self.finished = true;
+            return Some(Ok(eof));
+        }
+
+        let mut c = Cursor::new(self.input);
+        let r = c.advance();
+
+        match r {
+            Ok(mut token) => {
+                token.index = self.index;
+                self.index += token.data.len();
+
+                self.input = &self.input[token.data.len()..];
+                Some(Ok(token))
+            }
+            Err(mut err) => {
+                err.index = self.index;
+                self.index += err.data.len();
+
+                self.input = &self.input[err.data.len()..];
+                Some(Err(err))
+            }
+        }
+    }
+}

@@ -427,8 +448,8 @@ mod test {
     #[test]
     fn tests() {
         let gql_1 = "\"\nhello";
-        let lexer_1 = Lexer::new(gql_1);
-        dbg!(lexer_1.tokens);
-        dbg!(lexer_1.errors);
+        let (tokens, errors) = Lexer::new(gql_1).lex();
+        dbg!(tokens);
+        dbg!(errors);
     }
 }
(Diff for the remaining 5 changed files in this commit did not load in this view.)
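Design note on the streaming lexer above: `next()` advances the `Cursor` by one token and the struct now borrows its `input` (hence the new `'a` lifetime), so a consumer can stop early and never pay to lex the rest of the source. A minimal sketch using only the iterator API shown in the diff (the sample source string is made up):

    use apollo_parser::Lexer;

    fn main() {
        let source = "query Q { me { id } }";

        // Only the first five tokens are lexed; the remainder of the
        // source is never touched because the iterator is lazy.
        for item in Lexer::new(source).take(5) {
            match item {
                Ok(token) => println!("{:?}", token),
                Err(error) => eprintln!("{:?}", error),
            }
        }
    }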
