diff --git a/crates/apollo-parser/src/lexer/cursor.rs b/crates/apollo-parser/src/lexer/cursor.rs index f103d4c39..591e573d7 100644 --- a/crates/apollo-parser/src/lexer/cursor.rs +++ b/crates/apollo-parser/src/lexer/cursor.rs @@ -1,63 +1,127 @@ -use std::str::Chars; +use std::str::CharIndices; use crate::Error; + /// Peekable iterator over a char sequence. +#[derive(Debug, Clone)] pub(crate) struct Cursor<'a> { - chars: Chars<'a>, + index: usize, + offset: usize, + source: &'a str, + chars: CharIndices<'a>, + pending: Option<char>, pub(crate) err: Option<Error>, } impl<'a> Cursor<'a> { pub(crate) fn new(input: &'a str) -> Cursor<'a> { Cursor { - chars: input.chars(), + index: 0, + offset: 0, + pending: None, + source: input, + chars: input.char_indices(), err: None, } } } -pub(crate) const EOF_CHAR: char = '\0'; - impl<'a> Cursor<'a> { - /// Returns nth character relative to the current cursor position. - fn nth_char(&self, n: usize) -> char { - self.chars().nth(n).unwrap_or(EOF_CHAR) + /// Current place (index) in the cursor. + pub(crate) fn index(&self) -> usize { + self.index } - /// Peeks the next char in input without consuming. - pub(crate) fn first(&self) -> char { - self.nth_char(0) + /// Returns true if a character is pending. + pub(crate) fn is_pending(&self) -> bool { + self.pending.is_some() } - /// Peeks the second char in input without consuming. - pub(crate) fn second(&self) -> char { - self.nth_char(1) + /// Returns the text between `index` and `offset`, then moves `index` to `offset` and marks the character at `offset` as pending. + pub(crate) fn prev_str(&mut self) -> &'a str { + let slice = &self.source[self.index..self.offset]; + + self.index = self.offset; + self.pending = self + .source + .get(self.offset..) + .and_then(|subslice| subslice.chars().next()); + + slice } - /// Checks if there are chars to consume. - pub(crate) fn is_eof(&self) -> bool { - self.chars.as_str().is_empty() + /// Returns the text from `index` up to the next character, which becomes pending; at end of input, returns the remaining text. 
+ pub(crate) fn current_str(&mut self) -> &'a str { + self.pending = None; + + if let Some((pos, next)) = self.chars.next() { + let current = self.index; + + self.index = pos; + self.offset = pos; + self.pending = Some(next); + + self.source.get(current..pos) + } else { + let current = self.index; + self.index = self.source.len() - 1; + + self.source.get(current..) + } + .unwrap() } /// Moves to the next character. pub(crate) fn bump(&mut self) -> Option<char> { - let c = self.chars.next()?; + if self.pending.is_some() { + return self.pending.take(); + } + + if self.offset == self.source.len() { + return None; + } + + let (pos, c) = self.chars.next()?; + self.offset = pos; Some(c) } + /// Consumes the next character and returns true if it equals `c`; otherwise stores it as pending and returns false. Panics if a character is already pending. + pub(crate) fn eatc(&mut self, c: char) -> bool { + if self.pending.is_some() { + panic!("dont call eatc when a character is pending"); + } + + if let Some((pos, c_in)) = self.chars.next() { + self.offset = pos; + + if c_in == c { + return true; + } + + self.pending = Some(c_in); + } + + false + } + /// Get current error object in the cursor. pub(crate) fn err(&mut self) -> Option<Error> { self.err.clone() } + /// Clears any pending character and returns the rest of the source, starting at `index`. + pub(crate) fn drain(&mut self) -> &'a str { + self.pending = None; + let start = self.index; + self.index = self.source.len() - 1; + + self.source.get(start..=self.index).unwrap() + } + /// Add error object to the cursor. pub(crate) fn add_err(&mut self, err: Error) { self.err = Some(err) } - - /// Returns a `Chars` iterator over the remaining characters. 
- fn chars(&self) -> Chars<'_> { - self.chars.clone() - } } diff --git a/crates/apollo-parser/src/lexer/mod.rs b/crates/apollo-parser/src/lexer/mod.rs index 04adec473..6147e80d7 100644 --- a/crates/apollo-parser/src/lexer/mod.rs +++ b/crates/apollo-parser/src/lexer/mod.rs @@ -30,9 +30,31 @@ pub struct Lexer<'a> { input: &'a str, index: usize, finished: bool, + cursor: Cursor<'a>, pub(crate) limit_tracker: LimitTracker, } +#[derive(Debug)] +enum State { + Start, + Done, + Ident, + StringLiteralEscapedUnicode(usize), + StringLiteral, + StringLiteralStart, + BlockStringLiteralEscapedUnicode(usize), + BlockStringLiteral, + BlockStringLiteralBackslash, + StringLiteralBackslash, + IntLiteral, + FloatLiteral, + ExponentLiteral, + Whitespace, + Comment, + SpreadOperator, + PlusMinus, +} + impl<'a> Lexer<'a> { /// Create a lexer for a GraphQL source text. /// @@ -54,6 +76,7 @@ impl<'a> Lexer<'a> { pub fn new(input: &'a str) -> Self { Self { input, + cursor: Cursor::new(input), index: 0, finished: false, limit_tracker: LimitTracker::new(usize::MAX), @@ -61,12 +84,12 @@ impl<'a> Lexer<'a> { } pub fn with_limit(mut self, limit: usize) -> Self { - self.limit_tracker.limit = limit; + self.limit_tracker = LimitTracker::new(limit); self } /// Lex the full source text, consuming the lexer. 
- pub fn lex(self) -> (Vec, Vec) { + pub fn lex(self) -> (Vec>, Vec) { let mut tokens = vec![]; let mut errors = vec![]; @@ -82,14 +105,15 @@ impl<'a> Lexer<'a> { } impl<'a> Iterator for Lexer<'a> { - type Item = Result; + type Item = Result, Error>; fn next(&mut self) -> Option { if self.finished { return None; } + if self.input.is_empty() { - let mut eof = Token::new(TokenKind::Eof, String::from("EOF")); + let mut eof = Token::new(TokenKind::Eof, "EOF"); eof.index = self.index; self.finished = true; @@ -101,345 +125,447 @@ impl<'a> Iterator for Lexer<'a> { self.finished = true; return Some(Err(Error::limit( "token limit reached, aborting lexing", - self.index, + self.cursor.index(), ))); } - let mut c = Cursor::new(self.input); - let r = c.advance(); + match self.cursor.advance() { + Ok(token) => { + if matches!(token.kind(), TokenKind::Eof) { + self.finished = true; - match r { - Ok(mut token) => { - token.index = self.index; - self.index += token.data.len(); + return Some(Ok(token)); + } - self.input = &self.input[token.data.len()..]; Some(Ok(token)) } - Err(mut err) => { - err.index = self.index; - self.index += err.data.len(); - - self.input = &self.input[err.data.len()..]; - Some(Err(err)) - } + Err(err) => Some(Err(err)), } } } -impl Cursor<'_> { - fn advance(&mut self) -> Result { - let first_char = self.bump().unwrap(); - - match first_char { - '"' => self.string_value(first_char), - '#' => self.comment(first_char), - '.' => self.spread_operator(first_char), - c if is_whitespace(c) => self.whitespace(c), - c if is_ident_char(c) => self.ident(c), - c @ '-' | c @ '+' => self.number(c), - c if is_digit_char(c) => self.number(c), - '!' 
=> Ok(Token::new(TokenKind::Bang, first_char.into())), - '$' => Ok(Token::new(TokenKind::Dollar, first_char.into())), - '&' => Ok(Token::new(TokenKind::Amp, first_char.into())), - '(' => Ok(Token::new(TokenKind::LParen, first_char.into())), - ')' => Ok(Token::new(TokenKind::RParen, first_char.into())), - ':' => Ok(Token::new(TokenKind::Colon, first_char.into())), - ',' => Ok(Token::new(TokenKind::Comma, first_char.into())), - '=' => Ok(Token::new(TokenKind::Eq, first_char.into())), - '@' => Ok(Token::new(TokenKind::At, first_char.into())), - '[' => Ok(Token::new(TokenKind::LBracket, first_char.into())), - ']' => Ok(Token::new(TokenKind::RBracket, first_char.into())), - '{' => Ok(Token::new(TokenKind::LCurly, first_char.into())), - '|' => Ok(Token::new(TokenKind::Pipe, first_char.into())), - '}' => Ok(Token::new(TokenKind::RCurly, first_char.into())), - c => Err(Error::new("Unexpected character", c.to_string())), - } - } - - fn string_value(&mut self, first_char: char) -> Result { - // TODO @lrlna: consider using a 'terminated' bool to store whether a string - // character or block character are terminated (rust's lexer does this). 
- let mut buf = String::new(); - buf.push(first_char); // the first " we already matched on - - let Some(c) = self.bump() else { - return Err(Error::new( - "unexpected end of data while lexing string value", - "\"".to_string(), - )); +impl<'a> Cursor<'a> { + fn advance(&mut self) -> Result, Error> { + let mut state = State::Start; + let mut token = Token { + kind: TokenKind::Eof, + data: "EOF", + index: self.index(), }; - match c { - '"' => self.block_string_value(buf, c), - t => { - buf.push(t); - let mut was_backslash = t == '\\'; - let mut unicode_chars_left = 0; - - while !self.is_eof() { - let c = self.bump().unwrap(); - - if unicode_chars_left > 0 { - unicode_chars_left -= 1; - - if c == '"' { - buf.push('"'); - self.add_err(Error::new( - "incomplete unicode escape sequence", - c.to_string(), - )); - break; + while let Some(c) = self.bump() { + match state { + State::Start => { + match c { + '"' => { + token.kind = TokenKind::StringValue; + state = State::StringLiteralStart; } - - if !c.is_ascii_hexdigit() { - self.add_err(Error::new( - "invalid unicode escape sequence", - c.to_string(), - )); + '#' => { + token.kind = TokenKind::Comment; + state = State::Comment; } - } - - if was_backslash { - match c { - 'u' => unicode_chars_left = 4, - c if is_escaped_char(c) => (), - c => { - self.add_err(Error::new( - "unexpected escaped character", - c.to_string(), - )); - } + '.' 
=> { + token.kind = TokenKind::Spread; + state = State::SpreadOperator; } - } - - buf.push(c); - if c == '"' { - if !was_backslash { - break; + c if is_whitespace(c) => { + token.kind = TokenKind::Whitespace; + state = State::Whitespace; } - } else if is_line_terminator(c) { - self.add_err(Error::new("unexpected line terminator", c.to_string())); - } - was_backslash = c == '\\' && !was_backslash; + c if is_ident_char(c) => { + token.kind = TokenKind::Name; + state = State::Ident; + } + '+' | '-' => { + token.kind = TokenKind::Int; + state = State::PlusMinus; + } + c if c.is_ascii_digit() => { + token.kind = TokenKind::Int; + state = State::IntLiteral; + } + '!' => { + token.kind = TokenKind::Bang; + token.data = self.current_str(); + return Ok(token); + } + '$' => { + token.kind = TokenKind::Dollar; + token.data = self.current_str(); + return Ok(token); + } + '&' => { + token.kind = TokenKind::Amp; + token.data = self.current_str(); + return Ok(token); + } + '(' => { + token.kind = TokenKind::LParen; + token.data = self.current_str(); + return Ok(token); + } + ')' => { + token.kind = TokenKind::RParen; + token.data = self.current_str(); + return Ok(token); + } + ':' => { + token.kind = TokenKind::Colon; + token.data = self.current_str(); + return Ok(token); + } + ',' => { + token.kind = TokenKind::Comma; + token.data = self.current_str(); + return Ok(token); + } + '=' => { + token.kind = TokenKind::Eq; + token.data = self.current_str(); + return Ok(token); + } + '@' => { + token.kind = TokenKind::At; + token.data = self.current_str(); + return Ok(token); + } + '[' => { + token.kind = TokenKind::LBracket; + token.data = self.current_str(); + return Ok(token); + } + ']' => { + token.kind = TokenKind::RBracket; + token.data = self.current_str(); + return Ok(token); + } + '{' => { + token.kind = TokenKind::LCurly; + token.data = self.current_str(); + return Ok(token); + } + '|' => { + token.kind = TokenKind::Pipe; + token.data = self.current_str(); + return Ok(token); 
+ } + '}' => { + token.kind = TokenKind::RCurly; + token.data = self.current_str(); + return Ok(token); + } + c => { + return Err(Error::new( + format!("Unexpected character \"{}\"", c), + self.current_str().to_string(), + )) + } + }; } + State::Ident => match c { + curr if is_ident_char(curr) || curr.is_ascii_digit() => {} + _ => { + token.data = self.prev_str(); - if !buf.ends_with('"') { - // If it's an unclosed string then take all remaining tokens into this string value - while !self.is_eof() { - buf.push(self.bump().unwrap()); + state = State::Done; + break; } - self.add_err(Error::new("unterminated string value", buf.clone())); - } - - if let Some(mut err) = self.err() { - err.set_data(buf); - return Err(err); - } - - Ok(Token::new(TokenKind::StringValue, buf)) - } - } - } - - fn block_string_value(&mut self, mut buf: String, char: char) -> Result { - buf.push(char); // the second " we already matched on - - let c = match self.bump() { - None => { - return Ok(Token::new(TokenKind::StringValue, buf)); - } - Some(c) => c, - }; - - if let first_char @ '"' = c { - buf.push(first_char); - - while !self.is_eof() { - let c = self.bump().unwrap(); - let was_backslash = c == '\\'; + }, + State::Whitespace => match c { + curr if is_whitespace(curr) => {} + _ => { + token.data = self.prev_str(); + + state = State::Done; + break; + } + }, + State::BlockStringLiteral => match c { + '\\' => { + state = State::BlockStringLiteralBackslash; + } + '"' => { + // Require two additional quotes to complete the triple quote. 
+ if self.eatc('"') && self.eatc('"') { + token.data = self.current_str(); - if was_backslash && !is_escaped_char(c) && c != 'u' { - self.add_err(Error::new("unexpected escaped character", c.to_string())); - } + state = State::Done; + break; + } + } + _ => {} + }, + State::StringLiteralStart => match c { + '"' => { + if self.eatc('"') { + state = State::BlockStringLiteral; + + continue; + } - buf.push(c); + if self.is_pending() { + token.data = self.prev_str(); + } else { + token.data = self.current_str(); + } - if was_backslash { - while self.first() == '"' { - buf.push(self.first()); - self.bump(); + state = State::Done; + break; } - } else if c == '"' && ('"', '"') == (self.first(), self.second()) { - buf.push(self.first()); - buf.push(self.second()); - self.bump(); - self.bump(); - break; - } - } - } - - Ok(Token::new(TokenKind::StringValue, buf)) - } + '\\' => { + state = State::StringLiteralBackslash; + } + _ => { + state = State::StringLiteral; - fn comment(&mut self, first_char: char) -> Result { - let mut buf = String::new(); - buf.push(first_char); + continue; + } + }, + State::BlockStringLiteralEscapedUnicode(remaining) => match c { + '"' => { + self.add_err(Error::new( + "incomplete unicode escape sequence", + c.to_string(), + )); + token.data = self.current_str(); + state = State::Done; - while !self.is_eof() { - let first = self.bump().unwrap(); - if !is_line_terminator(first) { - buf.push(first); - } else { - break; - } - } + break; + } + c if !c.is_ascii_hexdigit() => { + self.add_err(Error::new("invalid unicode escape sequence", c.to_string())); + state = State::BlockStringLiteral; - Ok(Token::new(TokenKind::Comment, buf)) - } + continue; + } + _ => { + if remaining <= 1 { + state = State::BlockStringLiteral; - fn spread_operator(&mut self, first_char: char) -> Result { - let mut buf = String::new(); - buf.push(first_char); + continue; + } - match (self.first(), self.second()) { - ('.', '.') => { - buf.push('.'); - buf.push('.'); - self.bump(); - 
self.bump(); - } - ('.', b) => { - self.bump(); - buf.push('.'); + state = State::BlockStringLiteralEscapedUnicode(remaining - 1) + } + }, + State::StringLiteralEscapedUnicode(remaining) => match c { + '"' => { + self.add_err(Error::new( + "incomplete unicode escape sequence", + c.to_string(), + )); + token.data = self.current_str(); + state = State::Done; - self.add_err(Error::new("Unterminated spread operator", format!("..{b}"))); - } - (a, b) => self.add_err(Error::new( - "Unterminated spread operator", - format!(".{a}{b}"), - )), - } + break; + } + c if !c.is_ascii_hexdigit() => { + self.add_err(Error::new("invalid unicode escape sequence", c.to_string())); + state = State::StringLiteral; - if let Some(mut err) = self.err() { - err.set_data(buf); - return Err(err); - } + continue; + } + _ => { + if remaining <= 1 { + state = State::StringLiteral; - Ok(Token::new(TokenKind::Spread, buf)) - } + continue; + } - fn whitespace(&mut self, first_char: char) -> Result { - let mut buf = String::new(); - buf.push(first_char); + state = State::StringLiteralEscapedUnicode(remaining - 1) + } + }, + State::StringLiteral => match c { + '"' => { + token.data = self.current_str(); - while !self.is_eof() { - let first = self.bump().unwrap(); - if is_whitespace(first) { - buf.push(first); - } else { - break; - } - } + state = State::Done; + break; + } + curr if is_line_terminator(curr) => { + self.add_err(Error::new("unexpected line terminator", "".to_string())); + } + '\\' => { + state = State::StringLiteralBackslash; + } + _ => {} + }, + State::BlockStringLiteralBackslash => match c { + '"' => { + while self.eatc('"') {} - Ok(Token::new(TokenKind::Whitespace, buf)) - } + state = State::BlockStringLiteral; + } + curr if is_escaped_char(curr) => { + state = State::BlockStringLiteral; + } + 'u' => { + state = State::BlockStringLiteralEscapedUnicode(4); + } + _ => { + self.add_err(Error::new("unexpected escaped character", c.to_string())); - fn ident(&mut self, first_char: char) -> 
Result { - let mut buf = String::new(); - buf.push(first_char); - - while !self.is_eof() { - let first = self.first(); - if is_ident_char(first) || is_digit_char(first) { - buf.push(first); - self.bump(); - } else { - break; - } - } + state = State::BlockStringLiteral; + } + }, + State::StringLiteralBackslash => match c { + curr if is_escaped_char(curr) => { + state = State::StringLiteral; + } + 'u' => { + state = State::StringLiteralEscapedUnicode(4); + } + _ => { + self.add_err(Error::new("unexpected escaped character", c.to_string())); - Ok(Token::new(TokenKind::Name, buf)) - } + state = State::StringLiteral; + } + }, + State::IntLiteral => match c { + curr if curr.is_ascii_digit() => {} + '.' => { + token.kind = TokenKind::Float; + state = State::FloatLiteral; + } + 'e' | 'E' => { + token.kind = TokenKind::Float; + state = State::ExponentLiteral; + } + _ => { + token.data = self.prev_str(); - fn number(&mut self, first_digit: char) -> Result { - let mut buf = String::new(); - buf.push(first_digit); - - let mut has_exponent = false; - let mut has_fractional = false; - let mut has_digit = is_digit_char(first_digit); - - while !self.is_eof() { - let first = self.first(); - match first { - 'e' | 'E' => { - buf.push(first); - self.bump(); - if !has_digit { - self.add_err(Error::new( - format!("Unexpected character `{first}` in exponent"), - first.to_string(), - )); + state = State::Done; + break; } - if has_exponent { + }, + State::FloatLiteral => match c { + curr if curr.is_ascii_digit() => {} + '.' => { self.add_err(Error::new( - format!("Unexpected character `{first}`"), - first.to_string(), + format!("Unexpected character `{}`", c), + c.to_string(), )); + + continue; } - has_exponent = true; - if matches!(self.first(), '+' | '-') { - buf.push(self.first()); - self.bump(); + 'e' | 'E' => { + state = State::ExponentLiteral; } - } - '.' 
=> { - buf.push(first); - self.bump(); + _ => { + token.data = self.prev_str(); - if !has_digit { - self.add_err(Error::new( - format!("Unexpected character `{first}` before a digit"), - first.to_string(), + state = State::Done; + break; + } + }, + State::ExponentLiteral => match c { + curr if curr.is_ascii_digit() => { + state = State::FloatLiteral; + } + '+' | '-' => { + state = State::FloatLiteral; + } + _ => { + let err = self.current_str(); + return Err(Error::new( + format!("Unexpected character `{}`", err), + err.to_string(), )); } + }, + State::SpreadOperator => match c { + '.' => { + if self.eatc('.') { + token.data = self.current_str(); + return Ok(token); + } - if has_fractional { - self.add_err(Error::new( - format!("Unexpected character `{first}`"), - first.to_string(), + break; + } + _ => break, + }, + State::PlusMinus => match c { + curr if curr.is_ascii_digit() => { + state = State::IntLiteral; + } + _ => { + let curr = self.current_str(); + return Err(Error::new( + format!("Unexpected character `{}`", curr), + curr.to_string(), )); } + }, + State::Comment => match c { + curr if is_line_terminator(curr) => { + token.data = self.prev_str(); - if has_exponent { - self.add_err(Error::new( - format!("Unexpected character `{first}`"), - first.to_string(), - )); + state = State::Done; + break; } + _ => {} + }, + State::Done => unreachable!("must finalize loop when State::Done"), + } + } - has_fractional = true; - } - first if is_digit_char(first) => { - buf.push(first); - self.bump(); - has_digit = true; + match state { + State::Done => { + if let Some(mut err) = self.err() { + err.set_data(token.data.to_string()); + err.index = token.index; + self.err = None; + + return Err(err); } - _ => break, + + Ok(token) } - } + State::Start => { + token.index += 1; + Ok(token) + } + State::StringLiteralStart => { + let curr = self.current_str(); - if let Some(mut err) = self.err() { - err.set_data(buf); - return Err(err); - } + Err(Error::new( + "unexpected end of 
data while lexing string value", + curr.to_string(), + )) + } + State::StringLiteral => { + let curr = self.drain(); + + Err(Error::with_loc( + "unterminated string value", + curr.to_string(), + token.index, + )) + } + State::SpreadOperator => { + let data = if self.is_pending() { + self.prev_str() + } else { + self.current_str() + }; + + Err(Error::with_loc( + "Unterminated spread operator", + data.to_string(), + token.index, + )) + } + _ => { + if let Some(mut err) = self.err() { + err.set_data(self.current_str().to_string()); + return Err(err); + } + + token.data = self.current_str(); - if has_exponent || has_fractional { - Ok(Token::new(TokenKind::Float, buf)) - } else { - Ok(Token::new(TokenKind::Int, buf)) + Ok(token) + } } } } @@ -480,22 +606,12 @@ fn is_line_terminator(c: char) -> bool { matches!(c, '\n' | '\r') } -fn is_digit_char(c: char) -> bool { - c.is_ascii_digit() -} - // EscapedCharacter // " \ / b f n r t fn is_escaped_char(c: char) -> bool { matches!(c, '"' | '\\' | '/' | 'b' | 'f' | 'n' | 'r' | 't') } -// SourceCharacter -// /[\u0009\u000A\u000D\u0020-\uFFFF]/ -// fn is_source_char(c: char) -> bool { -// matches!(c, '\t' | '\r' | '\n' | '\u{0020}'..='\u{FFFF}') -// } - #[cfg(test)] mod test { use super::*; @@ -505,7 +621,7 @@ mod test { let schema = r#" type Query { name: String - format: String = "Y-m-d\\TH:i:sP" + format: String = "Y-m-d\\TH:i:sP" } "#; let (tokens, errors) = Lexer::new(schema).lex(); diff --git a/crates/apollo-parser/src/lexer/token.rs b/crates/apollo-parser/src/lexer/token.rs index ae230ba3c..a700665c8 100644 --- a/crates/apollo-parser/src/lexer/token.rs +++ b/crates/apollo-parser/src/lexer/token.rs @@ -4,14 +4,14 @@ use crate::TokenKind; /// A token generated by the lexer. 
#[derive(Clone)] -pub struct Token { +pub struct Token<'a> { pub(crate) kind: TokenKind, - pub(crate) data: String, + pub(crate) data: &'a str, pub(crate) index: usize, } -impl Token { - pub(crate) fn new(kind: TokenKind, data: String) -> Self { +impl<'a> Token<'a> { + pub(crate) fn new(kind: TokenKind, data: &'a str) -> Self { Self { kind, data, @@ -26,7 +26,7 @@ impl Token { /// Get a reference to the token's data. pub fn data(&self) -> &str { - self.data.as_str() + self.data } /// Get a reference to the token's loc. @@ -35,7 +35,7 @@ impl Token { } } -impl fmt::Debug for Token { +impl<'a> fmt::Debug for Token<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let start = self.index; let end = self.index + self.data.len(); diff --git a/crates/apollo-parser/src/parser/grammar/ty.rs b/crates/apollo-parser/src/parser/grammar/ty.rs index f57f0dfce..3f2acdf1e 100644 --- a/crates/apollo-parser/src/parser/grammar/ty.rs +++ b/crates/apollo-parser/src/parser/grammar/ty.rs @@ -30,7 +30,7 @@ pub(crate) fn ty(p: &mut Parser) { /// When errors occur deeper inside nested types like lists, this function /// pushes errors *inside* the list to the parser, and returns an Ok() with /// an incomplete type. -fn parse(p: &mut Parser) -> Result<(), Token> { +fn parse<'a>(p: &mut Parser<'a>) -> Result<(), Token<'a>> { let checkpoint = p.checkpoint_node(); match p.peek() { Some(T!['[']) => { diff --git a/crates/apollo-parser/src/parser/mod.rs b/crates/apollo-parser/src/parser/mod.rs index 2e25a284a..e5298d1d9 100644 --- a/crates/apollo-parser/src/parser/mod.rs +++ b/crates/apollo-parser/src/parser/mod.rs @@ -73,11 +73,11 @@ pub(crate) use token_text::TokenText; pub struct Parser<'a> { lexer: Lexer<'a>, /// Store one lookahead token so we don't need to reparse things as much. - current_token: Option, + current_token: Option>, /// The in-progress tree. builder: Rc>, /// Ignored tokens that should be added to the tree. 
- ignored: Vec, + ignored: Vec>, /// The list of syntax errors we've accumulated so far. errors: Vec, /// The limit to apply to parsing. @@ -296,7 +296,7 @@ impl<'a> Parser<'a> { } /// Gets the next token from the lexer. - fn next_token(&mut self) -> Option { + fn next_token(&mut self) -> Option> { for res in &mut self.lexer { match res { Err(err) => { @@ -315,7 +315,7 @@ impl<'a> Parser<'a> { } /// Consume a token from the lexer. - pub(crate) fn pop(&mut self) -> Token { + pub(crate) fn pop(&mut self) -> Token<'a> { if let Some(token) = self.current_token.take() { return token; } @@ -371,6 +371,15 @@ impl<'a> Parser<'a> { /// Peek Token `n` and return it. pub(crate) fn peek_token_n(&self, n: usize) -> Option { + self.peek_n_inner(n) + } + + /// Peek Token `n` and return its TokenKind. + pub(crate) fn peek_n(&self, n: usize) -> Option { + self.peek_n_inner(n).map(|token| token.kind()) + } + + fn peek_n_inner(&self, n: usize) -> Option { self.current_token .iter() .cloned() @@ -381,11 +390,6 @@ impl<'a> Parser<'a> { .nth(n - 1) } - /// Peek Token `n` and return its TokenKind. - pub(crate) fn peek_n(&self, n: usize) -> Option { - self.peek_token_n(n).map(|token| token.kind()) - } - /// Peek next Token's `data` property. pub(crate) fn peek_data(&mut self) -> Option { self.peek_token().map(|token| token.data().to_string()) @@ -593,6 +597,7 @@ mod tests { let ast = Parser::new("type Query { a a a a a a a a a }") .token_limit(100) .parse(); - assert_eq!(ast.token_limit().high, 25); + // token count includes EOF token. 
+ assert_eq!(ast.token_limit().high, 26); } } diff --git a/crates/apollo-parser/test_data/lexer/ok/0013_emoji_char_in_string_value.graphql b/crates/apollo-parser/test_data/lexer/ok/0013_emoji_char_in_string_value.graphql new file mode 100644 index 000000000..cf50d2253 --- /dev/null +++ b/crates/apollo-parser/test_data/lexer/ok/0013_emoji_char_in_string_value.graphql @@ -0,0 +1,5 @@ +mutation UpdateStuff { + stuffUpdate(input: { + tags: "really great 👻 halloween" + }) { stuff { tags } } +} diff --git a/crates/apollo-parser/test_data/lexer/ok/0013_emoji_char_in_string_value.txt b/crates/apollo-parser/test_data/lexer/ok/0013_emoji_char_in_string_value.txt new file mode 100644 index 000000000..a9e7a85f0 --- /dev/null +++ b/crates/apollo-parser/test_data/lexer/ok/0013_emoji_char_in_string_value.txt @@ -0,0 +1,36 @@ +NAME@0:8 "mutation" +WHITESPACE@8:9 " " +NAME@9:20 "UpdateStuff" +WHITESPACE@20:21 " " +L_CURLY@21:22 "{" +WHITESPACE@22:25 "\n " +NAME@25:36 "stuffUpdate" +L_PAREN@36:37 "(" +NAME@37:42 "input" +COLON@42:43 ":" +WHITESPACE@43:44 " " +L_CURLY@44:45 "{" +WHITESPACE@45:50 "\n " +NAME@50:54 "tags" +COLON@54:55 ":" +WHITESPACE@55:56 " " +STRING_VALUE@56:85 "\"really great 👻 halloween\"" +WHITESPACE@85:88 "\n " +R_CURLY@88:89 "}" +R_PAREN@89:90 ")" +WHITESPACE@90:91 " " +L_CURLY@91:92 "{" +WHITESPACE@92:93 " " +NAME@93:98 "stuff" +WHITESPACE@98:99 " " +L_CURLY@99:100 "{" +WHITESPACE@100:101 " " +NAME@101:105 "tags" +WHITESPACE@105:106 " " +R_CURLY@106:107 "}" +WHITESPACE@107:108 " " +R_CURLY@108:109 "}" +WHITESPACE@109:110 "\n" +R_CURLY@110:111 "}" +WHITESPACE@111:112 "\n" +EOF@112:112