From 2b1919494d8eb62221db1742552fea20698d585d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=B5=20Ho=C3=A0ng=20Long?= <78085736+vohoanglong0107@users.noreply.github.com> Date: Fri, 5 Apr 2024 15:14:05 +0900 Subject: [PATCH] feat: graphql lexer (#2271) --- Cargo.lock | 19 + .../src/generated/node_factory.rs | 39 +- .../src/generated/syntax_factory.rs | 54 +- crates/biome_graphql_parser/Cargo.toml | 32 + crates/biome_graphql_parser/src/lexer/mod.rs | 766 ++++++++++++++++++ .../biome_graphql_parser/src/lexer/tests.rs | 366 +++++++++ crates/biome_graphql_parser/src/lib.rs | 110 +++ .../src/parser/definitions/mod.rs | 58 ++ .../src/parser/definitions/operation.rs | 137 ++++ .../src/parser/directive.rs | 67 ++ crates/biome_graphql_parser/src/parser/mod.rs | 90 ++ .../src/parser/parse_error.rs | 24 + .../biome_graphql_parser/src/token_source.rs | 144 ++++ .../graphql_test_suite/ok/operation.graphql | 7 + .../ok/operation.graphql.snap | 102 +++ .../biome_graphql_parser/tests/spec_test.rs | 116 +++ .../biome_graphql_parser/tests/spec_tests.rs | 13 + .../src/generated/kind.rs | 16 +- .../src/generated/macros.rs | 8 +- .../src/generated/nodes.rs | 173 ++-- .../src/generated/nodes_mut.rs | 28 +- crates/biome_graphql_syntax/src/lib.rs | 6 +- .../biome_graphql_syntax/src/syntax_node.rs | 4 +- xtask/codegen/graphql.ungram | 7 +- xtask/codegen/src/graphql_kind_src.rs | 17 +- 25 files changed, 2259 insertions(+), 144 deletions(-) create mode 100644 crates/biome_graphql_parser/Cargo.toml create mode 100644 crates/biome_graphql_parser/src/lexer/mod.rs create mode 100644 crates/biome_graphql_parser/src/lexer/tests.rs create mode 100644 crates/biome_graphql_parser/src/lib.rs create mode 100644 crates/biome_graphql_parser/src/parser/definitions/mod.rs create mode 100644 crates/biome_graphql_parser/src/parser/definitions/operation.rs create mode 100644 crates/biome_graphql_parser/src/parser/directive.rs create mode 100644 crates/biome_graphql_parser/src/parser/mod.rs create mode 100644 
crates/biome_graphql_parser/src/parser/parse_error.rs create mode 100644 crates/biome_graphql_parser/src/token_source.rs create mode 100644 crates/biome_graphql_parser/tests/graphql_test_suite/ok/operation.graphql create mode 100644 crates/biome_graphql_parser/tests/graphql_test_suite/ok/operation.graphql.snap create mode 100644 crates/biome_graphql_parser/tests/spec_test.rs create mode 100644 crates/biome_graphql_parser/tests/spec_tests.rs diff --git a/Cargo.lock b/Cargo.lock index 24df0caa47eb..0730d93fceb9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -458,6 +458,25 @@ dependencies = [ "biome_rowan", ] +[[package]] +name = "biome_graphql_parser" +version = "0.1.0" +dependencies = [ + "biome_console", + "biome_diagnostics", + "biome_graphql_factory", + "biome_graphql_syntax", + "biome_parser", + "biome_rowan", + "biome_unicode_table", + "insta", + "quickcheck", + "quickcheck_macros", + "tests_macros", + "tracing", + "unicode-bom", +] + [[package]] name = "biome_graphql_syntax" version = "0.1.0" diff --git a/crates/biome_graphql_factory/src/generated/node_factory.rs b/crates/biome_graphql_factory/src/generated/node_factory.rs index 403251c9c86f..e5b6c8a97c99 100644 --- a/crates/biome_graphql_factory/src/generated/node_factory.rs +++ b/crates/biome_graphql_factory/src/generated/node_factory.rs @@ -184,14 +184,6 @@ pub fn graphql_directive_location(value_token_token: SyntaxToken) -> GraphqlDire [Some(SyntaxElement::Token(value_token_token))], )) } -pub fn graphql_document(graphql_definition_list: GraphqlDefinitionList) -> GraphqlDocument { - GraphqlDocument::unwrap_cast(SyntaxNode::new_detached( - GraphqlSyntaxKind::GRAPHQL_DOCUMENT, - [Some(SyntaxElement::Node( - graphql_definition_list.into_syntax(), - ))], - )) -} pub fn graphql_enum_type_definition( enum_token: SyntaxToken, name: GraphqlName, @@ -1126,6 +1118,37 @@ pub fn graphql_operation_type(value_token_token: SyntaxToken) -> GraphqlOperatio [Some(SyntaxElement::Token(value_token_token))], )) } +pub fn 
graphql_root( + definitions: GraphqlDefinitionList, + eof_token: SyntaxToken, +) -> GraphqlRootBuilder { + GraphqlRootBuilder { + definitions, + eof_token, + bom_token: None, + } +} +pub struct GraphqlRootBuilder { + definitions: GraphqlDefinitionList, + eof_token: SyntaxToken, + bom_token: Option, +} +impl GraphqlRootBuilder { + pub fn with_bom_token(mut self, bom_token: SyntaxToken) -> Self { + self.bom_token = Some(bom_token); + self + } + pub fn build(self) -> GraphqlRoot { + GraphqlRoot::unwrap_cast(SyntaxNode::new_detached( + GraphqlSyntaxKind::GRAPHQL_ROOT, + [ + self.bom_token.map(|token| SyntaxElement::Token(token)), + Some(SyntaxElement::Node(self.definitions.into_syntax())), + Some(SyntaxElement::Token(self.eof_token)), + ], + )) + } +} pub fn graphql_root_operation_type_definition( operation_type: GraphqlOperationType, colon_token: SyntaxToken, diff --git a/crates/biome_graphql_factory/src/generated/syntax_factory.rs b/crates/biome_graphql_factory/src/generated/syntax_factory.rs index e41fb5b511f2..191010045a18 100644 --- a/crates/biome_graphql_factory/src/generated/syntax_factory.rs +++ b/crates/biome_graphql_factory/src/generated/syntax_factory.rs @@ -357,25 +357,6 @@ impl SyntaxFactory for GraphqlSyntaxFactory { } slots.into_node(GRAPHQL_DIRECTIVE_LOCATION, children) } - GRAPHQL_DOCUMENT => { - let mut elements = (&children).into_iter(); - let mut slots: RawNodeSlots<1usize> = RawNodeSlots::default(); - let mut current_element = elements.next(); - if let Some(element) = ¤t_element { - if GraphqlDefinitionList::can_cast(element.kind()) { - slots.mark_present(); - current_element = elements.next(); - } - } - slots.next_slot(); - if current_element.is_some() { - return RawSyntaxNode::new( - GRAPHQL_DOCUMENT.to_bogus(), - children.into_iter().map(Some), - ); - } - slots.into_node(GRAPHQL_DOCUMENT, children) - } GRAPHQL_ENUM_TYPE_DEFINITION => { let mut elements = (&children).into_iter(); let mut slots: RawNodeSlots<5usize> = RawNodeSlots::default(); @@ 
-1426,7 +1407,7 @@ impl SyntaxFactory for GraphqlSyntaxFactory { let mut slots: RawNodeSlots<1usize> = RawNodeSlots::default(); let mut current_element = elements.next(); if let Some(element) = ¤t_element { - if element.kind() == IDENT { + if element.kind() == GRAPHQL_NAME { slots.mark_present(); current_element = elements.next(); } @@ -1831,6 +1812,39 @@ impl SyntaxFactory for GraphqlSyntaxFactory { } slots.into_node(GRAPHQL_OPERATION_TYPE, children) } + GRAPHQL_ROOT => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); + if let Some(element) = ¤t_element { + if element.kind() == T![UNICODE_BOM] { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if let Some(element) = ¤t_element { + if GraphqlDefinitionList::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if let Some(element) = ¤t_element { + if element.kind() == T![EOF] { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if current_element.is_some() { + return RawSyntaxNode::new( + GRAPHQL_ROOT.to_bogus(), + children.into_iter().map(Some), + ); + } + slots.into_node(GRAPHQL_ROOT, children) + } GRAPHQL_ROOT_OPERATION_TYPE_DEFINITION => { let mut elements = (&children).into_iter(); let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); diff --git a/crates/biome_graphql_parser/Cargo.toml b/crates/biome_graphql_parser/Cargo.toml new file mode 100644 index 000000000000..85079447ec44 --- /dev/null +++ b/crates/biome_graphql_parser/Cargo.toml @@ -0,0 +1,32 @@ +[package] +authors.workspace = true +categories.workspace = true +edition.workspace = true +homepage.workspace = true +keywords.workspace = true +license.workspace = true +name = "biome_graphql_parser" +repository.workspace = true +version = "0.1.0" + +# See more keys and their definitions at 
https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +biome_console = { workspace = true } +biome_diagnostics = { workspace = true } +biome_graphql_factory = { workspace = true } +biome_graphql_syntax = { workspace = true } +biome_parser = { workspace = true } +biome_rowan = { workspace = true } +biome_unicode_table = { workspace = true } +tracing = { workspace = true } +unicode-bom = { workspace = true } + +[dev-dependencies] +insta = { workspace = true } +quickcheck = { workspace = true } +quickcheck_macros = { workspace = true } +tests_macros = { workspace = true } + +[lints] +workspace = true diff --git a/crates/biome_graphql_parser/src/lexer/mod.rs b/crates/biome_graphql_parser/src/lexer/mod.rs new file mode 100644 index 000000000000..00322d4cd557 --- /dev/null +++ b/crates/biome_graphql_parser/src/lexer/mod.rs @@ -0,0 +1,766 @@ +//! An extremely fast, lookup table based, GraphQL lexer which yields SyntaxKind tokens used by the biome GraphQL parser. +#[rustfmt::skip] +mod tests; + +use biome_graphql_syntax::{GraphqlSyntaxKind, GraphqlSyntaxKind::*, TextLen, TextSize, T}; +use biome_parser::diagnostic::ParseDiagnostic; +use biome_parser::lexer::{Lexer, LexerCheckpoint, TokenFlags}; +use std::ops::Add; + +#[derive(Debug)] +pub struct GraphqlLexer<'src> { + /// Source text + source: &'src str, + + /// The start byte position in the source text of the next token. + position: usize, + + /// Byte offset of the current token from the start of the source + /// The range of the current token can be computed by + /// `self.position - self.current_start`. 
+ current_start: TextSize, + + /// The kind of the current token + current_kind: GraphqlSyntaxKind, + + /// Flags for the current token + current_flags: TokenFlags, + + diagnostics: Vec, +} + +impl<'src> Lexer<'src> for GraphqlLexer<'src> { + const NEWLINE: Self::Kind = NEWLINE; + const WHITESPACE: Self::Kind = WHITESPACE; + + type Kind = GraphqlSyntaxKind; + type LexContext = (); + type ReLexContext = (); + + fn source(&self) -> &'src str { + self.source + } + + fn current(&self) -> Self::Kind { + self.current_kind + } + + fn position(&self) -> usize { + self.position + } + + fn current_start(&self) -> TextSize { + self.current_start + } + + fn push_diagnostic(&mut self, diagnostic: ParseDiagnostic) { + self.diagnostics.push(diagnostic); + } + + fn has_unicode_escape(&self) -> bool { + self.current_flags().has_unicode_escape() + } + + fn has_preceding_line_break(&self) -> bool { + self.current_flags().has_preceding_line_break() + } + + fn consume_newline_or_whitespaces(&mut self) -> Self::Kind { + if self.consume_newline() { + self.current_flags + .set(TokenFlags::PRECEDING_LINE_BREAK, true); + NEWLINE + } else { + self.consume_whitespaces(); + WHITESPACE + } + } + + fn next_token(&mut self, _context: Self::LexContext) -> Self::Kind { + self.current_start = self.text_position(); + self.current_flags = TokenFlags::empty(); + + let kind = match self.current_byte() { + Some(current) => self.consume_token(current), + None => EOF, + }; + + self.current_kind = kind; + + if !kind.is_trivia() { + self.current_flags + .set(TokenFlags::PRECEDING_LINE_BREAK, false); + } + + kind + } + + #[inline] + fn advance_char_unchecked(&mut self) { + let c = self.current_char_unchecked(); + self.position += c.len_utf8(); + } + + /// Advances the current position by `n` bytes. 
/// Lexes the token that starts with `current`, the byte at the lexer's position.
///
/// `next_token` only calls this when a current byte exists, so the lexer is
/// guaranteed to not be at the end of the file here.
///
/// Returns the kind of the consumed token. Anything that cannot start a token
/// is consumed as a single character and reported by
/// `consume_unexpected_character`.
fn consume_token(&mut self, current: u8) -> GraphqlSyntaxKind {
    // Dispatch on the first byte: fixed single-byte punctuators first, then the
    // multi-byte tokens that are recognisable from their first byte.
    match current {
        b'!' => self.consume_byte(T![!]),
        b'$' => self.consume_byte(T![$]),
        b'&' => self.consume_byte(T![&]),
        b'(' => self.consume_byte(T!['(']),
        b')' => self.consume_byte(T![')']),
        b'.' => self.consume_ellipsis(),
        b':' => self.consume_byte(T![:]),
        b'\n' | b'\r' | b'\t' | b' ' => self.consume_newline_or_whitespaces(),
        b'"' => self.consume_string(),
        b'=' => self.consume_byte(T![=]),
        b'@' => self.consume_byte(T![@]),
        b'[' => self.consume_byte(T!['[']),
        b']' => self.consume_byte(T![']']),
        b'{' => self.consume_byte(T!['{']),
        b'|' => self.consume_byte(T![|]),
        b'}' => self.consume_byte(T!['}']),
        b'#' => self.consume_comment(),
        // Names (and keywords) start with an ASCII letter or `_`.
        _ if is_name_start(current) => self.consume_name(current),
        // Numbers start with an ASCII digit or `-`.
        _ if is_number_start(current) => self.consume_number(current),
        // A byte order mark is only looked for at the very start of the source.
        // NOTE(review): `consume_potential_bom` is defined outside this file;
        // presumably it consumes the BOM when one is present - confirm.
        _ if self.position == 0 && self.consume_potential_bom(UNICODE_BOM).is_some() => {
            UNICODE_BOM
        }
        _ => self.consume_unexpected_character(),
    }
}
+ fn consume_ellipsis(&mut self) -> GraphqlSyntaxKind { + self.assert_byte(b'.'); + let start = self.position; + self.advance(1); + if self.current_byte() == Some(b'.') { + if self.byte_at(1) == Some(b'.') { + self.advance(2); + + if self.current_byte() == Some(b'.') { + while self.current_byte() == Some(b'.') { + self.advance(1); + } + + let end = self.position; + self.diagnostics.push( + ParseDiagnostic::new( + format!("'{}' isn't valid here.", ".".repeat(end - start)), + start..end, + ) + .with_hint("Did you mean '...'?"), + ); + + ERROR_TOKEN + } else { + DOT3 + } + } else { + self.advance(1); + self.diagnostics.push( + ParseDiagnostic::new("'..' isn't valid here.", start..self.position) + .with_hint("Did you mean '...'?"), + ); + + ERROR_TOKEN + } + } else { + self.diagnostics.push( + ParseDiagnostic::new("'.' isn't valid here.", start..self.position) + .with_hint("Did you mean '...'?"), + ); + + ERROR_TOKEN + } + } + + /// Lexes a name, and keywords. + fn consume_name(&mut self, first: u8) -> GraphqlSyntaxKind { + self.assert_current_char_boundary(); + + // Note to keep the buffer large enough to fit every possible keyword + // that the lexer can return. 
+ const BUFFER_SIZE: usize = 32; + let mut buffer = [0u8; BUFFER_SIZE]; + buffer[0] = first; + let mut len = 1; + + self.advance_byte_or_char(first); + + while let Some(byte) = self.current_byte() { + if is_name_continue(byte) { + if len < BUFFER_SIZE { + buffer[len] = byte; + len += 1; + } + + self.advance(1) + } else { + break; + } + } + + match &buffer[..len] { + b"true" => TRUE_KW, + b"false" => FALSE_KW, + b"query" => QUERY_KW, + b"mutation" => MUTATION_KW, + b"subscription" => SUBSCRIPTION_KW, + b"fragment" => FRAGMENT_KW, + b"on" => ON_KW, + b"null" => NULL_KW, + b"schema" => SCHEMA_KW, + b"extend" => EXTEND_KW, + b"scalar" => SCALAR_KW, + b"type" => TYPE_KW, + b"implements" => IMPLEMENTS_KW, + b"interface" => INTERFACE_KW, + b"union" => UNION_KW, + b"enum" => ENUM_KW, + b"input" => INPUT_KW, + b"directive" => DIRECTIVE_KW, + b"repeatable" => REPEATABLE_KW, + b"QUERY" => UPPER_QUERY_KW, + b"MUTATION" => UPPER_MUTATION_KW, + b"SUBSCRIPTION" => UPPER_SUBSCRIPTION_KW, + b"FIELD" => UPPER_FIELD_KW, + b"FRAGMENT_DEFINITION" => FRAGMENT_DEFINITION_KW, + b"FRAGMENT_SPREAD" => FRAGMENT_SPREAD_KW, + b"INLINE_FRAGMENT" => INLINE_FRAGMENT_KW, + b"VARIABLE_DEFINITION" => VARIABLE_DEFINITION_KW, + b"SCHEMA" => UPPER_SCHEMA_KW, + b"SCALAR" => UPPER_SCALAR_KW, + b"OBJECT" => UPPER_OBJECT_KW, + b"FIELD_DEFINITION" => FIELD_DEFINITION_KW, + b"ARGUMENT_DEFINITION" => ARGUMENT_DEFINITION_KW, + b"INTERFACE" => UPPER_INTERFACE_KW, + b"UNION" => UPPER_UNION_KW, + b"ENUM" => UPPER_ENUM_KW, + b"ENUM_VALUE" => ENUM_VALUE_KW, + b"INPUT_OBJECT" => INPUT_OBJECT_KW, + b"INPUT_FIELD_DEFINITION" => INPUT_FIELD_DEFINITION_KW, + _ => GRAPHQL_NAME, + } + } + + #[inline] + fn consume_unexpected_character(&mut self) -> GraphqlSyntaxKind { + self.assert_current_char_boundary(); + + let char = self.current_char_unchecked(); + let err = ParseDiagnostic::new( + format!("unexpected character `{}`", char), + self.text_position()..self.text_position() + char.text_len(), + ); + 
self.diagnostics.push(err); + self.advance(char.len_utf8()); + + ERROR_TOKEN + } + + /// consume an entire number, be it a float, int, or scientific notion + fn consume_number(&mut self, first: u8) -> GraphqlSyntaxKind { + self.assert_current_char_boundary(); + + let start = self.text_position(); + + let mut state = match first { + b'-' => LexNumberState::Minus, + b'0' => LexNumberState::LeadingZero, + _ => LexNumberState::IntegerPart, + }; + self.advance(1); + + while let Some(chr) = self.current_byte() { + let new_state = match chr { + b'0'..=b'9' => self.consume_digit(chr, state), + b'.' => self.consume_fraction(state), + b'e' | b'E' => self.consume_exponent(chr, start, state), + _ => break, + }; + state = new_state; + } + + match state { + LexNumberState::LeadingZero | LexNumberState::IntegerPart => GRAPHQL_INT_LITERAL, + LexNumberState::FractionalPart | LexNumberState::Exponent => GRAPHQL_FLOAT_LITERAL, + LexNumberState::Minus => { + self.diagnostics.push(ParseDiagnostic::new( + "Unexpected token `-`", + start..self.text_position(), + )); + + ERROR_TOKEN + } + LexNumberState::Invalid(diagnostic) => { + self.diagnostics.push(diagnostic); + ERROR_TOKEN + } + } + } + + /// consume a single digit in a number + fn consume_digit(&mut self, chr: u8, state: LexNumberState) -> LexNumberState { + debug_assert!(chr.is_ascii_digit()); + match chr { + b'0' => { + let position = self.text_position(); + self.advance(1); + + match state { + LexNumberState::LeadingZero => { + let diagnostic = ParseDiagnostic::new( + "GraphQL doesn't allow numbers starting with zero", + position..position + TextSize::from(1), + ); + LexNumberState::Invalid(diagnostic) + } + LexNumberState::Minus => LexNumberState::LeadingZero, + _ => state, + } + } + b'1'..=b'9' => { + let position = self.text_position(); + self.advance(1); + + match state { + LexNumberState::LeadingZero => { + let diagnostic = ParseDiagnostic::new( + "GraphQL doesn't allow numbers starting with zero", + position..position + 
TextSize::from(1), + ); + LexNumberState::Invalid(diagnostic) + } + LexNumberState::Minus => LexNumberState::IntegerPart, + _ => state, + } + } + // should never happen + _ => { + let position = self.text_position(); + LexNumberState::Invalid(ParseDiagnostic::new( + "Invalid character", + position..position + TextSize::from(1), + )) + } + } + } + fn consume_fraction(&mut self, state: LexNumberState) -> LexNumberState { + self.assert_byte(b'.'); + let position = self.text_position(); + self.advance(1); + + if !self.current_byte().is_some_and(|b| b.is_ascii_digit()) { + LexNumberState::Invalid( + ParseDiagnostic::new("Missing fraction", position..position + TextSize::from(1)) + .with_hint("Remove the `.`"), + ) + } else { + match state { + LexNumberState::IntegerPart | LexNumberState::LeadingZero => { + LexNumberState::FractionalPart + } + invalid @ LexNumberState::Invalid(_) => invalid, + _ => LexNumberState::Invalid(ParseDiagnostic::new( + "Invalid fraction part", + position..position + TextSize::from(1), + )), + } + } + } + + fn consume_exponent( + &mut self, + chr: u8, + start: TextSize, + state: LexNumberState, + ) -> LexNumberState { + debug_assert!(matches!(chr, b'e' | b'E')); + let position = self.text_position(); + self.advance(1); + + if let Some(b'-' | b'+') = self.current_byte() { + self.advance(1); + } + + if !self.current_byte().is_some_and(|b| b.is_ascii_digit()) { + LexNumberState::Invalid( + ParseDiagnostic::new("Missing exponent", start..position).with_detail( + position..position + TextSize::from(1), + "Expected a digit as the exponent", + ), + ) + } else { + match state { + LexNumberState::LeadingZero + | LexNumberState::IntegerPart + | LexNumberState::FractionalPart => LexNumberState::Exponent, + invalid @ LexNumberState::Invalid(_) => invalid, + _ => LexNumberState::Invalid(ParseDiagnostic::new( + "Invalid exponent part", + position..position + TextSize::from(1), + )), + } + } + } + + fn consume_string(&mut self) -> GraphqlSyntaxKind { + 
self.assert_byte(b'"'); + let start = self.text_position(); + + self.advance(1); // Skip over the quote + + let mut state = LexStringState::Uninitialized; + + let mut has_error = false; + + while let Some(chr) = self.current_byte() { + let (new_state, diagnostic) = self.consume_string_character(chr, start, state); + state = new_state; + if let Some(diagnostic) = diagnostic { + self.push_diagnostic(diagnostic); + has_error = true; + } + if matches!(state, LexStringState::Terminated) { + break; + } + } + match state { + LexStringState::Terminated => { + if has_error { + ERROR_TOKEN + } else { + GRAPHQL_STRING_LITERAL + } + } + LexStringState::InString + | LexStringState::InBlockString + | LexStringState::Uninitialized => { + let unterminated = + ParseDiagnostic::new("Missing closing quote", start..self.text_position()) + .with_detail( + self.source.text_len()..self.source.text_len(), + "file ends here", + ); + self.diagnostics.push(unterminated); + + ERROR_TOKEN + } + } + } + + /// Lexes a character inside a string + fn consume_string_character( + &mut self, + chr: u8, + start: TextSize, + state: LexStringState, + ) -> (LexStringState, Option) { + self.assert_current_char_boundary(); + match state { + LexStringState::Uninitialized => match chr { + b'"' => self.consume_quote_in_string(state), + _ => (LexStringState::InString, None), + }, + LexStringState::InString => match chr { + b'"' => self.consume_quote_in_string(state), + b'\\' => self.consume_escape_sequence_in_string(state), + b'\n' | b'\r' => ( + LexStringState::Terminated, + Some( + ParseDiagnostic::new("Missing closing quote", start..self.text_position()) + .with_detail(self.position..self.position + 1, "line breaks here"), + ), + ), + _ => { + self.advance_char_unchecked(); + (state, None) + } + }, + LexStringState::InBlockString => match chr { + b'"' => self.consume_quote_in_string(state), + b'\\' => self.consume_escape_sequence_in_string(state), + _ => { + self.advance_char_unchecked(); + (state, None) + 
} + }, + // should never happen + _ => ( + state, + Some(ParseDiagnostic::new( + "String terminated", + self.position..self.position + 1, + )), + ), + } + } + + fn consume_quote_in_string( + &mut self, + state: LexStringState, + ) -> (LexStringState, Option) { + self.assert_byte(b'"'); + self.advance(1); + match state { + LexStringState::Uninitialized => { + if self.current_byte() == Some(b'"') { + self.advance(1); + (LexStringState::InBlockString, None) + } else { + // an empty string + (LexStringState::Terminated, None) + } + } + LexStringState::InString => (LexStringState::Terminated, None), + LexStringState::InBlockString => { + if self.current_byte() == Some(b'"') && self.byte_at(1) == Some(b'"') { + self.advance(2); + (LexStringState::Terminated, None) + } else { + (state, None) + } + } + // should never happen + _ => ( + state, + Some(ParseDiagnostic::new( + "String terminated", + self.position..self.position + 1, + )), + ), + } + } + + fn consume_escape_sequence_in_string( + &mut self, + state: LexStringState, + ) -> (LexStringState, Option) { + self.assert_byte(b'\\'); + let escape_start = self.text_position(); + self.advance(1); + match state { + // '\t' etc + LexStringState::InString => match self.current_byte() { + Some(b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't') => { + self.advance(1); + (state, None) + } + + Some(b'u') => match self.consume_unicode_escape() { + Err(diagnostic) => (state, Some(diagnostic)), + Ok(_) => (state, None), + }, + + Some(_) => { + let c = self.current_char_unchecked(); + let diagnostic = ParseDiagnostic::new( + "Invalid escape sequence", + escape_start..self.text_position() + c.text_len(), + ) + .with_alternatives( + "Expected one of the following values", + &[ + r"\\", + r"\/", + r#"\""#, + r"\b", + r"\f", + r"\n", + r"\r", + r"\t", + r"\uXXXX where X is hexedecimal number", + ], + ); + (state, Some(diagnostic)) + } + + None => ( + state, + Some( + ParseDiagnostic::new( + "Expected an escape sequence following a 
backslash, but found none", + escape_start..self.text_position(), + ) + .with_detail(self.text_position()..self.text_position(), "File ends here"), + ), + ), + }, + // '\"""' + LexStringState::InBlockString => { + if self.current_byte() == Some(b'"') + && self.byte_at(1) == Some(b'"') + && self.byte_at(2) == Some(b'"') + { + self.advance(3); + (state, None) + } else { + let c = self.current_char_unchecked(); + let diagnostic = ParseDiagnostic::new( + "Invalid escape sequence", + escape_start..self.text_position() + c.text_len(), + ) + .with_hint(r#"For block string the only valid escape sequences is `\"""`. "#); + (state, Some(diagnostic)) + } + } + // should never happen + _ => ( + state, + Some(ParseDiagnostic::new( + "String terminated", + self.position..self.position + 1, + )), + ), + } + } + + /// Lexes a `\u0000` escape sequence. Assumes that the lexer is positioned at the `u` token. + /// + /// A unicode escape sequence must consist of 4 hex characters. + fn consume_unicode_escape(&mut self) -> Result<(), ParseDiagnostic> { + self.assert_byte(b'u'); + self.assert_current_char_boundary(); + + let start = self.text_position(); + + let start = start + // Subtract 1 to get position of `\` + .checked_sub(TextSize::from(1)) + .unwrap_or(start); + + self.advance(1); // Advance over `u` + + for _ in 0..4 { + match self.current_byte() { + Some(byte) if byte.is_ascii_hexdigit() => self.advance(1), + Some(_) => { + let char = self.current_char_unchecked(); + // Reached a non hex digit which is invalid + return Err(ParseDiagnostic::new( + + "Invalid unicode sequence", + start..self.text_position(), + ) + .with_detail(self.text_position()..self.text_position().add(char.text_len()), "Non hexadecimal number") + .with_hint("A unicode escape sequence must consist of 4 hexadecimal numbers: `\\uXXXX`, e.g. 
`\\u002F' for '/'.")); + } + None => { + // Reached the end of the file before processing 4 hex digits + return Err(ParseDiagnostic::new( + "Unicode escape sequence with two few hexadecimal numbers.", + start..self.text_position(), + ) + .with_detail( + self.text_position()..self.text_position(), + "reached the end of the file", + ) + .with_hint("A unicode escape sequence must consist of 4 hexadecimal numbers: `\\uXXXX`, e.g. `\\u002F' for '/'.")); + } + } + } + + Ok(()) + } + + fn consume_comment(&mut self) -> GraphqlSyntaxKind { + self.assert_byte(b'#'); + + self.advance(1); + + while let Some(chr) = self.current_byte() { + match chr { + b'\n' | b'\r' => return COMMENT, + chr => self.advance_byte_or_char(chr), + } + } + COMMENT + } +} + +fn is_name_start(byte: u8) -> bool { + byte.is_ascii_alphabetic() || byte == b'_' +} + +fn is_name_continue(byte: u8) -> bool { + byte.is_ascii_alphanumeric() || byte == b'_' +} + +fn is_number_start(byte: u8) -> bool { + byte.is_ascii_digit() || byte == b'-' +} + +#[derive(Copy, Clone, Debug)] +enum LexStringState { + Uninitialized, + + /// Between the opening `"` and closing `"` quotes. + InString, + + /// Between the opening `"""` and closing `"""` quotes. 
/// Current state of a number being parsed
///
/// `consume_number` picks the initial state from the first byte and then
/// updates it for every following digit, `.` or `e`/`E`. The state reached when
/// scanning stops decides whether the token is an int, a float or an error.
#[derive(Debug, Clone)]
enum LexNumberState {
    /// After a minus sign
    Minus,

    /// After the first digit which is also Zero
    LeadingZero,

    /// Parsing the digits before the exponent or fractional (after `.`) part
    IntegerPart,

    /// Parsing the digits after a `.`
    FractionalPart,

    /// Parsing the exponent digits (after a `e` or `E`)
    Exponent,

    /// Parsing the rest of an invalid number; carries the diagnostic that is
    /// reported once the whole number has been consumed
    Invalid(ParseDiagnostic),
}
assert_lex { + ($src:expr, $($kind:ident:$len:expr $(,)?)*) => {{ + let mut lexer = GraphqlLexer::from_str($src); + let mut idx = 0; + let mut tok_idx = TextSize::default(); + + let mut new_str = String::with_capacity($src.len()); + let mut tokens = vec![]; + + while lexer.next_token(()) != EOF { + tokens.push((lexer.current(), lexer.current_range())); + } + + $( + assert_eq!( + tokens[idx].0, + GraphqlSyntaxKind::$kind, + "expected token kind {}, but found {:?}", + stringify!($kind), + tokens[idx].0, + ); + + assert_eq!( + tokens[idx].1.len(), + TextSize::from($len), + "expected token length of {}, but found {:?} for token {:?}", + $len, + tokens[idx].1.len(), + tokens[idx].0, + ); + + new_str.push_str(&$src[tokens[idx].1]); + tok_idx += tokens[idx].1.len(); + + idx += 1; + )* + + if idx < tokens.len() { + panic!( + "expected {} tokens but lexer returned {}, first unexpected token is '{:?}'", + idx, + tokens.len(), + tokens[idx].0 + ); + } else { + assert_eq!(idx, tokens.len()); + } + + assert_eq!($src, new_str, "Failed to reconstruct input"); + }}; +} + +// This is for testing if the lexer is truly lossless +// It parses random strings and puts them back together with the produced tokens and compares +#[quickcheck] +fn losslessness(string: String) -> bool { + // using an mpsc channel allows us to spawn a thread and spawn the lexer there, then if + // it takes more than 2 seconds we panic because it is 100% infinite recursion + let cloned = string.clone(); + let (sender, receiver) = channel(); + thread::spawn(move || { + let mut lexer = GraphqlLexer::from_str(&cloned); + let mut tokens = vec![]; + + while lexer.next_token(()) != EOF { + tokens.push(lexer.current_range()); + } + + sender + .send(tokens) + .expect("Could not send tokens to receiver"); + }); + let token_ranges = receiver + .recv_timeout(Duration::from_secs(2)) + .unwrap_or_else(|_| { + panic!( + "Lexer is infinitely recursing with this code: ->{}<-", + string + ) + }); + + let mut new_str = 
String::with_capacity(string.len()); + let mut idx = TextSize::from(0); + + for range in token_ranges { + new_str.push_str(&string[range]); + idx += range.len(); + } + + string == new_str +} + +#[test] +fn empty() { + assert_lex! { + "", + } +} + +#[test] +fn string() { + assert_lex! { + r#""5098382""#, + GRAPHQL_STRING_LITERAL:9 + } + + // single quote is just a normal char + assert_lex! { + r#""'hello""#, + GRAPHQL_STRING_LITERAL:8 + } + + // escaped quote + assert_lex! { + r#""hel\"lo\"""#, + GRAPHQL_STRING_LITERAL:11 + } + + // unicode + assert_lex! { + r#""юникод""#, + GRAPHQL_STRING_LITERAL:14 + } + + // missing double closing quote + assert_lex! { + r#""he"#, + ERROR_TOKEN:3 + } + + // line break + assert_lex! { + r#""he + "#, + ERROR_TOKEN:3, + NEWLINE:1, + WHITESPACE:4 + } + + // line break + assert_lex! { + r#""he + ""#, + ERROR_TOKEN:3, + NEWLINE:1, + WHITESPACE:4, + ERROR_TOKEN:1 + } + + assert_lex! { + r#""Escaped \n""#, + GRAPHQL_STRING_LITERAL:12 + } + + assert_lex! { + r#""Escaped \r""#, + GRAPHQL_STRING_LITERAL:12 + } + + // invalid escape sequence + assert_lex! { + r#""\0""#, + ERROR_TOKEN:4 + } + + // empty + assert_lex! { + r#""""#, + GRAPHQL_STRING_LITERAL:2 + } + + // block string newline + assert_lex! { + r#""""aaa + """"#, + // NEWLINE + 8 WHITESPACE + GRAPHQL_STRING_LITERAL:18 + } + + // block string unterminated + assert_lex! { + r#""""aaa + """#, + ERROR_TOKEN:17 + } + + // unterminated block string + assert_lex! { + r#" """" "#, + WHITESPACE:1, + ERROR_TOKEN:5, + } + + // unterminated block string + assert_lex! { + r#""""aaa + """#, + ERROR_TOKEN:17 + } + + // escape sequence + assert_lex! { + r#" """ \""" """ "#, + WHITESPACE:1, + GRAPHQL_STRING_LITERAL:12, + WHITESPACE:1, + } + + // invalid escape sequence + assert_lex! { + r#"""" \" \r \n \"" """ "#, + ERROR_TOKEN:20, + WHITESPACE:1, + } + + // empty + assert_lex! { + r#""""""""#, + GRAPHQL_STRING_LITERAL:6 + } +} + +#[test] +fn number() { + assert_lex! 
{ + "5098382", + GRAPHQL_INT_LITERAL:7 + } + + assert_lex! { + "509.382", + GRAPHQL_FLOAT_LITERAL:7 + } + + assert_lex! { + "-", + ERROR_TOKEN:1 + } + + assert_lex! { + "+", + ERROR_TOKEN:1 + } + + assert_lex! { + "-123", + GRAPHQL_INT_LITERAL:4 + } + + assert_lex! { + "123e10", + GRAPHQL_FLOAT_LITERAL:6 + } + + assert_lex! { + "123e+10", + GRAPHQL_FLOAT_LITERAL:7 + } + + assert_lex! { + "123e-10", + GRAPHQL_FLOAT_LITERAL:7 + } + + assert_lex! { + "123E10", + GRAPHQL_FLOAT_LITERAL:6 + } + + assert_lex! { + "123E+10", + GRAPHQL_FLOAT_LITERAL:7 + } + + assert_lex! { + "123E-10", + GRAPHQL_FLOAT_LITERAL:7 + } +} + +#[test] +fn comment() { + assert_lex! { + "# abc", + COMMENT:5 + } + + // newline + assert_lex! { + r#"# abc + "#, + COMMENT:5, + NEWLINE:1, + WHITESPACE:8 + } +} + +#[test] +fn name() { + assert_lex! { + r#"asciiIdentifier"#, + GRAPHQL_NAME:15, + } + + assert_lex! { + r#"with_underscore_here"#, + GRAPHQL_NAME:20, + } + + assert_lex! { + r#"with_unicodeà"#, + GRAPHQL_NAME:12, + ERROR_TOKEN:2, + } + + assert_lex! { + r#"ᨀwith_unicodeàç"#, + ERROR_TOKEN:3, + GRAPHQL_NAME:12, + ERROR_TOKEN:2, + ERROR_TOKEN:2, + } + + assert_lex! { + r#"field }"#, + GRAPHQL_NAME:5, + WHITESPACE:1, + R_CURLY:1, + } + + assert_lex! { + r#"null"#, + NULL_KW:4, + } +} + +#[test] +fn dot() { + assert_lex! { + "...", + DOT3:3 + } + + assert_lex! { + "..", + ERROR_TOKEN:2 + } + + assert_lex! { + ".", + ERROR_TOKEN:1 + } +} diff --git a/crates/biome_graphql_parser/src/lib.rs b/crates/biome_graphql_parser/src/lib.rs new file mode 100644 index 000000000000..6409c3931d27 --- /dev/null +++ b/crates/biome_graphql_parser/src/lib.rs @@ -0,0 +1,110 @@ +//! Extremely fast, lossless, and error tolerant GraphQL Parser. 
+ +use biome_graphql_factory::GraphqlSyntaxFactory; +use biome_graphql_syntax::{GraphqlLanguage, GraphqlRoot, GraphqlSyntaxNode}; +pub use biome_parser::prelude::*; +use biome_parser::tree_sink::LosslessTreeSink; +use biome_rowan::{AstNode, NodeCache}; +use parser::{parse_root, GraphqlParser}; + +mod lexer; +mod parser; +mod token_source; + +pub(crate) type GraphqlLosslessTreeSink<'source> = + LosslessTreeSink<'source, GraphqlLanguage, GraphqlSyntaxFactory>; + +pub fn parse_graphql(source: &str) -> GraphqlParse { + let mut cache = NodeCache::default(); + parse_graphql_with_cache(source, &mut cache) +} + +/// Parses the provided string as Graphql program using the provided node cache. +pub fn parse_graphql_with_cache(source: &str, cache: &mut NodeCache) -> GraphqlParse { + tracing::debug_span!("Parsing phase").in_scope(move || { + let mut parser = GraphqlParser::new(source); + + parse_root(&mut parser); + + let (events, diagnostics, trivia) = parser.finish(); + + let mut tree_sink = GraphqlLosslessTreeSink::with_cache(source, &trivia, cache); + biome_parser::event::process(&mut tree_sink, events, diagnostics); + let (green, diagnostics) = tree_sink.finish(); + + GraphqlParse::new(green, diagnostics) + }) +} + +/// A utility struct for managing the result of a parser job +#[derive(Debug)] +pub struct GraphqlParse { + root: GraphqlSyntaxNode, + diagnostics: Vec, +} + +impl GraphqlParse { + pub fn new(root: GraphqlSyntaxNode, diagnostics: Vec) -> GraphqlParse { + GraphqlParse { root, diagnostics } + } + + /// The syntax node represented by this Parse result + /// + /// ``` + /// # use biome_graphql_parser::parse_graphql; + /// # use biome_graphql_syntax::GraphqlSyntaxKind; + /// # use biome_rowan::{AstNode, AstNodeList, SyntaxError}; + /// + /// # fn main() -> Result<(), SyntaxError> { + /// use biome_graphql_syntax::GraphqlSyntaxKind; + /// let parse = parse_graphql(r#""#); + /// + /// let root_value = parse.tree().definitions(); + /// + /// 
assert_eq!(root_value.syntax().kind(), GraphqlSyntaxKind::GRAPHQL_DEFINITION_LIST); + /// + /// # Ok(()) + /// # } + /// ``` + pub fn syntax(&self) -> GraphqlSyntaxNode { + self.root.clone() + } + + /// Get the diagnostics which occurred when parsing + pub fn diagnostics(&self) -> &[ParseDiagnostic] { + &self.diagnostics + } + + /// Get the diagnostics which occurred when parsing + pub fn into_diagnostics(self) -> Vec { + self.diagnostics + } + + /// Returns [true] if the parser encountered some errors during the parsing. + pub fn has_errors(&self) -> bool { + self.diagnostics + .iter() + .any(|diagnostic| diagnostic.is_error()) + } + + /// Convert this parse result into a typed AST node. + /// + /// # Panics + /// Panics if the node represented by this parse result mismatches. + pub fn tree(&self) -> GraphqlRoot { + GraphqlRoot::unwrap_cast(self.syntax()) + } +} + +#[cfg(test)] +mod tests { + use crate::parse_graphql; + + #[test] + fn parser_smoke_test() { + let src = r#" +"#; + + let _graphql = parse_graphql(src); + } +} diff --git a/crates/biome_graphql_parser/src/parser/definitions/mod.rs b/crates/biome_graphql_parser/src/parser/definitions/mod.rs new file mode 100644 index 000000000000..0695ea6caa6c --- /dev/null +++ b/crates/biome_graphql_parser/src/parser/definitions/mod.rs @@ -0,0 +1,58 @@ +mod operation; + +use crate::parser::{parse_error::expected_any_definition, GraphqlParser}; +use biome_graphql_syntax::GraphqlSyntaxKind::{self, *}; +use biome_parser::{ + parse_lists::ParseNodeList, parse_recovery::ParseRecovery, parsed_syntax::ParsedSyntax, + prelude::ParsedSyntax::*, Parser, +}; + +use self::operation::{is_at_operation, parse_operation_definition}; + +struct DefinitionListParseRecovery; + +impl ParseRecovery for DefinitionListParseRecovery { + type Kind = GraphqlSyntaxKind; + type Parser<'source> = GraphqlParser<'source>; + const RECOVERED_KIND: Self::Kind = GRAPHQL_BOGUS_DEFINITION; + + fn is_at_recovered(&self, p: &mut Self::Parser<'_>) -> bool { + 
// TODO: recover at any definition + is_at_operation(p) + } +} + +#[derive(Default)] +pub(crate) struct DefinitionList; + +impl ParseNodeList for DefinitionList { + type Kind = GraphqlSyntaxKind; + type Parser<'source> = GraphqlParser<'source>; + + const LIST_KIND: Self::Kind = GRAPHQL_DEFINITION_LIST; + + fn parse_element(&mut self, p: &mut Self::Parser<'_>) -> ParsedSyntax { + parse_definition(p) + } + + fn is_at_list_end(&self, p: &mut Self::Parser<'_>) -> bool { + p.at(EOF) + } + + fn recover( + &mut self, + p: &mut Self::Parser<'_>, + parsed_element: ParsedSyntax, + ) -> biome_parser::parse_recovery::RecoveryResult { + parsed_element.or_recover(p, &DefinitionListParseRecovery, expected_any_definition) + } +} + +#[inline] +fn parse_definition(p: &mut GraphqlParser) -> ParsedSyntax { + match p.cur() { + // TODO: parse any definition + _ if is_at_operation(p) => parse_operation_definition(p), + _ => Absent, + } +} diff --git a/crates/biome_graphql_parser/src/parser/definitions/operation.rs b/crates/biome_graphql_parser/src/parser/definitions/operation.rs new file mode 100644 index 000000000000..815e9a778b1f --- /dev/null +++ b/crates/biome_graphql_parser/src/parser/definitions/operation.rs @@ -0,0 +1,137 @@ +use crate::parser::{ + directive::DirectiveList, + parse_error::{expected_any_selection, expected_name, expected_selection_set}, + parse_name, GraphqlParser, +}; +use biome_graphql_syntax::{ + GraphqlSyntaxKind::{self, *}, + T, +}; +use biome_parser::{ + parse_lists::ParseNodeList, parse_recovery::ParseRecovery, parsed_syntax::ParsedSyntax, + prelude::ParsedSyntax::*, token_set, Parser, TokenSet, +}; + +pub(crate) const OPERATION_TYPE: TokenSet = + token_set![T![query], T![mutation], T![subscription]]; + +/// https://spec.graphql.org/October2021/#sec-Language.Operations.Query-shorthand +#[inline] +pub(crate) fn parse_operation_definition(p: &mut GraphqlParser) -> ParsedSyntax { + if !is_at_operation(p) { + return Absent; + } + + if p.at_ts(OPERATION_TYPE) { + 
// TODO: parse variables + let m = p.start(); + { + let m = p.start(); + p.bump_ts(OPERATION_TYPE); + m.complete(p, GRAPHQL_OPERATION_TYPE); + } + + // we don't need diagnostic here, because name is optional + parse_name(p).ok(); + + DirectiveList.parse_list(p); + parse_selection_set(p).or_add_diagnostic(p, expected_selection_set); + + Present(m.complete(p, GRAPHQL_OPERATION_DEFINITION)) + } else { + parse_selection_set(p) + } +} + +#[inline] +pub(crate) fn is_at_operation(p: &mut GraphqlParser<'_>) -> bool { + p.at_ts(OPERATION_TYPE) || is_at_selection_set(p) +} + +#[inline] +fn parse_selection_set(p: &mut GraphqlParser) -> ParsedSyntax { + let m = p.start(); + p.expect(T!['{']); + SelectionList::new().parse_list(p); + p.expect(T!['}']); + Present(m.complete(p, GRAPHQL_SELECTION_SET)) +} + +struct SelectionListParseRecovery; + +impl ParseRecovery for SelectionListParseRecovery { + type Kind = GraphqlSyntaxKind; + type Parser<'source> = GraphqlParser<'source>; + const RECOVERED_KIND: Self::Kind = GRAPHQL_BOGUS_SELECTION; + + fn is_at_recovered(&self, p: &mut Self::Parser<'_>) -> bool { + is_at_selection(p) + } +} + +pub(crate) struct SelectionList; + +impl SelectionList { + pub(crate) fn new() -> Self { + Self + } +} + +impl ParseNodeList for SelectionList { + type Kind = GraphqlSyntaxKind; + type Parser<'source> = GraphqlParser<'source>; + + const LIST_KIND: Self::Kind = GRAPHQL_SELECTION_LIST; + + fn parse_element(&mut self, p: &mut Self::Parser<'_>) -> ParsedSyntax { + parse_selection(p) + } + + fn is_at_list_end(&self, p: &mut Self::Parser<'_>) -> bool { + p.at(T!['}']) + } + + fn recover( + &mut self, + p: &mut Self::Parser<'_>, + parsed_element: ParsedSyntax, + ) -> biome_parser::parse_recovery::RecoveryResult { + parsed_element.or_recover(p, &SelectionListParseRecovery, expected_any_selection) + } +} + +#[inline] +fn parse_selection(p: &mut GraphqlParser) -> ParsedSyntax { + // TODO: parse any selection + match p.cur() { + DOT3 => todo!(), + _ if 
is_at_field(p) => parse_field(p), + _ => Absent, + } +} + +#[inline] +fn parse_field(p: &mut GraphqlParser) -> ParsedSyntax { + // TODO: parse alias, arguments, nested selection set + let m = p.start(); + parse_name(p).or_add_diagnostic(p, expected_name); + DirectiveList.parse_list(p); + Present(m.complete(p, GRAPHQL_FIELD)) +} + +#[inline] +pub(crate) fn is_at_selection_set(p: &mut GraphqlParser<'_>) -> bool { + p.at(T!['{']) +} + +#[inline] +pub(crate) fn is_at_selection(p: &mut GraphqlParser<'_>) -> bool { + // TODO: any selection + is_at_field(p) +} + +#[inline] +pub(crate) fn is_at_field(p: &mut GraphqlParser<'_>) -> bool { + // TODO: handle arguments + p.at(GRAPHQL_NAME) +} diff --git a/crates/biome_graphql_parser/src/parser/directive.rs b/crates/biome_graphql_parser/src/parser/directive.rs new file mode 100644 index 000000000000..b4ce83cb40f4 --- /dev/null +++ b/crates/biome_graphql_parser/src/parser/directive.rs @@ -0,0 +1,67 @@ +use crate::parser::{parse_error::expected_name, parse_name, GraphqlParser}; +use biome_graphql_syntax::{ + GraphqlSyntaxKind::{self, *}, + T, +}; +use biome_parser::{ + parse_lists::ParseNodeList, parse_recovery::ParseRecovery, parsed_syntax::ParsedSyntax, + prelude::ParsedSyntax::*, Parser, +}; + +use super::parse_error::expected_directive; +struct DirectiveListParseRecovery; + +impl ParseRecovery for DirectiveListParseRecovery { + type Kind = GraphqlSyntaxKind; + type Parser<'source> = GraphqlParser<'source>; + const RECOVERED_KIND: Self::Kind = GRAPHQL_DIRECTIVE; + + fn is_at_recovered(&self, p: &mut Self::Parser<'_>) -> bool { + is_at_directive(p) + } +} + +#[derive(Default)] +pub(crate) struct DirectiveList; + +impl ParseNodeList for DirectiveList { + type Kind = GraphqlSyntaxKind; + type Parser<'source> = GraphqlParser<'source>; + + const LIST_KIND: Self::Kind = GRAPHQL_DIRECTIVE_LIST; + + fn parse_element(&mut self, p: &mut Self::Parser<'_>) -> ParsedSyntax { + parse_directive(p) + } + + fn is_at_list_end(&self, p: &mut 
Self::Parser<'_>) -> bool { + !is_at_directive(p) + } + + fn recover( + &mut self, + p: &mut Self::Parser<'_>, + parsed_element: ParsedSyntax, + ) -> biome_parser::parse_recovery::RecoveryResult { + parsed_element.or_recover(p, &DirectiveListParseRecovery, expected_directive) + } +} + +#[inline] +fn is_at_directive(p: &mut GraphqlParser<'_>) -> bool { + p.at(T![@]) +} + +#[inline] +pub(crate) fn parse_directive(p: &mut GraphqlParser) -> ParsedSyntax { + if !p.at(T![@]) { + return Absent; + } + + let m = p.start(); + p.bump(T![@]); + parse_name(p).or_add_diagnostic(p, expected_name); + // TODO: parse arguments + + Present(m.complete(p, GRAPHQL_DIRECTIVE)) +} diff --git a/crates/biome_graphql_parser/src/parser/mod.rs b/crates/biome_graphql_parser/src/parser/mod.rs new file mode 100644 index 000000000000..2ba0522f3101 --- /dev/null +++ b/crates/biome_graphql_parser/src/parser/mod.rs @@ -0,0 +1,90 @@ +mod definitions; +mod directive; +mod parse_error; +use crate::token_source::GraphqlTokenSource; +use biome_graphql_syntax::GraphqlSyntaxKind::{self, *}; +use biome_parser::diagnostic::merge_diagnostics; +use biome_parser::event::Event; +use biome_parser::parse_lists::ParseNodeList; +use biome_parser::prelude::{ParsedSyntax::*, *}; +use biome_parser::token_source::Trivia; +use biome_parser::ParserContext; +use definitions::DefinitionList; + +pub(crate) struct GraphqlParser<'source> { + context: ParserContext, + source: GraphqlTokenSource<'source>, +} + +impl<'source> GraphqlParser<'source> { + pub fn new(source: &'source str) -> Self { + Self { + context: ParserContext::default(), + source: GraphqlTokenSource::from_str(source), + } + } + + #[allow(unused)] + pub fn lookahead(&mut self) -> GraphqlSyntaxKind { + self.source.lookahead() + } + + pub fn finish( + self, + ) -> ( + Vec>, + Vec, + Vec, + ) { + let (trivia, lexer_diagnostics) = self.source.finish(); + let (events, parse_diagnostics) = self.context.finish(); + + let diagnostics = 
merge_diagnostics(lexer_diagnostics, parse_diagnostics); + + (events, diagnostics, trivia) + } +} + +impl<'source> Parser for GraphqlParser<'source> { + type Kind = GraphqlSyntaxKind; + type Source = GraphqlTokenSource<'source>; + + fn context(&self) -> &ParserContext { + &self.context + } + + fn context_mut(&mut self) -> &mut ParserContext { + &mut self.context + } + + fn source(&self) -> &Self::Source { + &self.source + } + + fn source_mut(&mut self) -> &mut Self::Source { + &mut self.source + } +} + +pub(crate) fn parse_root(p: &mut GraphqlParser) -> CompletedMarker { + let m = p.start(); + + p.eat(UNICODE_BOM); + + DefinitionList.parse_list(p); + + p.expect(EOF); + + m.complete(p, GRAPHQL_ROOT) +} + +#[inline] +fn parse_name(p: &mut GraphqlParser) -> ParsedSyntax { + if !p.at(GRAPHQL_NAME) { + return Absent; + } + + let m = p.start(); + p.bump(GRAPHQL_NAME); + Present(m.complete(p, GRAPHQL_NAME)) +} diff --git a/crates/biome_graphql_parser/src/parser/parse_error.rs b/crates/biome_graphql_parser/src/parser/parse_error.rs new file mode 100644 index 000000000000..a9b4b03aee92 --- /dev/null +++ b/crates/biome_graphql_parser/src/parser/parse_error.rs @@ -0,0 +1,24 @@ +use crate::parser::GraphqlParser; +use biome_parser::diagnostic::{expected_any, expected_node, ParseDiagnostic}; +use biome_rowan::TextRange; + +pub(crate) fn expected_any_definition(p: &GraphqlParser, range: TextRange) -> ParseDiagnostic { + expected_node("definition", range, p) +} + +pub(crate) fn expected_selection_set(p: &GraphqlParser, range: TextRange) -> ParseDiagnostic { + expected_node("selection set", range, p) +} + +pub(crate) fn expected_any_selection(p: &GraphqlParser, range: TextRange) -> ParseDiagnostic { + // TODO: any selection + expected_any(&["field", "fragment spread"], range, p) +} + +pub(crate) fn expected_name(p: &GraphqlParser, range: TextRange) -> ParseDiagnostic { + expected_node("name", range, p) +} + +pub(crate) fn expected_directive(p: &GraphqlParser, range: TextRange) -> 
ParseDiagnostic { + expected_node("directive", range, p) +} diff --git a/crates/biome_graphql_parser/src/token_source.rs b/crates/biome_graphql_parser/src/token_source.rs new file mode 100644 index 000000000000..d30e963a91c9 --- /dev/null +++ b/crates/biome_graphql_parser/src/token_source.rs @@ -0,0 +1,144 @@ +use crate::lexer::GraphqlLexer; +use biome_graphql_syntax::GraphqlSyntaxKind::{EOF, TOMBSTONE}; +use biome_graphql_syntax::{GraphqlSyntaxKind, TextRange}; +use biome_parser::diagnostic::ParseDiagnostic; +use biome_parser::lexer::Lexer; +use biome_parser::prelude::TokenSource; +use biome_parser::token_source::Trivia; +use biome_rowan::TriviaPieceKind; + +pub(crate) struct GraphqlTokenSource<'source> { + lexer: GraphqlLexer<'source>, + trivia: Vec, + current: NonTriviaToken, + next: Option, +} + +struct NonTriviaToken { + kind: GraphqlSyntaxKind, + range: TextRange, + preceding_line_break: bool, +} + +impl Default for NonTriviaToken { + fn default() -> Self { + Self { + kind: TOMBSTONE, + range: TextRange::default(), + preceding_line_break: false, + } + } +} + +impl<'source> GraphqlTokenSource<'source> { + pub fn from_str(source: &'source str) -> Self { + let lexer = GraphqlLexer::from_str(source); + + let mut source = Self { + lexer, + trivia: Vec::new(), + current: NonTriviaToken::default(), + next: None, + }; + + source.advance_to_next_non_trivia_token(true); + source + } + + fn advance_to_next_non_trivia_token(&mut self, first_token: bool) { + self.current = match self.next.take() { + Some(next) => next, + None => self.next_non_trivia_token(first_token), + } + } + + pub fn lookahead(&mut self) -> GraphqlSyntaxKind { + match self.next.as_ref() { + Some(next) => next.kind, + None if self.current.kind != EOF => { + let next_token = self.next_non_trivia_token(false); + let next_kind = next_token.kind; + self.next = Some(next_token); + next_kind + } + None => EOF, + } + } + + #[must_use] + fn next_non_trivia_token(&mut self, first_token: bool) -> NonTriviaToken 
{ + let mut non_trivia_token = NonTriviaToken::default(); + + let mut trailing = !first_token; + + loop { + let kind = self.lexer.next_token(()); + let trivia_kind = TriviaPieceKind::try_from(kind); + + match trivia_kind { + Err(_) => { + // Not trivia + non_trivia_token.kind = kind; + non_trivia_token.range = self.lexer.current_range(); + break; + } + Ok(trivia_kind) => { + if trivia_kind.is_newline() { + trailing = false; + non_trivia_token.preceding_line_break = true; + } + + self.trivia.push(Trivia::new( + trivia_kind, + self.lexer.current_range(), + trailing, + )); + } + } + } + + non_trivia_token + } +} + +impl<'source> TokenSource for GraphqlTokenSource<'source> { + type Kind = GraphqlSyntaxKind; + + fn current(&self) -> Self::Kind { + self.current.kind + } + + fn current_range(&self) -> TextRange { + self.current.range + } + + fn text(&self) -> &str { + self.lexer.source() + } + + fn has_preceding_line_break(&self) -> bool { + self.current.preceding_line_break + } + + fn bump(&mut self) { + if self.current.kind != EOF { + self.advance_to_next_non_trivia_token(false) + } + } + + fn skip_as_trivia(&mut self) { + if self.current() != EOF { + self.trivia.push(Trivia::new( + TriviaPieceKind::Skipped, + self.current_range(), + false, + )); + + self.advance_to_next_non_trivia_token(false) + } + } + + fn finish(self) -> (Vec, Vec) { + (self.trivia, self.lexer.finish()) + } +} diff --git a/crates/biome_graphql_parser/tests/graphql_test_suite/ok/operation.graphql b/crates/biome_graphql_parser/tests/graphql_test_suite/ok/operation.graphql new file mode 100644 index 000000000000..78c59b8f2e9d --- /dev/null +++ b/crates/biome_graphql_parser/tests/graphql_test_suite/ok/operation.graphql @@ -0,0 +1,7 @@ +query { + likeStory +} + +{ + field +} diff --git a/crates/biome_graphql_parser/tests/graphql_test_suite/ok/operation.graphql.snap b/crates/biome_graphql_parser/tests/graphql_test_suite/ok/operation.graphql.snap new file mode 100644 index 000000000000..dc0c07399b60 --- 
/dev/null +++ b/crates/biome_graphql_parser/tests/graphql_test_suite/ok/operation.graphql.snap @@ -0,0 +1,102 @@ +--- +source: crates/biome_graphql_parser/tests/spec_test.rs +expression: snapshot +--- +## Input +```graphql +query { + likeStory +} + +{ + field +} + +``` + +## AST + +``` +GraphqlRoot { + bom_token: missing (optional), + definitions: GraphqlDefinitionList [ + GraphqlOperationDefinition { + ty: GraphqlOperationType { + value_token: QUERY_KW@0..6 "query" [] [Whitespace(" ")], + }, + name: missing (optional), + variables: missing (optional), + directives: GraphqlDirectiveList [], + selection_set: GraphqlSelectionSet { + l_curly_token: L_CURLY@6..7 "{" [] [], + selections: GraphqlSelectionList [ + GraphqlField { + alias: missing (optional), + name: GraphqlName { + value_token: GRAPHQL_NAME@7..19 "likeStory" [Newline("\n"), Whitespace(" ")] [], + }, + arguments: missing (optional), + directives: GraphqlDirectiveList [], + selection_set: missing (optional), + }, + ], + r_curly_token: R_CURLY@19..21 "}" [Newline("\n")] [], + }, + }, + GraphqlSelectionSet { + l_curly_token: L_CURLY@21..24 "{" [Newline("\n"), Newline("\n")] [], + selections: GraphqlSelectionList [ + GraphqlField { + alias: missing (optional), + name: GraphqlName { + value_token: GRAPHQL_NAME@24..32 "field" [Newline("\n"), Whitespace(" ")] [], + }, + arguments: missing (optional), + directives: GraphqlDirectiveList [], + selection_set: missing (optional), + }, + ], + r_curly_token: R_CURLY@32..34 "}" [Newline("\n")] [], + }, + ], + eof_token: EOF@34..35 "" [Newline("\n")] [], +} +``` + +## CST + +``` +0: GRAPHQL_ROOT@0..35 + 0: (empty) + 1: GRAPHQL_DEFINITION_LIST@0..34 + 0: GRAPHQL_OPERATION_DEFINITION@0..21 + 0: GRAPHQL_OPERATION_TYPE@0..6 + 0: QUERY_KW@0..6 "query" [] [Whitespace(" ")] + 1: (empty) + 2: (empty) + 3: GRAPHQL_DIRECTIVE_LIST@6..6 + 4: GRAPHQL_SELECTION_SET@6..21 + 0: L_CURLY@6..7 "{" [] [] + 1: GRAPHQL_SELECTION_LIST@7..19 + 0: GRAPHQL_FIELD@7..19 + 0: (empty) + 1: 
GRAPHQL_NAME@7..19 + 0: GRAPHQL_NAME@7..19 "likeStory" [Newline("\n"), Whitespace(" ")] [] + 2: (empty) + 3: GRAPHQL_DIRECTIVE_LIST@19..19 + 4: (empty) + 2: R_CURLY@19..21 "}" [Newline("\n")] [] + 1: GRAPHQL_SELECTION_SET@21..34 + 0: L_CURLY@21..24 "{" [Newline("\n"), Newline("\n")] [] + 1: GRAPHQL_SELECTION_LIST@24..32 + 0: GRAPHQL_FIELD@24..32 + 0: (empty) + 1: GRAPHQL_NAME@24..32 + 0: GRAPHQL_NAME@24..32 "field" [Newline("\n"), Whitespace(" ")] [] + 2: (empty) + 3: GRAPHQL_DIRECTIVE_LIST@32..32 + 4: (empty) + 2: R_CURLY@32..34 "}" [Newline("\n")] [] + 2: EOF@34..35 "" [Newline("\n")] [] + +``` diff --git a/crates/biome_graphql_parser/tests/spec_test.rs b/crates/biome_graphql_parser/tests/spec_test.rs new file mode 100644 index 000000000000..18c7a8fcdb8f --- /dev/null +++ b/crates/biome_graphql_parser/tests/spec_test.rs @@ -0,0 +1,116 @@ +use biome_console::fmt::{Formatter, Termcolor}; +use biome_console::markup; +use biome_diagnostics::display::PrintDiagnostic; +use biome_diagnostics::termcolor; +use biome_diagnostics::DiagnosticExt; +use biome_graphql_parser::parse_graphql; +use biome_rowan::SyntaxKind; +use std::fmt::Write; +use std::fs; +use std::path::Path; + +#[derive(Copy, Clone)] +pub enum ExpectedOutcome { + Pass, + Fail, +} + +pub fn run(test_case: &str, _snapshot_name: &str, test_directory: &str, outcome_str: &str) { + let outcome = match outcome_str { + "ok" => ExpectedOutcome::Pass, + "error" => ExpectedOutcome::Fail, + _ => panic!("Invalid expected outcome {outcome_str}"), + }; + + let test_case_path = Path::new(test_case); + + let file_name = test_case_path + .file_name() + .expect("Expected test to have a file name") + .to_str() + .expect("File name to be valid UTF8"); + + let content = fs::read_to_string(test_case_path) + .expect("Expected test path to be a readable file in UTF8 encoding"); + + let parsed = parse_graphql(&content); + let formatted_ast = format!("{:#?}", parsed.tree()); + + let mut snapshot = String::new(); + writeln!(snapshot, 
"## Input\n```graphql\n{content}\n```\n").unwrap(); + + writeln!( + snapshot, + r#"## AST + +``` +{formatted_ast} +``` + +## CST + +``` +{:#?} +``` +"#, + parsed.syntax() + ) + .unwrap(); + + let diagnostics = parsed.diagnostics(); + if !diagnostics.is_empty() { + let mut diagnostics_buffer = termcolor::Buffer::no_color(); + + let termcolor = &mut Termcolor(&mut diagnostics_buffer); + let mut formatter = Formatter::new(termcolor); + + for diagnostic in diagnostics { + let error = diagnostic + .clone() + .with_file_path(file_name) + .with_file_source_code(&content); + + formatter + .write_markup(markup! { { PrintDiagnostic::verbose(&error) } }) + .expect("failed to emit diagnostic"); + } + + let formatted_diagnostics = + std::str::from_utf8(diagnostics_buffer.as_slice()).expect("non utf8 in error buffer"); + + if matches!(outcome, ExpectedOutcome::Pass) { + panic!("Expected no errors to be present in a test case that is expected to pass but the following diagnostics are present:\n{formatted_diagnostics}") + } + + writeln!(snapshot, "## Diagnostics\n\n```").unwrap(); + snapshot.write_str(formatted_diagnostics).unwrap(); + + writeln!(snapshot, "```\n").unwrap(); + } + + match outcome { + ExpectedOutcome::Pass => { + let missing_required = formatted_ast.contains("missing (required)"); + if missing_required + || parsed + .syntax() + .descendants() + .any(|node| node.kind().is_bogus()) + { + panic!("Parsed tree of a 'OK' test case should not contain any missing required children or bogus nodes:\n{formatted_ast}"); + } + } + ExpectedOutcome::Fail => { + if parsed.diagnostics().is_empty() { + panic!("Failing test must have diagnostics"); + } + } + } + + insta::with_settings!({ + prepend_module_to_snapshot => false, + snapshot_path => &test_directory, + }, { + insta::assert_snapshot!(file_name, snapshot); + }); +} diff --git a/crates/biome_graphql_parser/tests/spec_tests.rs b/crates/biome_graphql_parser/tests/spec_tests.rs new file mode 100644 index 
000000000000..53b31f22ca24 --- /dev/null +++ b/crates/biome_graphql_parser/tests/spec_tests.rs @@ -0,0 +1,13 @@ +#![allow(non_snake_case)] + +mod spec_test; + +mod ok { + //! Tests that are valid GraphQL + tests_macros::gen_tests! {"tests/graphql_test_suite/ok/*.graphql", crate::spec_test::run, "ok"} +} + +mod err { + //! Tests that must fail because they are not valid GraphQL + tests_macros::gen_tests! {"tests/graphql_test_suite/err/*.graphql", crate::spec_test::run, "error"} +} diff --git a/crates/biome_graphql_syntax/src/generated/kind.rs b/crates/biome_graphql_syntax/src/generated/kind.rs index 37e3648a891d..bfd8ec7ef3dc 100644 --- a/crates/biome_graphql_syntax/src/generated/kind.rs +++ b/crates/biome_graphql_syntax/src/generated/kind.rs @@ -71,12 +71,10 @@ pub enum GraphqlSyntaxKind { ERROR_TOKEN, NEWLINE, WHITESPACE, - IDENT, + GRAPHQL_NAME, COMMENT, - MULTILINE_COMMENT, COMMA, - GRAPHQL_NAME, - GRAPHQL_DOCUMENT, + GRAPHQL_ROOT, GRAPHQL_DEFINITION_LIST, GRAPHQL_FRAGMENT_DEFINITION, GRAPHQL_DIRECTIVE_DEFINITION, @@ -102,11 +100,6 @@ pub enum GraphqlSyntaxKind { GRAPHQL_FRAGMENT_NAME, GRAPHQL_TYPE_CONDITION, GRAPHQL_VARIABLE, - GRAPHQL_STRING_VALUE, - GRAPHQL_FLOAT_VALUE, - GRAPHQL_INT_VALUE, - GRAPHQL_BOOLEAN_VALUE, - GRAPHQL_NULL_VALUE, GRAPHQL_ENUM_VALUE, GRAPHQL_LIST_VALUE, GRAPHQL_LIST_VALUE_ELEMENT_LIST, @@ -156,6 +149,11 @@ pub enum GraphqlSyntaxKind { GRAPHQL_INPUT_OBJECT_TYPE_EXTENSION, GRAPHQL_DIRECTIVE_LOCATION_LIST, GRAPHQL_DIRECTIVE_LOCATION, + GRAPHQL_STRING_VALUE, + GRAPHQL_FLOAT_VALUE, + GRAPHQL_INT_VALUE, + GRAPHQL_BOOLEAN_VALUE, + GRAPHQL_NULL_VALUE, GRAPHQL_BOGUS, GRAPHQL_BOGUS_DEFINITION, GRAPHQL_BOGUS_SELECTION, diff --git a/crates/biome_graphql_syntax/src/generated/macros.rs b/crates/biome_graphql_syntax/src/generated/macros.rs index 98647d4a8de8..150d4dd79be5 100644 --- a/crates/biome_graphql_syntax/src/generated/macros.rs +++ b/crates/biome_graphql_syntax/src/generated/macros.rs @@ -58,10 +58,6 @@ macro_rules! 
map_syntax_node { let $pattern = unsafe { $crate::GraphqlDirectiveLocation::new_unchecked(node) }; $body } - $crate::GraphqlSyntaxKind::GRAPHQL_DOCUMENT => { - let $pattern = unsafe { $crate::GraphqlDocument::new_unchecked(node) }; - $body - } $crate::GraphqlSyntaxKind::GRAPHQL_ENUM_TYPE_DEFINITION => { let $pattern = unsafe { $crate::GraphqlEnumTypeDefinition::new_unchecked(node) }; @@ -243,6 +239,10 @@ macro_rules! map_syntax_node { let $pattern = unsafe { $crate::GraphqlOperationType::new_unchecked(node) }; $body } + $crate::GraphqlSyntaxKind::GRAPHQL_ROOT => { + let $pattern = unsafe { $crate::GraphqlRoot::new_unchecked(node) }; + $body + } $crate::GraphqlSyntaxKind::GRAPHQL_ROOT_OPERATION_TYPE_DEFINITION => { let $pattern = unsafe { $crate::GraphqlRootOperationTypeDefinition::new_unchecked(node) }; diff --git a/crates/biome_graphql_syntax/src/generated/nodes.rs b/crates/biome_graphql_syntax/src/generated/nodes.rs index be6e8e8446c3..907dd55fe23a 100644 --- a/crates/biome_graphql_syntax/src/generated/nodes.rs +++ b/crates/biome_graphql_syntax/src/generated/nodes.rs @@ -476,42 +476,6 @@ pub struct GraphqlDirectiveLocationFields { pub value_token: SyntaxResult, } #[derive(Clone, PartialEq, Eq, Hash)] -pub struct GraphqlDocument { - pub(crate) syntax: SyntaxNode, -} -impl GraphqlDocument { - #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] - #[doc = r""] - #[doc = r" # Safety"] - #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] - #[doc = r" or a match on [SyntaxNode::kind]"] - #[inline] - pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { - Self { syntax } - } - pub fn as_fields(&self) -> GraphqlDocumentFields { - GraphqlDocumentFields { - graphql_definition_list: self.graphql_definition_list(), - } - } - pub fn graphql_definition_list(&self) -> GraphqlDefinitionList { - support::list(&self.syntax, 0usize) - } -} -#[cfg(feature = "serde")] -impl Serialize for GraphqlDocument { - fn 
serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - self.as_fields().serialize(serializer) - } -} -#[cfg_attr(feature = "serde", derive(Serialize))] -pub struct GraphqlDocumentFields { - pub graphql_definition_list: GraphqlDefinitionList, -} -#[derive(Clone, PartialEq, Eq, Hash)] pub struct GraphqlEnumTypeDefinition { pub(crate) syntax: SyntaxNode, } @@ -2422,6 +2386,52 @@ pub struct GraphqlOperationTypeFields { pub value_token: SyntaxResult, } #[derive(Clone, PartialEq, Eq, Hash)] +pub struct GraphqlRoot { + pub(crate) syntax: SyntaxNode, +} +impl GraphqlRoot { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } + } + pub fn as_fields(&self) -> GraphqlRootFields { + GraphqlRootFields { + bom_token: self.bom_token(), + definitions: self.definitions(), + eof_token: self.eof_token(), + } + } + pub fn bom_token(&self) -> Option { + support::token(&self.syntax, 0usize) + } + pub fn definitions(&self) -> GraphqlDefinitionList { + support::list(&self.syntax, 1usize) + } + pub fn eof_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 2usize) + } +} +#[cfg(feature = "serde")] +impl Serialize for GraphqlRoot { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) + } +} +#[cfg_attr(feature = "serde", derive(Serialize))] +pub struct GraphqlRootFields { + pub bom_token: Option, + pub definitions: GraphqlDefinitionList, + pub eof_token: SyntaxResult, +} +#[derive(Clone, PartialEq, Eq, Hash)] pub struct GraphqlRootOperationTypeDefinition { pub(crate) syntax: SyntaxNode, } @@ -4174,44 +4184,6 @@ impl From for SyntaxElement { n.syntax.into() } } -impl AstNode for GraphqlDocument { - type 
Language = Language; - const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(GRAPHQL_DOCUMENT as u16)); - fn can_cast(kind: SyntaxKind) -> bool { - kind == GRAPHQL_DOCUMENT - } - fn cast(syntax: SyntaxNode) -> Option { - if Self::can_cast(syntax.kind()) { - Some(Self { syntax }) - } else { - None - } - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } - fn into_syntax(self) -> SyntaxNode { - self.syntax - } -} -impl std::fmt::Debug for GraphqlDocument { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("GraphqlDocument") - .field("graphql_definition_list", &self.graphql_definition_list()) - .finish() - } -} -impl From for SyntaxNode { - fn from(n: GraphqlDocument) -> SyntaxNode { - n.syntax - } -} -impl From for SyntaxElement { - fn from(n: GraphqlDocument) -> SyntaxElement { - n.syntax.into() - } -} impl AstNode for GraphqlEnumTypeDefinition { type Language = Language; const KIND_SET: SyntaxKindSet = @@ -6006,6 +5978,49 @@ impl From for SyntaxElement { n.syntax.into() } } +impl AstNode for GraphqlRoot { + type Language = Language; + const KIND_SET: SyntaxKindSet = + SyntaxKindSet::from_raw(RawSyntaxKind(GRAPHQL_ROOT as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == GRAPHQL_ROOT + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } + fn into_syntax(self) -> SyntaxNode { + self.syntax + } +} +impl std::fmt::Debug for GraphqlRoot { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("GraphqlRoot") + .field( + "bom_token", + &support::DebugOptionalElement(self.bom_token()), + ) + .field("definitions", &self.definitions()) + .field("eof_token", &support::DebugSyntaxResult(self.eof_token())) + .finish() + } +} +impl From for SyntaxNode { + fn from(n: GraphqlRoot) -> SyntaxNode { + n.syntax + } +} +impl From for SyntaxElement { + 
fn from(n: GraphqlRoot) -> SyntaxElement { + n.syntax.into() + } +} impl AstNode for GraphqlRootOperationTypeDefinition { type Language = Language; const KIND_SET: SyntaxKindSet = @@ -8256,11 +8271,6 @@ impl std::fmt::Display for GraphqlDirectiveLocation { std::fmt::Display::fmt(self.syntax(), f) } } -impl std::fmt::Display for GraphqlDocument { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - std::fmt::Display::fmt(self.syntax(), f) - } -} impl std::fmt::Display for GraphqlEnumTypeDefinition { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) @@ -8456,6 +8466,11 @@ impl std::fmt::Display for GraphqlOperationType { std::fmt::Display::fmt(self.syntax(), f) } } +impl std::fmt::Display for GraphqlRoot { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} impl std::fmt::Display for GraphqlRootOperationTypeDefinition { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) diff --git a/crates/biome_graphql_syntax/src/generated/nodes_mut.rs b/crates/biome_graphql_syntax/src/generated/nodes_mut.rs index a89a068f4f3f..7d844a353842 100644 --- a/crates/biome_graphql_syntax/src/generated/nodes_mut.rs +++ b/crates/biome_graphql_syntax/src/generated/nodes_mut.rs @@ -191,14 +191,6 @@ impl GraphqlDirectiveLocation { ) } } -impl GraphqlDocument { - pub fn with_graphql_definition_list(self, element: GraphqlDefinitionList) -> Self { - Self::unwrap_cast( - self.syntax - .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), - ) - } -} impl GraphqlEnumTypeDefinition { pub fn with_description(self, element: Option) -> Self { Self::unwrap_cast(self.syntax.splice_slots( @@ -1123,6 +1115,26 @@ impl GraphqlOperationType { ) } } +impl GraphqlRoot { + pub fn with_bom_token(self, element: Option) -> Self { + Self::unwrap_cast( + self.syntax + 
.splice_slots(0usize..=0usize, once(element.map(|element| element.into()))), + ) + } + pub fn with_definitions(self, element: GraphqlDefinitionList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), + ) + } + pub fn with_eof_token(self, element: SyntaxToken) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(2usize..=2usize, once(Some(element.into()))), + ) + } +} impl GraphqlRootOperationTypeDefinition { pub fn with_operation_type(self, element: GraphqlOperationType) -> Self { Self::unwrap_cast( diff --git a/crates/biome_graphql_syntax/src/lib.rs b/crates/biome_graphql_syntax/src/lib.rs index 4059d3e78d32..e9418bd90da6 100644 --- a/crates/biome_graphql_syntax/src/lib.rs +++ b/crates/biome_graphql_syntax/src/lib.rs @@ -33,7 +33,6 @@ impl GraphqlSyntaxKind { GraphqlSyntaxKind::NEWLINE | GraphqlSyntaxKind::WHITESPACE | GraphqlSyntaxKind::COMMENT - | GraphqlSyntaxKind::MULTILINE_COMMENT | GraphqlSyntaxKind::COMMA ) } @@ -77,7 +76,7 @@ impl biome_rowan::SyntaxKind for GraphqlSyntaxKind { } fn is_root(&self) -> bool { - GraphqlDocument::can_cast(*self) + GraphqlRoot::can_cast(*self) } fn is_list(&self) -> bool { @@ -97,8 +96,9 @@ impl TryFrom for TriviaPieceKind { match value { GraphqlSyntaxKind::NEWLINE => Ok(TriviaPieceKind::Newline), GraphqlSyntaxKind::WHITESPACE => Ok(TriviaPieceKind::Whitespace), + // https://spec.graphql.org/October2021/#sec-Insignificant-Commas + GraphqlSyntaxKind::COMMA => Ok(TriviaPieceKind::Whitespace), GraphqlSyntaxKind::COMMENT => Ok(TriviaPieceKind::SingleLineComment), - GraphqlSyntaxKind::MULTILINE_COMMENT => Ok(TriviaPieceKind::MultiLineComment), _ => unreachable!("Not Trivia"), } } else { diff --git a/crates/biome_graphql_syntax/src/syntax_node.rs b/crates/biome_graphql_syntax/src/syntax_node.rs index 10401c3d8f7b..1681f9083b1a 100644 --- a/crates/biome_graphql_syntax/src/syntax_node.rs +++ b/crates/biome_graphql_syntax/src/syntax_node.rs @@ -5,7 +5,7 @@ //! 
//! This is a simple wrapper around the `rowan` crate which does most of the heavy lifting and is language agnostic. -use crate::{GraphqlDocument, GraphqlSyntaxKind}; +use crate::{GraphqlRoot, GraphqlSyntaxKind}; use biome_rowan::Language; #[cfg(feature = "serde")] use serde::Serialize; @@ -16,7 +16,7 @@ pub struct GraphqlLanguage; impl Language for GraphqlLanguage { type Kind = GraphqlSyntaxKind; - type Root = GraphqlDocument; + type Root = GraphqlRoot; } pub type GraphqlSyntaxNode = biome_rowan::SyntaxNode; diff --git a/xtask/codegen/graphql.ungram b/xtask/codegen/graphql.ungram index 135f4ad68efd..6b7e70914c66 100644 --- a/xtask/codegen/graphql.ungram +++ b/xtask/codegen/graphql.ungram @@ -45,9 +45,12 @@ GraphqlBogusType = SyntaxElement* // Extension that don't add anything GraphqlBogusExtension = SyntaxElement* -GraphqlName = value: 'ident' +GraphqlName = value: 'graphql_name' -GraphqlDocument = GraphqlDefinitionList +GraphqlRoot = + bom: 'UNICODE_BOM'? + definitions: GraphqlDefinitionList + eof: 'EOF' GraphqlDefinitionList = AnyGraphqlDefinition* diff --git a/xtask/codegen/src/graphql_kind_src.rs b/xtask/codegen/src/graphql_kind_src.rs index 08b78089fc60..b93e8e2d8995 100644 --- a/xtask/codegen/src/graphql_kind_src.rs +++ b/xtask/codegen/src/graphql_kind_src.rs @@ -66,14 +66,12 @@ pub const GRAPHQL_KINDS_SRC: KindsSrc = KindsSrc { "ERROR_TOKEN", "NEWLINE", "WHITESPACE", - "IDENT", + "GRAPHQL_NAME", "COMMENT", - "MULTILINE_COMMENT", "COMMA", ], nodes: &[ - "GRAPHQL_NAME", - "GRAPHQL_DOCUMENT", + "GRAPHQL_ROOT", "GRAPHQL_DEFINITION_LIST", "GRAPHQL_FRAGMENT_DEFINITION", "GRAPHQL_DIRECTIVE_DEFINITION", @@ -99,11 +97,6 @@ pub const GRAPHQL_KINDS_SRC: KindsSrc = KindsSrc { "GRAPHQL_FRAGMENT_NAME", "GRAPHQL_TYPE_CONDITION", "GRAPHQL_VARIABLE", - "GRAPHQL_STRING_VALUE", - "GRAPHQL_FLOAT_VALUE", - "GRAPHQL_INT_VALUE", - "GRAPHQL_BOOLEAN_VALUE", - "GRAPHQL_NULL_VALUE", "GRAPHQL_ENUM_VALUE", "GRAPHQL_LIST_VALUE", "GRAPHQL_LIST_VALUE_ELEMENT_LIST", @@ -153,6 +146,12 @@ 
pub const GRAPHQL_KINDS_SRC: KindsSrc = KindsSrc { "GRAPHQL_INPUT_OBJECT_TYPE_EXTENSION", "GRAPHQL_DIRECTIVE_LOCATION_LIST", "GRAPHQL_DIRECTIVE_LOCATION", + // literal wrappers: + "GRAPHQL_STRING_VALUE", + "GRAPHQL_FLOAT_VALUE", + "GRAPHQL_INT_VALUE", + "GRAPHQL_BOOLEAN_VALUE", + "GRAPHQL_NULL_VALUE", // Bogus nodes "GRAPHQL_BOGUS", "GRAPHQL_BOGUS_DEFINITION",