Update parser API to merge lexing and parsing (#11494)

## Summary

This PR updates the parser API within the `ruff_python_parser` crate. It doesn't change any of the references in this PR.

The final API looks like:

```rs
pub fn parse_module(source: &str) -> Result<Program<ModModule>, ParseError> {}

pub fn parse_expression(source: &str) -> Result<Program<ModExpression>, ParseError> {}

pub fn parse_expression_range(
    source: &str,
    range: TextRange,
) -> Result<Program<ModExpression>, ParseError> {}

pub fn parse(source: &str, mode: Mode) -> Result<Program<Mod>, ParseError> {}

// Temporary. The `parse` will replace this function once we update the downstream
// tools to work with programs containing syntax error.
pub fn parse_unchecked(source: &str, mode: Mode) -> Program<Mod> {}
```

Following is a detailed list of changes:

* Make `Program` generic over `T` which can be either `Mod` (enum), `ModModule` or `ModExpression`.
* Add helper methods to cast `Mod` into `ModModule` or `ModExpression`.
* Add helper method `Program::into_result` which converts a `Program<T>` into a `Result<Program<T>, ParseError>` where the `Err` variant contains the first `ParseError`.
* Update `TokenSource` to store the comment ranges.
* Parser crate depends on `ruff_python_trivia` because of `CommentRanges`. This struct could possibly be moved into the parser crate itself at the end.
* Move from `parse_expression_starts_at` to `parse_expression_range` which parses the source code at the given range using `Mode::Expression`. Unlike the `starts_at` variant, this accepts the entire source code.
* Remove all access to the `Lexer`.
* Remove all `parse_*` functions which work on the tokens provided by the caller.

## Test Plan

The good news is that the tests in `ruff_python_parser` can be run. So,

```
cargo insta test --package ruff_python_parser
```
astral-sh · May 28, 2024 · 2f1bb50 · 2f1bb50
1 parent ddd7392
commit 2f1bb50
Show file tree

Hide file tree

Showing 10 changed files with 383 additions and 514 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/ruff_python_parser/Cargo.toml b/crates/ruff_python_parser/Cargo.toml
@@ -14,6 +14,7 @@ license = { workspace = true }
 
 [dependencies]
 ruff_python_ast = { workspace = true }
+ruff_python_trivia = { workspace = true }
 ruff_text_size = { workspace = true }
 
 anyhow = { workspace = true }

diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs
@@ -9,23 +9,6 @@
 //! as a `Result<Spanned, LexicalError>`, where [`Spanned`] is a tuple containing the
 //! start and end [`TextSize`] and a [`Tok`] denoting the token.
 //!
-//! # Example
-//!
-//! ```
-//! use ruff_python_parser::{lexer::lex, Tok, Mode};
-//!
-//! let source = "x = 'RustPython'";
-//! let tokens = lex(source, Mode::Module)
-//!     .map(|tok| tok.expect("Failed to lex"))
-//!     .collect::<Vec<_>>();
-//!
-//! for (token, range) in tokens {
-//!     println!(
-//!         "{token:?}@{range:?}",
-//!     );
-//! }
-//! ```
-//!
 //! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html
 
 use std::{char, cmp::Ordering, str::FromStr};
@@ -1381,6 +1364,10 @@ impl Token {
         self.kind
     }
 
+    pub(crate) const fn is_comment(self) -> bool {
+        matches!(self.kind, TokenKind::Comment)
+    }
+
     pub(crate) const fn is_trivia(self) -> bool {
         matches!(self.kind, TokenKind::Comment | TokenKind::NonLogicalNewline)
     }