From 2f1bb50cd5932e50051a2c40d4f493e4a549d2cd Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala
Date: Mon, 27 May 2024 22:23:52 +0530
Subject: [PATCH] Update parser API to merge lexing and parsing (#11494)

## Summary

This PR updates the parser API within the `ruff_python_parser` crate. It doesn't
update any of the existing references to the old API; that is left to follow-up PRs.
The final API looks like:

```rs
pub fn parse_module(source: &str) -> Result<Program<ModModule>, ParseError> {}

pub fn parse_expression(source: &str) -> Result<Program<ModExpression>, ParseError> {}

pub fn parse_expression_range(
    source: &str,
    range: TextRange,
) -> Result<Program<ModExpression>, ParseError> {}

pub fn parse(source: &str, mode: Mode) -> Result<Program<Mod>, ParseError> {}

// Temporary. The `parse` function will replace this one once we update the downstream
// tools to work with programs containing syntax errors.
pub fn parse_unchecked(source: &str, mode: Mode) -> Program<Mod> {}
```

Following is a detailed list of changes:

* Make `Program` generic over `T`, which can be either `Mod` (enum), `ModModule`, or
  `ModExpression`
* Add helper methods to cast `Mod` into `ModModule` or `ModExpression`
* Add a helper method `Program::into_result` which converts a `Program<T>` into a
  `Result<Program<T>, ParseError>` where the `Err` variant contains the first
  `ParseError`
* Update `TokenSource` to store the comment ranges
* The parser crate now depends on `ruff_python_trivia` because of `CommentRanges`.
  This struct could possibly be moved into the parser crate itself in the end
* Move from `parse_expression_starts_at` to `parse_expression_range`, which parses the
  source code at the given range using `Mode::Expression`. Unlike the `starts_at`
  variant, this accepts the entire source code
* Remove all access to the `Lexer`
* Remove all `parse_*` functions which work on tokens provided by the caller

## Test Plan

The tests in `ruff_python_parser` can still be run:

```
cargo insta test --package ruff_python_parser
```

---
 Cargo.lock                                    |   1 +
 crates/ruff_python_parser/Cargo.toml          |   1 +
 crates/ruff_python_parser/src/lexer.rs        |  21 +-
 crates/ruff_python_parser/src/lib.rs          | 492 ++++++++----------
 crates/ruff_python_parser/src/parser/mod.rs   |  92 +---
 crates/ruff_python_parser/src/parser/tests.rs |  30 +-
 crates/ruff_python_parser/src/string.rs       | 192 ++++---
 crates/ruff_python_parser/src/token_source.rs |  31 +-
 crates/ruff_python_parser/src/typing.rs       |  19 +-
 crates/ruff_python_parser/tests/fixtures.rs   |  18 +-
 10 files changed, 383 insertions(+), 514 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 96e0ebde771bf..74e3fb12ed4d6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2297,6 +2297,7 @@ dependencies = [
  "itertools 0.12.1",
  "memchr",
  "ruff_python_ast",
+ "ruff_python_trivia",
  "ruff_source_file",
  "ruff_text_size",
  "rustc-hash",

diff --git a/crates/ruff_python_parser/Cargo.toml b/crates/ruff_python_parser/Cargo.toml
index fc064e6f0a84d..00ac193efedf0 100644
--- a/crates/ruff_python_parser/Cargo.toml
+++ b/crates/ruff_python_parser/Cargo.toml
@@ -14,6 +14,7 @@ license = { workspace = true }

 [dependencies]
 ruff_python_ast = { workspace = true }
+ruff_python_trivia = { workspace = true }
 ruff_text_size = { workspace = true }

 anyhow = { workspace = true }

diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs
index 1606d02081882..0c1455923a089 100644
--- a/crates/ruff_python_parser/src/lexer.rs
+++ b/crates/ruff_python_parser/src/lexer.rs
@@ -9,23 +9,6 @@
 //! as a `Result<Spanned, LexicalError>`, where [`Spanned`] is a tuple containing the
 //! start and end [`TextSize`] and a [`Tok`] denoting the token.
 //!
-//! # Example
-//!
-//! ```
-//! use ruff_python_parser::{lexer::lex, Tok, Mode};
-//!
-//! let source = "x = 'RustPython'";
-//! let tokens = lex(source, Mode::Module)
-//!     .map(|tok| tok.expect("Failed to lex"))
-//!     .collect::<Vec<_>>();
-//!
-//! for (token, range) in tokens {
-//!     println!(
-//!         "{token:?}@{range:?}",
-//!     );
-//! }
-//! ```
-//!
 //! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html

 use std::{char, cmp::Ordering, str::FromStr};

@@ -1381,6 +1364,10 @@ impl Token {
         self.kind
     }

+    pub(crate) const fn is_comment(self) -> bool {
+        matches!(self.kind, TokenKind::Comment)
+    }
+
     pub(crate) const fn is_trivia(self) -> bool {
         matches!(self.kind, TokenKind::Comment | TokenKind::NonLogicalNewline)
     }

diff --git a/crates/ruff_python_parser/src/lib.rs b/crates/ruff_python_parser/src/lib.rs
index 1c24b9764f2a1..cf232e8efca53 100644
--- a/crates/ruff_python_parser/src/lib.rs
+++ b/crates/ruff_python_parser/src/lib.rs
@@ -60,67 +60,23 @@
 //! - parser: This module contains an interface to the [Program] and is responsible for generating the AST.
 //! - mode: This module contains the definition of the different modes that the `ruff_python_parser` can be in.
 //!
-//! # Examples
-//!
-//! For example, to get a stream of tokens from a given string, one could do this:
-//!
-//! ```
-//! use ruff_python_parser::{lexer::lex, Mode};
-//!
-//! let python_source = r#"
-//! def is_odd(i):
-//!    return bool(i & 1)
-//! "#;
-//! let mut tokens = lex(python_source, Mode::Module);
-//! assert!(tokens.all(|t| t.is_ok()));
-//! ```
-//!
-//! These tokens can be directly fed into the `ruff_python_parser` to generate an AST:
-//!
-//! ```
-//! use ruff_python_parser::lexer::lex;
-//! use ruff_python_parser::{Mode, parse_tokens};
-//!
-//! let python_source = r#"
-//! def is_odd(i):
-//!     return bool(i & 1)
-//! "#;
-//! let tokens = lex(python_source, Mode::Module);
-//! let ast = parse_tokens(tokens.collect(), python_source, Mode::Module);
-//!
-//! assert!(ast.is_ok());
-//! ```
-//!
-//! Alternatively, you can use one of the other `parse_*` functions to parse a string directly without using a specific
-//! mode or tokenizing the source beforehand:
-//!
-//! ```
-//! use ruff_python_parser::parse_suite;
-//!
-//! let python_source = r#"
-//! def is_odd(i):
-//!     return bool(i & 1)
-//! "#;
-//! let ast = parse_suite(python_source);
-//!
-//! assert!(ast.is_ok());
-//! ```
-//!
 //! [lexical analysis]: https://en.wikipedia.org/wiki/Lexical_analysis
 //! [parsing]: https://en.wikipedia.org/wiki/Parsing
 //! [lexer]: crate::lexer

-use std::iter::FusedIterator;
+use std::cell::OnceCell;
 use std::ops::Deref;

-use crate::lexer::{lex, lex_starts_at, LexResult};
-
 pub use crate::error::{FStringErrorType, ParseError, ParseErrorType};
-pub use crate::parser::Program;
+pub use crate::lexer::Token;
 pub use crate::token::{Tok, TokenKind};
-use ruff_python_ast::{Expr, Mod, ModModule, PySourceType, Suite};
-use ruff_text_size::{Ranged, TextRange, TextSize};
+use crate::parser::Parser;
+
+use itertools::Itertools;
+use ruff_python_ast::{Expr, Mod, ModExpression, ModModule, PySourceType, Suite};
+use ruff_python_trivia::CommentRanges;
+use ruff_text_size::{Ranged, TextRange};

 mod error;
 pub mod lexer;
@@ -131,7 +87,7 @@ mod token_set;
 mod token_source;
 pub mod typing;

-/// Parse a full Python program usually consisting of multiple lines.
+/// Parse a full Python module usually consisting of multiple lines.
 ///
 /// This is a convenience function that can be used to parse a full Python program without having to
 /// specify the [`Mode`] or the location. It is probably what you want to use most of the time.
 ///
 /// # Example
 ///
 /// For example, parsing a simple function definition and a call to that function:
 ///
 /// ```
-/// use ruff_python_parser::parse_program;
-///
-/// let source = r#"
-/// def foo():
-///    return 42
-///
-/// print(foo())
-/// "#;
-///
-/// let program = parse_program(source);
-/// assert!(program.is_ok());
-/// ```
-pub fn parse_program(source: &str) -> Result<ModModule, ParseError> {
-    let lexer = lex(source, Mode::Module);
-    match parse_tokens(lexer.collect(), source, Mode::Module)? {
-        Mod::Module(m) => Ok(m),
-        Mod::Expression(_) => unreachable!("Mode::Module doesn't return other variant"),
-    }
-}
-
-/// Parse a full Python program into a [`Suite`].
-///
-/// This function is similar to [`parse_program`] except that it returns the module body
-/// instead of the module itself.
-///
-/// # Example
-///
-/// For example, parsing a simple function definition and a call to that function:
-///
-/// ```
-/// use ruff_python_parser::parse_suite;
+/// use ruff_python_parser::parse_module;
 ///
 /// let source = r#"
 /// def foo():
@@ -180,11 +106,15 @@ pub fn parse_program(source: &str) -> Result<ModModule, ParseError> {
 /// print(foo())
 /// "#;
 ///
-/// let body = parse_suite(source);
-/// assert!(body.is_ok());
+/// let module = parse_module(source);
+/// assert!(module.is_ok());
 /// ```
-pub fn parse_suite(source: &str) -> Result<Suite, ParseError> {
-    parse_program(source).map(|m| m.body)
+pub fn parse_module(source: &str) -> Result<Program<ModModule>, ParseError> {
+    Parser::new(source, Mode::Module)
+        .parse()
+        .try_into_module()
+        .unwrap()
+        .into_result()
 }

 /// Parses a single Python expression.
@@ -202,37 +132,40 @@ pub fn parse_suite(source: &str) -> Result<Suite, ParseError> {
 /// let expr = parse_expression("1 + 2");
 /// assert!(expr.is_ok());
 /// ```
-pub fn parse_expression(source: &str) -> Result<Expr, ParseError> {
-    let lexer = lex(source, Mode::Expression).collect();
-    match parse_tokens(lexer, source, Mode::Expression)? {
-        Mod::Expression(expression) => Ok(*expression.body),
-        Mod::Module(_m) => unreachable!("Mode::Expression doesn't return other variant"),
-    }
+pub fn parse_expression(source: &str) -> Result<Program<ModExpression>, ParseError> {
+    Parser::new(source, Mode::Expression)
+        .parse()
+        .try_into_expression()
+        .unwrap()
+        .into_result()
 }

-/// Parses a Python expression from a given location.
+/// Parses a Python expression for the given range in the source.
 ///
-/// This function allows to specify the location of the expression in the source code, other than
+/// This function allows specifying the range of the expression in the source code, other than
 /// that, it behaves exactly like [`parse_expression`].
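+///
+/// Note that the given `range` is assumed to lie within the bounds of `source`: the
+/// implementation only considers the source code up to the end of the given range and
+/// starts parsing at its start.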
 ///
 /// # Example
 ///
-/// Parsing a single expression denoting the addition of two numbers, but this time specifying a different,
-/// somewhat silly, location:
+/// Parsing one of the numeric literals that is part of an addition expression:
 ///
 /// ```
-/// use ruff_python_parser::parse_expression_starts_at;
-/// # use ruff_text_size::TextSize;
+/// use ruff_python_parser::parse_expression_range;
+/// # use ruff_text_size::{TextRange, TextSize};
 ///
-/// let expr = parse_expression_starts_at("1 + 2", TextSize::from(400));
-/// assert!(expr.is_ok());
+/// let program = parse_expression_range("11 + 22 + 33", TextRange::new(TextSize::new(5), TextSize::new(7)));
+/// assert!(program.is_ok());
 /// ```
-pub fn parse_expression_starts_at(source: &str, offset: TextSize) -> Result<Expr, ParseError> {
-    let lexer = lex_starts_at(source, Mode::Module, offset).collect();
-    match parse_tokens(lexer, source, Mode::Expression)? {
-        Mod::Expression(expression) => Ok(*expression.body),
-        Mod::Module(_m) => unreachable!("Mode::Expression doesn't return other variant"),
-    }
+pub fn parse_expression_range(
+    source: &str,
+    range: TextRange,
+) -> Result<Program<ModExpression>, ParseError> {
+    let source = &source[..range.end().to_usize()];
+    Parser::new_starts_at(source, Mode::Expression, range.start())
+        .parse()
+        .try_into_expression()
+        .unwrap()
+        .into_result()
 }

 /// Parse the given Python source code using the specified [`Mode`].
@@ -249,8 +182,8 @@ pub fn parse_expression_starts_at(source: &str, offset: TextSize) -> Result<Exp
-pub fn parse(source: &str, mode: Mode) -> Result<Mod, ParseError> {
-    let lxr = lexer::lex(source, mode);
-    parse_tokens(lxr.collect(), source, mode)
+pub fn parse(source: &str, mode: Mode) -> Result<Program<Mod>, ParseError> {
+    parse_unchecked(source, mode).into_result()
 }

-/// Parse the given Python source code using the specified [`Mode`] and [`TextSize`].
-///
-/// This function allows to specify the location of the source code, other than
-/// that, it behaves exactly like [`parse`].
-///
-/// # Example
-///
-/// ```
-/// # use ruff_text_size::TextSize;
-/// use ruff_python_parser::{Mode, parse_starts_at};
-///
-/// let source = r#"
-/// def fib(i):
-///   a, b = 0, 1
-///   for _ in range(i):
-///     a, b = b, a + b
-///   return a
+/// Parse the given Python source code using the specified [`Mode`].
 ///
-/// print(fib(42))
-/// "#;
-/// let program = parse_starts_at(source, Mode::Module, TextSize::from(0));
-/// assert!(program.is_ok());
-/// ```
-pub fn parse_starts_at(source: &str, mode: Mode, offset: TextSize) -> Result<Mod, ParseError> {
-    let lxr = lexer::lex_starts_at(source, mode, offset);
-    parse_tokens(lxr.collect(), source, mode)
+/// This is the same as the [`parse`] function except that it doesn't check for any [`ParseError`]
+/// and returns the [`Program`] as is.
+pub fn parse_unchecked(source: &str, mode: Mode) -> Program<Mod> {
+    Parser::new(source, mode).parse()
 }

-/// Parse an iterator of [`LexResult`]s using the specified [`Mode`].
-///
-/// This could allow you to perform some preprocessing on the tokens before parsing them.
-///
-/// # Example
-///
-/// As an example, instead of parsing a string, we can parse a list of tokens after we generate
-/// them using the [`lexer::lex`] function:
-///
-/// ```
-/// use ruff_python_parser::lexer::lex;
-/// use ruff_python_parser::{Mode, parse_tokens};
-///
-/// let source = "1 + 2";
-/// let tokens = lex(source, Mode::Expression);
-/// let expr = parse_tokens(tokens.collect(), source, Mode::Expression);
-/// assert!(expr.is_ok());
-/// ```
-pub fn parse_tokens(tokens: Vec<LexResult>, source: &str, mode: Mode) -> Result<Mod, ParseError> {
-    let program = Program::parse_tokens(source, tokens, mode);
-    if program.is_valid() {
-        Ok(program.into_ast())
-    } else {
-        Err(program.into_errors().into_iter().next().unwrap())
-    }
+/// Parse the given Python source code using the specified [`PySourceType`].
+pub fn parse_unchecked_source(source: &str, source_type: PySourceType) -> Program<ModModule> {
+    // SAFETY: Safe because `PySourceType` always parses to a `ModModule`
+    Parser::new(source, source_type.as_mode())
+        .parse()
+        .try_into_module()
+        .unwrap()
 }

-/// Tokens represents a vector of [`LexResult`].
-///
-/// This should only include tokens up to and including the first error. This struct is created
-/// by the [`tokenize`] function.
-#[derive(Debug, Clone)]
-pub struct Tokens(Vec<LexResult>);
+/// Represents the parsed source code.
+#[derive(Debug)]
+pub struct Program<T> {
+    syntax: T,
+    tokens: Tokens,
+    errors: Vec<ParseError>,
+    comment_ranges: CommentRanges,
+}

-impl Tokens {
-    /// Returns an iterator over the [`TokenKind`] and the range corresponding to the tokens.
-    pub fn kinds(&self) -> TokenKindIter {
-        TokenKindIter::new(&self.0)
+impl<T> Program<T> {
+    /// Returns the syntax node represented by this program.
+    pub fn syntax(&self) -> &T {
+        &self.syntax
     }

-    /// Returns an iterator over the [`TokenKind`] and its range for all the tokens that are
-    /// within the given `range`.
-    ///
-    /// The start and end position of the given range should correspond to the start position of
-    /// the first token and the end position of the last token in the returned iterator.
-    ///
-    /// For example, if the struct contains the following tokens:
-    /// ```txt
-    /// (Def, 0..3)
-    /// (Name, 4..7)
-    /// (Lpar, 7..8)
-    /// (Rpar, 8..9)
-    /// (Colon, 9..10)
-    /// (Ellipsis, 11..14)
-    /// (Newline, 14..14)
-    /// ```
-    ///
-    /// Then, the range `4..10` returns an iterator which yields `Name`, `Lpar`, `Rpar`, and
-    /// `Colon` token. But, if the given position doesn't match any of the tokens, an empty
-    /// iterator is returned.
-    pub fn kinds_within_range<T: Ranged>(&self, ranged: T) -> TokenKindIter {
-        let Ok(start_index) = self.binary_search_by_key(&ranged.start(), |result| match result {
-            Ok((_, range)) => range.start(),
-            Err(error) => error.location().start(),
-        }) else {
-            return TokenKindIter::default();
-        };
+    /// Returns all the tokens for the program.
+    pub fn tokens(&self) -> &Tokens {
+        &self.tokens
+    }

-        let Ok(end_index) = self.binary_search_by_key(&ranged.end(), |result| match result {
-            Ok((_, range)) => range.end(),
-            Err(error) => error.location().end(),
-        }) else {
-            return TokenKindIter::default();
-        };
+    /// Returns a list of syntax errors found during parsing.
+    pub fn errors(&self) -> &[ParseError] {
+        &self.errors
+    }

-        TokenKindIter::new(self.get(start_index..=end_index).unwrap_or(&[]))
+    /// Returns the comment ranges for the program.
+    pub fn comment_ranges(&self) -> &CommentRanges {
+        &self.comment_ranges
     }

-    /// Consumes the [`Tokens`], returning the underlying vector of [`LexResult`].
-    pub fn into_inner(self) -> Vec<LexResult> {
-        self.0
+    /// Consumes the [`Program`] and returns the syntax node represented by this program.
+    pub fn into_syntax(self) -> T {
+        self.syntax
     }
-}

-impl Deref for Tokens {
-    type Target = [LexResult];
+    /// Consumes the [`Program`] and returns a list of syntax errors found during parsing.
+    pub fn into_errors(self) -> Vec<ParseError> {
+        self.errors
+    }

-    fn deref(&self) -> &Self::Target {
-        &self.0
+    /// Returns `true` if the program is valid i.e., it has no syntax errors.
+    pub fn is_valid(&self) -> bool {
+        self.errors.is_empty()
     }
-}

-/// An iterator over the [`TokenKind`] and the corresponding range.
-///
-/// This struct is created by the [`Tokens::kinds`] method.
-#[derive(Clone, Default)]
-pub struct TokenKindIter<'a> {
-    inner: std::iter::Flatten<std::slice::Iter<'a, LexResult>>,
+    /// Transforms the [`Program`] into a [`Result`], returning [`Ok`] if the program has no syntax
+    /// errors, or [`Err`] containing the first [`ParseError`] encountered.
+    pub fn into_result(self) -> Result<Program<T>, ParseError> {
+        if self.is_valid() {
+            Ok(self)
+        } else {
+            Err(self.into_errors().into_iter().next().unwrap())
+        }
+    }
 }

-impl<'a> TokenKindIter<'a> {
-    /// Create a new iterator from a slice of [`LexResult`].
-    pub fn new(tokens: &'a [LexResult]) -> Self {
-        Self {
-            inner: tokens.iter().flatten(),
+impl Program<Mod> {
+    /// Attempts to convert the [`Program<Mod>`] into a [`Program<ModModule>`].
+    ///
+    /// This method checks if the `syntax` field of the program is a [`Mod::Module`]. If it is, the
+    /// method returns [`Some(Program<ModModule>)`] with the contained module. Otherwise, it
+    /// returns [`None`].
+    ///
+    /// [`Some(Program<ModModule>)`]: Some
+    fn try_into_module(self) -> Option<Program<ModModule>> {
+        match self.syntax {
+            Mod::Module(module) => Some(Program {
+                syntax: module,
+                tokens: self.tokens,
+                errors: self.errors,
+                comment_ranges: self.comment_ranges,
+            }),
+            Mod::Expression(_) => None,
         }
     }

-    /// Return the next value without advancing the iterator.
-    pub fn peek(&mut self) -> Option<(TokenKind, TextRange)> {
-        self.clone().next()
+    /// Attempts to convert the [`Program<Mod>`] into a [`Program<ModExpression>`].
+    ///
+    /// This method checks if the `syntax` field of the program is a [`Mod::Expression`]. If it is,
+    /// the method returns [`Some(Program<ModExpression>)`] with the contained expression.
+    /// Otherwise, it returns [`None`].
+    ///
+    /// [`Some(Program<ModExpression>)`]: Some
+    fn try_into_expression(self) -> Option<Program<ModExpression>> {
+        match self.syntax {
+            Mod::Module(_) => None,
+            Mod::Expression(expression) => Some(Program {
+                syntax: expression,
+                tokens: self.tokens,
+                errors: self.errors,
+                comment_ranges: self.comment_ranges,
+            }),
+        }
     }
 }

-impl Iterator for TokenKindIter<'_> {
-    type Item = (TokenKind, TextRange);
+impl Program<ModModule> {
+    /// Returns the module body contained in this program as a [`Suite`].
+    pub fn suite(&self) -> &Suite {
+        &self.syntax.body
+    }

-    fn next(&mut self) -> Option<Self::Item> {
-        let &(ref tok, range) = self.inner.next()?;
-        Some((TokenKind::from_token(tok), range))
+    /// Consumes the [`Program`] and returns the module body as a [`Suite`].
+    pub fn into_suite(self) -> Suite {
+        self.syntax.body
     }
 }

-impl FusedIterator for TokenKindIter<'_> {}
+impl Program<ModExpression> {
+    /// Returns the expression contained in this program.
+    pub fn expr(&self) -> &Expr {
+        &self.syntax.body
+    }

-impl DoubleEndedIterator for TokenKindIter<'_> {
-    fn next_back(&mut self) -> Option<Self::Item> {
-        let &(ref tok, range) = self.inner.next_back()?;
-        Some((TokenKind::from_token(tok), range))
+    /// Consumes the [`Program`] and returns the parsed [`Expr`].
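+    ///
+    /// A minimal doctest-style sketch of the intended usage (the `1 + 2` snippet is
+    /// illustrative):
+    ///
+    /// ```
+    /// use ruff_python_parser::parse_expression;
+    ///
+    /// let program = parse_expression("1 + 2").unwrap();
+    /// let expr = program.into_expr();
+    /// ```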
+    pub fn into_expr(self) -> Expr {
+        *self.syntax.body
+    }
 }

-/// Collect tokens up to and including the first error.
-pub fn tokenize(contents: &str, mode: Mode) -> Tokens {
-    let mut tokens: Vec<LexResult> = allocate_tokens_vec(contents);
-    for tok in lexer::lex(contents, mode) {
-        let is_err = tok.is_err();
-        tokens.push(tok);
-        if is_err {
-            break;
-        }
-    }
+/// Tokens represents a vector of lexed [`Token`].
+#[derive(Debug)]
+pub struct Tokens {
+    raw: Vec<Token>,

-    Tokens(tokens)
+    /// Index of the first [`TokenKind::Unknown`] token or the length of the token vector.
+    first_unknown_or_len: OnceCell<usize>,
 }

-/// Tokenizes all tokens.
-///
-/// It differs from [`tokenize`] in that it tokenizes all tokens and doesn't stop
-/// after the first `Err`.
-pub fn tokenize_all(contents: &str, mode: Mode) -> Vec<LexResult> {
-    let mut tokens = allocate_tokens_vec(contents);
-    for token in lexer::lex(contents, mode) {
-        tokens.push(token);
+impl Tokens {
+    pub(crate) fn new(tokens: Vec<Token>) -> Tokens {
+        Tokens {
+            raw: tokens,
+            first_unknown_or_len: OnceCell::new(),
+        }
     }
-    tokens
-}

-/// Allocates a [`Vec`] with an approximated capacity to fit all tokens
-/// of `contents`.
-///
-/// See [#9546](https://github.com/astral-sh/ruff/pull/9546) for a more detailed explanation.
-pub fn allocate_tokens_vec(contents: &str) -> Vec<LexResult> {
-    Vec::with_capacity(approximate_tokens_lower_bound(contents))
-}
+    /// Returns a slice of tokens up to (and excluding) the first [`TokenKind::Unknown`] token or
+    /// all the tokens if there is none.
+    pub fn up_to_first_unknown(&self) -> &[Token] {
+        let end = *self.first_unknown_or_len.get_or_init(|| {
+            self.raw
+                .iter()
+                .find_position(|token| token.kind() == TokenKind::Unknown)
+                .map(|(idx, _)| idx)
+                .unwrap_or_else(|| self.raw.len())
+        });
+        &self.raw[..end]
+    }

-/// Approximates the number of tokens when lexing `contents`.
-fn approximate_tokens_lower_bound(contents: &str) -> usize {
-    contents.len().saturating_mul(15) / 100
+    /// Returns a slice of the [`Token`] that are within the given `range`.
+    ///
+    /// The start and end position of the given range should correspond to the start position of
+    /// the first token and the end position of the last token in the returned slice.
+    ///
+    /// For example, considering the following tokens and their corresponding range:
+    ///
+    /// ```txt
+    /// Def        0..3
+    /// Name       4..7
+    /// Lpar       7..8
+    /// Rpar       8..9
+    /// Colon      9..10
+    /// Ellipsis   11..14
+    /// Newline    14..14
+    /// ```
+    ///
+    /// The range `4..10` would return a slice of `Name`, `Lpar`, `Rpar`, and `Colon` tokens. But,
+    /// if either the start or end position of the given range doesn't match any of the tokens
+    /// (like `5..10` or `4..12`), the returned slice will be empty.
+    pub fn tokens_in_range(&self, range: TextRange) -> &[Token] {
+        let Ok(start) = self.binary_search_by_key(&range.start(), Ranged::start) else {
+            return &[];
+        };
+        let Ok(end) = self[start..].binary_search_by_key(&range.end(), Ranged::end) else {
+            return &[];
+        };
+        &self[start..=start + end]
+    }
 }

-/// Parse a full Python program from its tokens.
-pub fn parse_program_tokens(
-    tokens: Tokens,
-    source: &str,
-    is_jupyter_notebook: bool,
-) -> anyhow::Result<Suite> {
-    let mode = if is_jupyter_notebook {
-        Mode::Ipython
-    } else {
-        Mode::Module
-    };
-    match parse_tokens(tokens.into_inner(), source, mode)?
 {
-        Mod::Module(m) => Ok(m.body),
-        Mod::Expression(_) => unreachable!("Mode::Module doesn't return other variant"),
+impl Deref for Tokens {
+    type Target = [Token];
+
+    fn deref(&self) -> &Self::Target {
+        &self.raw
     }
 }

diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs
index 8dc54ac9de6cc..a2e1ef687c054 100644
--- a/crates/ruff_python_parser/src/parser/mod.rs
+++ b/crates/ruff_python_parser/src/parser/mod.rs
@@ -2,15 +2,16 @@ use std::cmp::Ordering;

 use bitflags::bitflags;

-use ruff_python_ast as ast;
+use ruff_python_ast::{Mod, ModExpression, ModModule};
 use ruff_text_size::{Ranged, TextRange, TextSize};

-use crate::lexer::{Token, TokenValue};
+use crate::lexer::TokenValue;
 use crate::parser::expression::ExpressionContext;
 use crate::parser::progress::{ParserProgress, TokenId};
 use crate::token_set::TokenSet;
 use crate::token_source::{TokenSource, TokenSourceCheckpoint};
 use crate::{Mode, ParseError, ParseErrorType, TokenKind};
+use crate::{Program, Tokens};

 mod expression;
 mod helpers;
@@ -21,57 +22,6 @@ mod statement;
 #[cfg(test)]
 mod tests;

-/// Represents the parsed source code.
-///
-/// This includes the AST and all of the errors encountered during parsing.
-#[derive(Debug)]
-pub struct Program {
-    ast: ast::Mod,
-    tokens: Vec<Token>,
-    parse_errors: Vec<ParseError>,
-}
-
-impl Program {
-    /// Returns the parsed AST.
-    pub fn ast(&self) -> &ast::Mod {
-        &self.ast
-    }
-
-    /// Returns all the tokens for the program.
-    pub fn tokens(&self) -> &[Token] {
-        &self.tokens
-    }
-
-    /// Returns a list of syntax errors found during parsing.
-    pub fn errors(&self) -> &[ParseError] {
-        &self.parse_errors
-    }
-
-    /// Consumes the [`Program`] and returns the parsed AST.
-    pub fn into_ast(self) -> ast::Mod {
-        self.ast
-    }
-
-    /// Consumes the [`Program`] and returns a list of syntax errors found during parsing.
-    pub fn into_errors(self) -> Vec<ParseError> {
-        self.parse_errors
-    }
-
-    /// Returns `true` if the program is valid i.e., it has no syntax errors.
-    pub fn is_valid(&self) -> bool {
-        self.parse_errors.is_empty()
-    }
-
-    /// Parse the given Python source code using the specified [`Mode`].
-    pub fn parse(source: &str, mode: Mode) -> Program {
-        Parser::new(source, mode).parse_program()
-    }
-
-    pub fn parse_starts_at(source: &str, mode: Mode, start_offset: TextSize) -> Program {
-        Parser::new_starts_at(source, mode, start_offset).parse_program()
-    }
-}
-
 #[derive(Debug)]
 pub(crate) struct Parser<'src> {
     source: &'src str,
@@ -122,13 +72,13 @@ impl<'src> Parser<'src> {
     }

     /// Consumes the [`Parser`] and returns the parsed [`Program`].
-    pub(crate) fn parse_program(mut self) -> Program {
-        let ast = match self.mode {
-            Mode::Expression => ast::Mod::Expression(self.parse_single_expression()),
-            Mode::Module | Mode::Ipython => ast::Mod::Module(self.parse_module()),
+    pub(crate) fn parse(mut self) -> Program<Mod> {
+        let syntax = match self.mode {
+            Mode::Expression => Mod::Expression(self.parse_single_expression()),
+            Mode::Module | Mode::Ipython => Mod::Module(self.parse_module()),
         };

-        self.finish(ast)
+        self.finish(syntax)
     }

     /// Parses a single expression.
     ///
     /// After parsing a single expression, an error is reported and all remaining tokens are
     /// dropped by the parser.
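     ///
     /// For example, parsing `1 + 2 foo` in `Mode::Expression` yields the `1 + 2` expression
     /// and reports a syntax error for the unexpected `foo` token.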
-    fn parse_single_expression(&mut self) -> ast::ModExpression {
+    fn parse_single_expression(&mut self) -> ModExpression {
         let start = self.node_start();
         let parsed_expr = self.parse_expression_list(ExpressionContext::default());
@@ -165,7 +115,7 @@ impl<'src> Parser<'src> {

         self.bump(TokenKind::EndOfFile);

-        ast::ModExpression {
+        ModExpression {
             body: Box::new(parsed_expr.expr),
             range: self.node_range(start),
         }
@@ -174,7 +124,7 @@ impl<'src> Parser<'src> {
     /// Parses a Python module.
     ///
     /// This is to be used for [`Mode::Module`] and [`Mode::Ipython`].
-    fn parse_module(&mut self) -> ast::ModModule {
+    fn parse_module(&mut self) -> ModModule {
         let body = self.parse_list_into_vec(
             RecoveryContextKind::ModuleStatements,
             Parser::parse_statement,
@@ -182,13 +132,13 @@ impl<'src> Parser<'src> {

         self.bump(TokenKind::EndOfFile);

-        ast::ModModule {
+        ModModule {
             body,
             range: TextRange::new(self.start_offset, self.current_token_range().end()),
         }
     }

-    fn finish(self, ast: ast::Mod) -> Program {
+    fn finish(self, syntax: Mod) -> Program<Mod> {
         assert_eq!(
             self.current_token_kind(),
             TokenKind::EndOfFile,

         // TODO consider re-integrating lexical error handling into the parser?
         let parse_errors = self.errors;
-        let (tokens, lex_errors) = self.tokens.finish();
+        let (tokens, comment_ranges, lex_errors) = self.tokens.finish();

         // Fast path for when there are no lex errors.
         // There's no fast path for when there are no parse errors because a lex error
         // always results in a parse error.
         if lex_errors.is_empty() {
             return Program {
-                ast,
-                tokens,
-                parse_errors,
+                syntax,
+                tokens: Tokens::new(tokens),
+                comment_ranges,
+                errors: parse_errors,
             };
         }
@@ -235,9 +186,10 @@ impl<'src> Parser<'src> {
         merged.extend(lex_errors.map(ParseError::from));

         Program {
-            ast,
-            tokens,
-            parse_errors: merged,
+            syntax,
+            tokens: Tokens::new(tokens),
+            comment_ranges,
+            errors: merged,
         }
     }

diff --git a/crates/ruff_python_parser/src/parser/tests.rs b/crates/ruff_python_parser/src/parser/tests.rs
index ec23d01d277f5..8de198b8eb253 100644
--- a/crates/ruff_python_parser/src/parser/tests.rs
+++ b/crates/ruff_python_parser/src/parser/tests.rs
@@ -1,4 +1,4 @@
-use crate::{lex, parse, parse_expression, parse_suite, parse_tokens, Mode};
+use crate::{parse, parse_expression, parse_module, Mode};

 #[test]
 fn test_modes() {
@@ -45,23 +45,23 @@ fn test_expr_mode_valid_syntax() {
     let source = "first

";

-    let expr = parse_expression(source).unwrap();
+    let program = parse_expression(source).unwrap();

-    insta::assert_debug_snapshot!(expr);
+    insta::assert_debug_snapshot!(program.expr());
 }

 #[test]
 fn test_unicode_aliases() {
     // https://github.com/RustPython/RustPython/issues/4566
     let source = r#"x = "\N{BACKSPACE}another cool trick""#;
-    let parse_ast = parse_suite(source).unwrap();
+    let suite = parse_module(source).unwrap().into_suite();

-    insta::assert_debug_snapshot!(parse_ast);
+    insta::assert_debug_snapshot!(suite);
 }

 #[test]
 fn test_ipython_escape_commands() {
-    let parse_ast = parse(
+    let program = parse(
         r"
 # Normal Python code
 (
@@ -132,21 +132,5 @@ foo.bar[0].baz[2].egg??
 Mode::Ipython,
     )
     .unwrap();
-    insta::assert_debug_snapshot!(parse_ast);
-}
-
-#[test]
-fn test_ipython_escape_command_parse_error() {
-    let source = r"
-a = 1
-%timeit a == 1
-    "
-    .trim();
-    let lxr = lex(source, Mode::Ipython);
-    let parse_err = parse_tokens(lxr.collect(), source, Mode::Module).unwrap_err();
-    assert_eq!(
-        parse_err.to_string(),
-        "IPython escape commands are only allowed in `Mode::Ipython` at byte range 6..20"
-            .to_string()
-    );
+    insta::assert_debug_snapshot!(program.syntax());
 }

diff --git a/crates/ruff_python_parser/src/string.rs b/crates/ruff_python_parser/src/string.rs
index bd206d5e46573..772034527a218 100644
--- a/crates/ruff_python_parser/src/string.rs
+++ b/crates/ruff_python_parser/src/string.rs
@@ -469,13 +469,19 @@ pub(crate) fn parse_fstring_literal_element(

 #[cfg(test)]
 mod tests {
+    use ruff_python_ast::Suite;
+
     use crate::lexer::LexicalErrorType;
-    use crate::{parse_suite, FStringErrorType, ParseErrorType, Suite};
+    use crate::{parse_module, FStringErrorType, ParseError, ParseErrorType, Program};

     const WINDOWS_EOL: &str = "\r\n";
     const MAC_EOL: &str = "\r";
     const UNIX_EOL: &str = "\n";

+    fn parse_suite(source: &str) -> Result<Suite, ParseError> {
+        parse_module(source).map(Program::into_suite)
+    }
+
     fn string_parser_escaped_eol(eol: &str) -> Suite {
         let source = format!(r"'text \{eol}more text'");
         parse_suite(&source).unwrap()
     }
@@ -483,73 +489,69 @@ mod tests {

 #[test]
 fn test_string_parser_escaped_unix_eol() {
-    let parse_ast = string_parser_escaped_eol(UNIX_EOL);
-    insta::assert_debug_snapshot!(parse_ast);
+    let suite = string_parser_escaped_eol(UNIX_EOL);
+    insta::assert_debug_snapshot!(suite);
 }

 #[test]
 fn test_string_parser_escaped_mac_eol() {
-    let parse_ast = string_parser_escaped_eol(MAC_EOL);
-    insta::assert_debug_snapshot!(parse_ast);
+    let suite = string_parser_escaped_eol(MAC_EOL);
+    insta::assert_debug_snapshot!(suite);
 }

 #[test]
 fn test_string_parser_escaped_windows_eol() {
-    let parse_ast = string_parser_escaped_eol(WINDOWS_EOL);
-    insta::assert_debug_snapshot!(parse_ast);
+    let suite = string_parser_escaped_eol(WINDOWS_EOL);
+    insta::assert_debug_snapshot!(suite);
 }

 #[test]
 fn test_parse_fstring() {
     let source = r#"f"{a}{ b }{{foo}}""#;
-    let parse_ast = parse_suite(source).unwrap();
-
-    insta::assert_debug_snapshot!(parse_ast);
+    let suite = parse_suite(source).unwrap();
+    insta::assert_debug_snapshot!(suite);
 }

 #[test]
 fn test_parse_fstring_nested_spec() {
     let source = r#"f"{foo:{spec}}""#;
-    let parse_ast = parse_suite(source).unwrap();
-
-    insta::assert_debug_snapshot!(parse_ast);
+    let suite = parse_suite(source).unwrap();
+    insta::assert_debug_snapshot!(suite);
 }

 #[test]
 fn test_parse_fstring_not_nested_spec() {
     let source = r#"f"{foo:spec}""#;
-    let parse_ast = parse_suite(source).unwrap();
-
-    insta::assert_debug_snapshot!(parse_ast);
+    let suite = parse_suite(source).unwrap();
+    insta::assert_debug_snapshot!(suite);
 }

 #[test]
 fn test_parse_empty_fstring() {
-    insta::assert_debug_snapshot!(parse_suite(r#"f"""#,).unwrap());
+    let source = r#"f"""#;
+    let suite = parse_suite(source).unwrap();
+    insta::assert_debug_snapshot!(suite);
 }

 #[test]
 fn test_fstring_parse_self_documenting_base() {
     let source = r#"f"{user=}""#;
-    let parse_ast = parse_suite(source).unwrap();
-
-    insta::assert_debug_snapshot!(parse_ast);
+    let suite = parse_suite(source).unwrap();
+    insta::assert_debug_snapshot!(suite);
 }

 #[test]
 fn test_fstring_parse_self_documenting_base_more() {
     let source = r#"f"mix {user=} with text and {second=}""#;
-    let parse_ast =
parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_parse_self_documenting_format() { let source = r#"f"{user=:>10}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } fn parse_fstring_error(source: &str) -> FStringErrorType { @@ -577,240 +579,236 @@ mod tests { // error appears after the unexpected `FStringMiddle` token, which is between the // `:` and the `{`. // assert_eq!(parse_fstring_error("f'{lambda x: {x}}'"), LambdaWithoutParentheses); - assert!(parse_suite(r#"f"{class}""#,).is_err()); + assert!(parse_suite(r#"f"{class}""#).is_err()); } #[test] fn test_parse_fstring_not_equals() { let source = r#"f"{1 != 2}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_equals() { let source = r#"f"{42 == 42}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_self_doc_prec_space() { let source = r#"f"{x =}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_self_doc_trailing_space() { let source = r#"f"{x= }""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_yield_expr() { let source = r#"f"{yield}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_string_concat() { let source = "'Hello ' 'world'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_u_string_concat_1() { let source = "'Hello ' u'world'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_u_string_concat_2() { let source = "u'Hello ' 'world'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_f_string_concat_1() { let source = "'Hello ' f'world'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_f_string_concat_2() { let source = "'Hello ' f'world'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_f_string_concat_3() { let source = "'Hello ' f'world{\"!\"}'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + 
let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_f_string_concat_4() { let source = "'Hello ' f'world{\"!\"}' 'again!'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_u_f_string_concat_1() { let source = "u'Hello ' f'world'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_u_f_string_concat_2() { let source = "u'Hello ' f'world' '!'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_string_triple_quotes_with_kind() { let source = "u'''Hello, world!'''"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_single_quoted_byte() { // single quote let source = r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_double_quoted_byte() { // double quote let source = r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_escape_char_in_byte_literal() { // backslash does not escape let source = r#"b"omkmok\Xaa""#; // spell-checker:ignore omkmok - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_raw_byte_literal_1() { let source = r"rb'\x1z'"; - let parse_ast = 
parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_raw_byte_literal_2() { let source = r"rb'\\'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_escape_octet() { let source = r"b'\43a\4\1234'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_escaped_newline() { let source = r#"f"\n{x}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_constant_range() { let source = r#"f"aaa{bbb}ccc{ddd}eee""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_unescaped_newline() { let source = r#"f""" {x}""""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_escaped_character() { let source = r#"f"\\{x}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_raw_fstring() { let source = r#"rf"{x}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_triple_quoted_raw_fstring() { let source = r#"rf"""{x}""""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_line_continuation() { let source = r#"rf"\ {x}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_nested_string_spec() { let source = r#"f"{foo:{''}}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_nested_concatenation_string_spec() { let source = r#"f"{foo:{'' ''}}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } /// #[test] fn test_dont_panic_on_8_in_octal_escape() { let source = r"bold = '\038[1m'"; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_invalid_unicode_literal() { let source = r"'\x1ó34'"; let error = parse_suite(source).unwrap_err(); - insta::assert_debug_snapshot!(error); } @@ -818,7 +816,6 @@ mod tests { fn test_missing_unicode_lbrace_error() { let source = r"'\N '"; let error = parse_suite(source).unwrap_err(); - 
 insta::assert_debug_snapshot!(error);
 }

@@ -826,7 +823,6 @@ mod tests {
 fn test_missing_unicode_rbrace_error() {
     let source = r"'\N{SPACE'";
     let error = parse_suite(source).unwrap_err();
-
     insta::assert_debug_snapshot!(error);
 }

@@ -834,7 +830,6 @@ mod tests {
 fn test_invalid_unicode_name_error() {
     let source = r"'\N{INVALID}'";
     let error = parse_suite(source).unwrap_err();
-
     insta::assert_debug_snapshot!(error);
 }

@@ -842,7 +837,6 @@ mod tests {
 fn test_invalid_byte_literal_error() {
     let source = r"b'123a𝐁c'";
     let error = parse_suite(source).unwrap_err();
-
     insta::assert_debug_snapshot!(error);
 }

@@ -852,8 +846,8 @@ mod tests {
         #[test]
         fn $name() {
             let source = format!(r#""\N{{{0}}}""#, $alias);
-            let parse_ast = parse_suite(&source).unwrap();
-            insta::assert_debug_snapshot!(parse_ast);
+            let suite = parse_suite(&source).unwrap();
+            insta::assert_debug_snapshot!(suite);
         }
     )*
     }

diff --git a/crates/ruff_python_parser/src/token_source.rs b/crates/ruff_python_parser/src/token_source.rs
index e2828be891e4a..4ccebc8eb7992 100644
--- a/crates/ruff_python_parser/src/token_source.rs
+++ b/crates/ruff_python_parser/src/token_source.rs
@@ -1,4 +1,5 @@
-use ruff_text_size::{TextRange, TextSize};
+use ruff_python_trivia::CommentRanges;
+use ruff_text_size::{Ranged, TextRange, TextSize};

 use crate::lexer::{Lexer, LexerCheckpoint, LexicalError, Token, TokenValue};
 use crate::{Mode, TokenKind};
@@ -13,14 +14,19 @@ pub(crate) struct TokenSource<'src> {
     /// is finished consuming all the tokens. Note that unlike the emitted tokens, this vector
     /// holds both the trivia and non-trivia tokens.
     tokens: Vec<Token>,
+
+    /// A vector containing the range of all the comment tokens emitted by the lexer.
+    comments: Vec<TextRange>,
 }

 impl<'src> TokenSource<'src> {
     /// Create a new token source for the given lexer.
     pub(crate) fn new(lexer: Lexer<'src>) -> Self {
+        // TODO(dhruvmanila): Use `allocate_tokens_vec`
         TokenSource {
             lexer,
             tokens: vec![],
+            comments: vec![],
         }
     }

@@ -85,6 +91,9 @@ impl<'src> TokenSource<'src> {
         loop {
             let next = self.lexer.next_token();
             if next.is_trivia() {
+                if next.is_comment() {
+                    self.comments.push(next.range());
+                }
                 self.tokens.push(next);
                 continue;
             }
         }
     }

-    /// Returns the next non-trivia token without adding it to the token vector.
+    /// Returns the next non-trivia token without adding it to any vector.
     fn next_non_trivia_token(&mut self) -> TokenKind {
         loop {
             let next = self.lexer.next_token();
@@ -108,6 +117,7 @@ impl<'src> TokenSource<'src> {
         TokenSourceCheckpoint {
             lexer: self.lexer.checkpoint(),
             tokens_position: self.tokens.len(),
+            comments_position: self.comments.len(),
         }
     }

@@ -115,22 +125,35 @@ impl<'src> TokenSource<'src> {
     pub(crate) fn rewind(&mut self, checkpoint: TokenSourceCheckpoint<'src>) {
         self.lexer.rewind(checkpoint.lexer);
         self.tokens.truncate(checkpoint.tokens_position);
+        self.comments.truncate(checkpoint.comments_position);
     }

     /// Consumes the token source, returning the collected tokens and any errors encountered during
     /// lexing. The token collection includes both the trivia and non-trivia tokens.
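+    /// The returned tuple also includes the [`CommentRanges`] built up from the comment
+    /// tokens collected during lexing.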
-    pub(crate) fn finish(self) -> (Vec<Token>, Vec<LexicalError>) {
+    pub(crate) fn finish(self) -> (Vec<Token>, CommentRanges, Vec<LexicalError>) {
         assert_eq!(
             self.current_kind(),
             TokenKind::EndOfFile,
             "TokenSource was not fully consumed"
         );

-        (self.tokens, self.lexer.finish())
+        let comment_ranges = CommentRanges::new(self.comments);
+        (self.tokens, comment_ranges, self.lexer.finish())
     }
 }

 pub(crate) struct TokenSourceCheckpoint<'src> {
     lexer: LexerCheckpoint<'src>,
     tokens_position: usize,
+    comments_position: usize,
+}
+
+/// Allocates a [`Vec`] with an approximated capacity to fit all tokens
+/// of `contents`.
+///
+/// See [#9546](https://github.com/astral-sh/ruff/pull/9546) for a more detailed explanation.
+#[allow(dead_code)]
+fn allocate_tokens_vec(contents: &str) -> Vec<Token> {
+    let lower_bound = contents.len().saturating_mul(15) / 100;
+    Vec::with_capacity(lower_bound)
 }

diff --git a/crates/ruff_python_parser/src/typing.rs b/crates/ruff_python_parser/src/typing.rs
index c8d82304e90ca..02ebf3243c0b3 100644
--- a/crates/ruff_python_parser/src/typing.rs
+++ b/crates/ruff_python_parser/src/typing.rs
@@ -6,7 +6,7 @@ use ruff_python_ast::relocate::relocate_expr;
 use ruff_python_ast::{str, Expr};
 use ruff_text_size::{TextLen, TextRange};

-use crate::{parse_expression, parse_expression_starts_at};
+use crate::{parse_expression, parse_expression_range};

 #[derive(is_macro::Is, Copy, Clone, Debug)]
 pub enum AnnotationKind {
@@ -22,25 +22,30 @@ pub enum AnnotationKind {
     Complex,
 }

-/// Parse a type annotation from a string.
+/// Parses the value of a string literal node (`parsed_contents`) with `range` as a type
+/// annotation. The given `source` is the entire source code.
 pub fn parse_type_annotation(
-    value: &str,
+    parsed_contents: &str,
     range: TextRange,
     source: &str,
 ) -> Result<(Expr, AnnotationKind)> {
     let expression = &source[range];

-    if str::raw_contents(expression).is_some_and(|body| body == value) {
+    if str::raw_contents(expression).is_some_and(|raw_contents| raw_contents == parsed_contents) {
         // The annotation is considered "simple" if and only if the raw representation (e.g.,
         // `List[int]` within "List[int]") exactly matches the parsed representation. This
         // isn't the case, e.g., for implicit concatenations, or for annotations that contain
         // escaped quotes.
-        let leading_quote = str::leading_quote(expression).unwrap();
-        let expr = parse_expression_starts_at(value, range.start() + leading_quote.text_len())?;
+        let leading_quote_len = str::leading_quote(expression).unwrap().text_len();
+        let trailing_quote_len = str::trailing_quote(expression).unwrap().text_len();
+        let range = range
+            .add_start(leading_quote_len)
+            .sub_end(trailing_quote_len);
+        let expr = parse_expression_range(source, range)?.into_expr();
         Ok((expr, AnnotationKind::Simple))
     } else {
         // Otherwise, consider this a "complex" annotation.
- let mut expr = parse_expression(value)?; + let mut expr = parse_expression(parsed_contents)?.into_expr(); relocate_expr(&mut expr, range); Ok((expr, AnnotationKind::Complex)) } diff --git a/crates/ruff_python_parser/tests/fixtures.rs b/crates/ruff_python_parser/tests/fixtures.rs index 8e77242881825..5d52f94493545 100644 --- a/crates/ruff_python_parser/tests/fixtures.rs +++ b/crates/ruff_python_parser/tests/fixtures.rs @@ -8,7 +8,7 @@ use annotate_snippets::snippet::{AnnotationType, Slice, Snippet, SourceAnnotatio use ruff_python_ast::visitor::preorder::{walk_module, PreorderVisitor, TraversalSignal}; use ruff_python_ast::{AnyNodeRef, Mod}; -use ruff_python_parser::{Mode, ParseErrorType, Program}; +use ruff_python_parser::{parse_unchecked, Mode, ParseErrorType}; use ruff_source_file::{LineIndex, OneIndexed, SourceCode}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; @@ -36,7 +36,7 @@ fn inline_err() { /// Snapshots the AST. fn test_valid_syntax(input_path: &Path) { let source = fs::read_to_string(input_path).expect("Expected test file to exist"); - let program = Program::parse(&source, Mode::Module); + let program = parse_unchecked(&source, Mode::Module); if !program.is_valid() { let line_index = LineIndex::from_source_text(&source); @@ -60,11 +60,11 @@ fn test_valid_syntax(input_path: &Path) { panic!("{input_path:?}: {message}"); } - validate_ast(program.ast(), source.text_len(), input_path); + validate_ast(program.syntax(), source.text_len(), input_path); let mut output = String::new(); writeln!(&mut output, "## AST").unwrap(); - writeln!(&mut output, "\n```\n{:#?}\n```", program.ast()).unwrap(); + writeln!(&mut output, "\n```\n{:#?}\n```", program.syntax()).unwrap(); insta::with_settings!({ omit_expression => true, @@ -79,18 +79,18 @@ fn test_valid_syntax(input_path: &Path) { /// Snapshots the AST and the error messages. fn test_invalid_syntax(input_path: &Path) { let source = fs::read_to_string(input_path).expect("Expected test file to exist"); - let program = Program::parse(&source, Mode::Module); + let program = parse_unchecked(&source, Mode::Module); assert!( !program.is_valid(), "{input_path:?}: Expected parser to generate at least one syntax error for a program containing syntax errors." ); - validate_ast(program.ast(), source.text_len(), input_path); + validate_ast(program.syntax(), source.text_len(), input_path); let mut output = String::new(); writeln!(&mut output, "## AST").unwrap(); - writeln!(&mut output, "\n```\n{:#?}\n```", program.ast()).unwrap(); + writeln!(&mut output, "\n```\n{:#?}\n```", program.syntax()).unwrap(); writeln!(&mut output, "## Errors\n").unwrap(); @@ -129,9 +129,9 @@ fn parser_quick_test() { data[*x,] "; - let program = Program::parse(source, Mode::Module); + let program = parse_unchecked(source, Mode::Module); - println!("AST:\n----\n{:#?}", program.ast()); + println!("AST:\n----\n{:#?}", program.syntax()); if !program.is_valid() { println!("Errors:\n-------");
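
For reference, a minimal sketch of how the updated API is meant to be used end-to-end (the source snippets and the `main` wrapper are illustrative, not part of the diff):

```rs
use ruff_python_parser::{parse_module, parse_unchecked, Mode};

fn main() {
    // Fallible entry point: returns the first syntax error, if any.
    let program = parse_module("def foo():\n    return 42\n").unwrap();
    println!("{} top-level statements", program.suite().len());

    // Infallible entry point: always returns a `Program`, collecting all
    // syntax errors alongside the recovered AST.
    let program = parse_unchecked("def foo(:\n", Mode::Module);
    assert!(!program.is_valid());
    for error in program.errors() {
        println!("syntax error: {error}");
    }
}
```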