Skip to content

Commit

Permalink
Add checkpoint logic for the parser (#11441)
Browse files Browse the repository at this point in the history
## Summary

This PR adds the checkpoint logic to the parser and token source. It
also updates the lexer checkpoint to contain the error position.
  • Loading branch information
dhruvmanila committed May 30, 2024
1 parent 04c10f1 commit 5df3eb1
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 3 deletions.
5 changes: 4 additions & 1 deletion crates/ruff_python_parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1326,7 +1326,7 @@ impl<'src> Lexer<'src> {
std::mem::take(&mut self.value)
}

/// Creates a checkpoint to which it can later return to using [`Self::rewind`].
/// Creates a checkpoint to which the lexer can later return using [`Self::rewind`].
pub(crate) fn checkpoint(&self) -> LexerCheckpoint<'src> {
LexerCheckpoint {
value: self.value.clone(),
Expand All @@ -1337,6 +1337,7 @@ impl<'src> Lexer<'src> {
indentations_checkpoint: self.indentations.checkpoint(),
pending_indentation: self.pending_indentation,
fstrings_checkpoint: self.fstrings.checkpoint(),
errors_position: self.errors.len(),
}
}

Expand All @@ -1350,6 +1351,7 @@ impl<'src> Lexer<'src> {
self.indentations.rewind(checkpoint.indentations_checkpoint);
self.pending_indentation = checkpoint.pending_indentation;
self.fstrings.rewind(checkpoint.fstrings_checkpoint);
self.errors.truncate(checkpoint.errors_position);
}

pub fn finish(self) -> Vec<LexicalError> {
Expand Down Expand Up @@ -1568,6 +1570,7 @@ pub(crate) struct LexerCheckpoint<'src> {
indentations_checkpoint: IndentationsCheckpoint,
pending_indentation: Option<Indentation>,
fstrings_checkpoint: FStringsCheckpoint,
errors_position: usize,
}

#[derive(Copy, Clone, Debug)]
Expand Down
30 changes: 29 additions & 1 deletion crates/ruff_python_parser/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::lexer::{Token, TokenValue};
use crate::parser::expression::ExpressionContext;
use crate::parser::progress::{ParserProgress, TokenId};
use crate::token_set::TokenSet;
use crate::token_source::TokenSource;
use crate::token_source::{TokenSource, TokenSourceCheckpoint};
use crate::{Mode, ParseError, ParseErrorType, TokenKind};

mod expression;
Expand Down Expand Up @@ -632,6 +632,34 @@ impl<'src> Parser<'src> {

false
}

/// Creates a checkpoint to which the parser can later return to using [`Self::rewind`].
fn checkpoint(&self) -> ParserCheckpoint<'src> {
ParserCheckpoint {
tokens: self.tokens.checkpoint(),
errors_position: self.errors.len(),
current_token_id: self.current_token_id,
prev_token_end: self.prev_token_end,
recovery_context: self.recovery_context,
}
}

/// Restore the parser to the given checkpoint.
fn rewind(&mut self, checkpoint: ParserCheckpoint<'src>) {
self.tokens.rewind(checkpoint.tokens);
self.errors.truncate(checkpoint.errors_position);
self.current_token_id = checkpoint.current_token_id;
self.prev_token_end = checkpoint.prev_token_end;
self.recovery_context = checkpoint.recovery_context;
}
}

/// A snapshot of the parser's state, created by `Parser::checkpoint` and
/// consumed by `Parser::rewind`.
struct ParserCheckpoint<'src> {
    // Checkpoint of the underlying token source (which in turn checkpoints the lexer).
    tokens: TokenSourceCheckpoint<'src>,
    // Number of parse errors recorded when the checkpoint was taken; rewinding
    // truncates the error list back to this length.
    errors_position: usize,
    // Id of the current token at checkpoint time.
    current_token_id: TokenId,
    // End offset of the previously consumed token at checkpoint time.
    prev_token_end: TextSize,
    // Error-recovery context active when the checkpoint was taken.
    recovery_context: RecoveryContext,
}

#[derive(Copy, Clone, Debug, Eq, PartialEq)]
Expand Down
21 changes: 20 additions & 1 deletion crates/ruff_python_parser/src/token_source.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use ruff_text_size::{TextRange, TextSize};

use crate::lexer::{Lexer, LexicalError, Token, TokenValue};
use crate::lexer::{Lexer, LexerCheckpoint, LexicalError, Token, TokenValue};
use crate::{Mode, TokenKind};

/// Token source for the parser that skips over any trivia tokens.
Expand Down Expand Up @@ -76,6 +76,20 @@ impl<'src> TokenSource<'src> {
}
}

/// Creates a checkpoint to which the token source can later return to using [`Self::rewind`].
pub(crate) fn checkpoint(&self) -> TokenSourceCheckpoint<'src> {
TokenSourceCheckpoint {
lexer: self.lexer.checkpoint(),
tokens_position: self.tokens.len(),
}
}

/// Restore the token source to the given checkpoint.
pub(crate) fn rewind(&mut self, checkpoint: TokenSourceCheckpoint<'src>) {
self.lexer.rewind(checkpoint.lexer);
self.tokens.truncate(checkpoint.tokens_position);
}

/// Consumes the token source, returning the collected tokens and any errors encountered during
/// lexing. The token collection includes both the trivia and non-trivia tokens.
pub(crate) fn finish(self) -> (Vec<Token>, Vec<LexicalError>) {
Expand All @@ -88,3 +102,8 @@ impl<'src> TokenSource<'src> {
(self.tokens, self.lexer.finish())
}
}

/// A snapshot of the token source's state, created by `TokenSource::checkpoint`
/// and consumed by `TokenSource::rewind`.
pub(crate) struct TokenSourceCheckpoint<'src> {
    // Checkpoint of the underlying lexer at the time the snapshot was taken.
    lexer: LexerCheckpoint<'src>,
    // Number of tokens collected when the checkpoint was taken; rewinding
    // truncates the token list back to this length.
    tokens_position: usize,
}

0 comments on commit 5df3eb1

Please sign in to comment.