diff --git a/crates/ruff_python_parser/src/parser/expression.rs b/crates/ruff_python_parser/src/parser/expression.rs index 0a4ca75cb5bf85..d863c6465855ef 100644 --- a/crates/ruff_python_parser/src/parser/expression.rs +++ b/crates/ruff_python_parser/src/parser/expression.rs @@ -471,7 +471,7 @@ impl<'src> Parser<'src> { if self.current_token_kind().is_soft_keyword() { let id = self.src_text(range).to_string(); - self.bump_any(); + self.bump_soft_keyword_as_name(); return ast::Identifier { id, range }; } @@ -1343,7 +1343,7 @@ impl<'src> Parser<'src> { // `Invalid` tokens are created when there's a lexical error, so // we ignore it here to avoid creating unexpected token errors TokenKind::Unknown => { - parser.next_token(); + parser.bump_any(); return; } tok => { diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs index f75d4d73b103a3..28922254e94f35 100644 --- a/crates/ruff_python_parser/src/parser/mod.rs +++ b/crates/ruff_python_parser/src/parser/mod.rs @@ -37,6 +37,11 @@ impl Program { &self.ast } + /// Returns all the tokens for the program. + pub fn tokens(&self) -> &[Token] { + &self.tokens + } + /// Returns a list of syntax errors found during parsing. pub fn errors(&self) -> &[ParseError] { &self.parse_errors @@ -154,7 +159,7 @@ impl<'src> Parser<'src> { if self.at(TokenKind::EndOfFile) { break; } - self.next_token(); + self.bump_any(); } } @@ -315,8 +320,8 @@ impl<'src> Parser<'src> { } /// Moves the parser to the next token. - fn next_token(&mut self) { - self.tokens.next_token(); + fn do_bump(&mut self, kind: TokenKind) { + self.tokens.bump(kind); self.current_token_id.increment(); @@ -357,15 +362,15 @@ impl<'src> Parser<'src> { self.current_token_id } - /// Eat the current token if it is of the given kind, returning `true` in - /// that case. Otherwise, return `false`. 
- fn eat(&mut self, kind: TokenKind) -> bool { - if self.at(kind) { - self.next_token(); - true - } else { - false - } + /// Bumps the current token assuming it is of the given kind. + /// + /// # Panics + /// + /// If the current token is not of the given kind. + fn bump(&mut self, kind: TokenKind) { + assert_eq!(self.current_token_kind(), kind); + + self.do_bump(kind); } /// Take the token value from the underlying token source and bump the current token. @@ -379,26 +384,16 @@ impl<'src> Parser<'src> { value } - /// Bumps the current token assuming it is of the given kind. - /// - /// # Panics - /// - /// If the current token is not of the given kind. - fn bump(&mut self, kind: TokenKind) { - assert_eq!(self.current_token_kind(), kind); - - self.next_token(); - } - /// Bumps the current token assuming it is found in the given token set. /// /// # Panics /// /// If the current token is not found in the given token set. fn bump_ts(&mut self, ts: TokenSet) { - assert!(ts.contains(self.current_token_kind())); + let kind = self.current_token_kind(); + assert!(ts.contains(kind)); - self.next_token(); + self.do_bump(kind); } /// Bumps the current token regardless of its kind and advances to the next token. @@ -407,11 +402,35 @@ impl<'src> Parser<'src> { /// /// If the parser is at end of file. fn bump_any(&mut self) { - assert_ne!(self.current_token_kind(), TokenKind::EndOfFile); + let kind = self.current_token_kind(); + assert_ne!(kind, TokenKind::EndOfFile); + + self.do_bump(kind); + } + + /// Bumps the soft keyword token as a `Name` token. + /// + /// # Panics + /// + /// If the current token is not a soft keyword. + pub(crate) fn bump_soft_keyword_as_name(&mut self) { + assert!(self.current_token_kind().is_soft_keyword()); - self.next_token(); + self.do_bump(TokenKind::Name); + } + + /// Consume the current token if it is of the given kind. Returns `true` if it matches, `false` + /// otherwise. 
+ fn eat(&mut self, kind: TokenKind) -> bool { + if self.at(kind) { + self.do_bump(kind); + true + } else { + false + } } + /// Eat the current token if it is of the expected kind, otherwise adds an appropriate error. fn expect(&mut self, expected: TokenKind) -> bool { if self.eat(expected) { return true; @@ -522,7 +541,7 @@ impl<'src> Parser<'src> { break; } - self.next_token(); + self.bump_any(); } } @@ -606,7 +625,7 @@ impl<'src> Parser<'src> { trailing_comma_range = None; } - self.next_token(); + self.bump_any(); } } diff --git a/crates/ruff_python_parser/src/token_source.rs b/crates/ruff_python_parser/src/token_source.rs index 50e42ca525ace4..8d99aa1449ddc7 100644 --- a/crates/ruff_python_parser/src/token_source.rs +++ b/crates/ruff_python_parser/src/token_source.rs @@ -30,7 +30,7 @@ impl<'src> TokenSource<'src> { let mut source = TokenSource::new(lexer); // Initialize the token source so that the current token is set correctly. - source.next_token(); + source.do_bump(); source } @@ -53,26 +53,40 @@ impl<'src> TokenSource<'src> { /// Returns the next non-trivia token without consuming it. pub(crate) fn peek(&mut self) -> TokenKind { let checkpoint = self.lexer.checkpoint(); - let next = loop { + let next = self.next_non_trivia_token(); + self.lexer.rewind(checkpoint); + next + } + + /// Bumps the token source to the next non-trivia token. + /// + /// It pushes the given kind to the token vector with the current token range. + pub(crate) fn bump(&mut self, kind: TokenKind) { + self.tokens.push(Token::new(kind, self.current_range())); + self.do_bump(); + } + + /// Bumps the token source to the next non-trivia token without adding the current token to the + /// token vector. It does add the trivia tokens to the token vector. 
+ fn do_bump(&mut self) { + loop { let next = self.lexer.next_token(); if next.is_trivia() { + self.tokens.push(next); continue; } - break next.kind(); - }; - self.lexer.rewind(checkpoint); - next + break; + } } - /// Moves the lexer to the next non-trivia token. - pub(crate) fn next_token(&mut self) { + /// Returns the next non-trivia token without adding it to the token vector. + fn next_non_trivia_token(&mut self) -> TokenKind { loop { let next = self.lexer.next_token(); - self.tokens.push(next); if next.is_trivia() { continue; } - break; + break next.kind(); } }