From 01e0119f68aab9d4e73f94a8517d5b35b9ad9e50 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Wed, 29 Nov 2023 19:54:53 -0500 Subject: [PATCH] Support type alias statements in simple statement positions --- crates/ruff_python_parser/src/parser.rs | 4 + ...parser__tests__parse_type_declaration.snap | 94 +++++++++++++++++++ .../ruff_python_parser/src/soft_keywords.rs | 94 +++++++++++++++---- 3 files changed, 176 insertions(+), 16 deletions(-) diff --git a/crates/ruff_python_parser/src/parser.rs b/crates/ruff_python_parser/src/parser.rs index 82b5a26b8f89b..557270babb1a9 100644 --- a/crates/ruff_python_parser/src/parser.rs +++ b/crates/ruff_python_parser/src/parser.rs @@ -822,6 +822,10 @@ type X \ [T] = T type X[T] \ = T + +# simple statements +type X = int; type X = str; type X = type +class X: type X = int "#; insta::assert_debug_snapshot!(parse_suite(source, "").unwrap()); } diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__parse_type_declaration.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__parse_type_declaration.snap index e8ddd7d13d1eb..7730469ff75ef 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__parse_type_declaration.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__parse_type_declaration.snap @@ -849,4 +849,98 @@ expression: "parse_suite(source, \"\").unwrap()" ), }, ), + TypeAlias( + StmtTypeAlias { + range: 590..602, + name: Name( + ExprName { + range: 595..596, + id: "X", + ctx: Store, + }, + ), + type_params: None, + value: Name( + ExprName { + range: 599..602, + id: "int", + ctx: Load, + }, + ), + }, + ), + TypeAlias( + StmtTypeAlias { + range: 604..616, + name: Name( + ExprName { + range: 609..610, + id: "X", + ctx: Store, + }, + ), + type_params: None, + value: Name( + ExprName { + range: 613..616, + id: "str", + ctx: Load, + }, + ), + }, + ), + TypeAlias( + StmtTypeAlias { + range: 618..631, + name: 
Name( + ExprName { + range: 623..624, + id: "X", + ctx: Store, + }, + ), + type_params: None, + value: Name( + ExprName { + range: 627..631, + id: "type", + ctx: Load, + }, + ), + }, + ), + ClassDef( + StmtClassDef { + range: 632..653, + decorator_list: [], + name: Identifier { + id: "X", + range: 638..639, + }, + type_params: None, + arguments: None, + body: [ + TypeAlias( + StmtTypeAlias { + range: 641..653, + name: Name( + ExprName { + range: 646..647, + id: "X", + ctx: Store, + }, + ), + type_params: None, + value: Name( + ExprName { + range: 650..653, + id: "int", + ctx: Load, + }, + ), + }, + ), + ], + }, + ), ] diff --git a/crates/ruff_python_parser/src/soft_keywords.rs b/crates/ruff_python_parser/src/soft_keywords.rs index 6d14dc7f37d72..274b3182c396b 100644 --- a/crates/ruff_python_parser/src/soft_keywords.rs +++ b/crates/ruff_python_parser/src/soft_keywords.rs @@ -1,6 +1,7 @@ -use crate::{lexer::LexResult, token::Tok, Mode}; use itertools::{Itertools, MultiPeek}; +use crate::{lexer::LexResult, token::Tok, Mode}; + /// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match` /// `case`, and `type`). /// @@ -21,7 +22,7 @@ where I: Iterator, { underlying: MultiPeek, - start_of_line: bool, + position: Position, } impl SoftKeywordTransformer @@ -31,7 +32,11 @@ where pub fn new(lexer: I, mode: Mode) -> Self { Self { underlying: lexer.multipeek(), // spell-checker:ignore multipeek - start_of_line: !matches!(mode, Mode::Expression), + position: if matches!(mode, Mode::Expression) { + Position::Other + } else { + Position::Statement + }, } } } @@ -59,7 +64,7 @@ where // (This is to avoid treating `match` or `case` as identifiers when annotated with // type hints.) type hints.) Tok::Match | Tok::Case => { - if self.start_of_line { + if matches!(self.position, Position::Statement) { let mut nesting = 0; let mut first = true; let mut seen_colon = false; @@ -93,7 +98,10 @@ where // 2. 
The type token is immediately followed by a name token. // 3. The name token is eventually followed by an equality token. Tok::Type => { - if self.start_of_line { + if matches!( + self.position, + Position::Statement | Position::SimpleStatement + ) { let mut is_type_alias = false; if let Some(Ok((tok, _))) = self.underlying.peek() { if matches!( @@ -132,18 +140,56 @@ where } } - self.start_of_line = next.as_ref().is_some_and(|lex_result| { - lex_result.as_ref().is_ok_and(|(tok, _)| { - if matches!(tok, Tok::NonLogicalNewline | Tok::Comment { .. }) { - return self.start_of_line; + // Update the position, to track whether we're at the start of a logical line. + if let Some(lex_result) = next.as_ref() { + if let Ok((tok, _)) = lex_result.as_ref() { + match tok { + Tok::NonLogicalNewline | Tok::Comment { .. } => { + // Nothing to do. + } + Tok::StartModule | Tok::Newline | Tok::Indent | Tok::Dedent => { + self.position = Position::Statement; + } + // If we see a semicolon, assume we're at the start of a simple statement, as in: + // ```python + // type X = int; type Y = float + // ``` + Tok::Semi => { + self.position = Position::SimpleStatement; + } + // If we see a colon, and we're not in a nested context, assume we're at the + // start of a simple statement, as in: + // ```python + // class Class: type X = int + // ``` + Tok::Colon if matches!(self.position, Position::Other) => { + self.position = Position::SimpleStatement; + } + Tok::Lpar | Tok::Lsqb | Tok::Lbrace => { + self.position = if let Position::Nested(depth) = self.position { + Position::Nested(depth.saturating_add(1)) + } else { + Position::Nested(1) + }; + } + Tok::Rpar | Tok::Rsqb | Tok::Rbrace => { + self.position = if let Position::Nested(depth) = self.position { + let depth = depth.saturating_sub(1); + if depth > 0 { + Position::Nested(depth) + } else { + Position::Other + } + } else { + Position::Other + }; + } + _ => { + self.position = Position::Other; + } } - - matches!( - tok, - Tok::StartModule 
| Tok::Newline | Tok::Indent | Tok::Dedent - ) - }) - }); + } + } next } @@ -161,3 +207,19 @@ fn soft_to_name(tok: &Tok) -> Tok { name: name.to_owned(), } } + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Position { + /// The lexer is at the start of a logical line, i.e., the start of a simple or compound statement. + Statement, + /// The lexer is at the start of a simple statement, e.g., a statement following a semicolon + /// or colon, as in: + /// ```python + /// class Class: type X = int + /// ``` + SimpleStatement, + /// The lexer is within brackets, with the given bracket nesting depth. + Nested(u32), + /// The lexer is at some other location. + Other, +}