From 01e0119f68aab9d4e73f94a8517d5b35b9ad9e50 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Wed, 29 Nov 2023 19:54:53 -0500 Subject: [PATCH 1/2] Support type alias statements in simple statement positions --- crates/ruff_python_parser/src/parser.rs | 4 + ...parser__tests__parse_type_declaration.snap | 94 +++++++++++++++++++ .../ruff_python_parser/src/soft_keywords.rs | 94 +++++++++++++++---- 3 files changed, 176 insertions(+), 16 deletions(-) diff --git a/crates/ruff_python_parser/src/parser.rs b/crates/ruff_python_parser/src/parser.rs index 82b5a26b8f89b..557270babb1a9 100644 --- a/crates/ruff_python_parser/src/parser.rs +++ b/crates/ruff_python_parser/src/parser.rs @@ -822,6 +822,10 @@ type X \ [T] = T type X[T] \ = T + +# simple statements +type X = int; type X = str; type X = type +class X: type X = int "#; insta::assert_debug_snapshot!(parse_suite(source, "").unwrap()); } diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__parse_type_declaration.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__parse_type_declaration.snap index e8ddd7d13d1eb..7730469ff75ef 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__parse_type_declaration.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__parse_type_declaration.snap @@ -849,4 +849,98 @@ expression: "parse_suite(source, \"\").unwrap()" ), }, ), + TypeAlias( + StmtTypeAlias { + range: 590..602, + name: Name( + ExprName { + range: 595..596, + id: "X", + ctx: Store, + }, + ), + type_params: None, + value: Name( + ExprName { + range: 599..602, + id: "int", + ctx: Load, + }, + ), + }, + ), + TypeAlias( + StmtTypeAlias { + range: 604..616, + name: Name( + ExprName { + range: 609..610, + id: "X", + ctx: Store, + }, + ), + type_params: None, + value: Name( + ExprName { + range: 613..616, + id: "str", + ctx: Load, + }, + ), + }, + ), + TypeAlias( + StmtTypeAlias { + range: 618..631, + 
name: Name( + ExprName { + range: 623..624, + id: "X", + ctx: Store, + }, + ), + type_params: None, + value: Name( + ExprName { + range: 627..631, + id: "type", + ctx: Load, + }, + ), + }, + ), + ClassDef( + StmtClassDef { + range: 632..653, + decorator_list: [], + name: Identifier { + id: "X", + range: 638..639, + }, + type_params: None, + arguments: None, + body: [ + TypeAlias( + StmtTypeAlias { + range: 641..653, + name: Name( + ExprName { + range: 646..647, + id: "X", + ctx: Store, + }, + ), + type_params: None, + value: Name( + ExprName { + range: 650..653, + id: "int", + ctx: Load, + }, + ), + }, + ), + ], + }, + ), ] diff --git a/crates/ruff_python_parser/src/soft_keywords.rs b/crates/ruff_python_parser/src/soft_keywords.rs index 6d14dc7f37d72..274b3182c396b 100644 --- a/crates/ruff_python_parser/src/soft_keywords.rs +++ b/crates/ruff_python_parser/src/soft_keywords.rs @@ -1,6 +1,7 @@ -use crate::{lexer::LexResult, token::Tok, Mode}; use itertools::{Itertools, MultiPeek}; +use crate::{lexer::LexResult, token::Tok, Mode}; + /// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match` /// `case`, and `type`). /// @@ -21,7 +22,7 @@ where I: Iterator, { underlying: MultiPeek, - start_of_line: bool, + position: Position, } impl SoftKeywordTransformer @@ -31,7 +32,11 @@ where pub fn new(lexer: I, mode: Mode) -> Self { Self { underlying: lexer.multipeek(), // spell-checker:ignore multipeek - start_of_line: !matches!(mode, Mode::Expression), + position: if matches!(mode, Mode::Expression) { + Position::Other + } else { + Position::Statement + }, } } } @@ -59,7 +64,7 @@ where // (This is to avoid treating `match` or `case` as identifiers when annotated with // type hints.) type hints.) Tok::Match | Tok::Case => { - if self.start_of_line { + if matches!(self.position, Position::Statement) { let mut nesting = 0; let mut first = true; let mut seen_colon = false; @@ -93,7 +98,10 @@ where // 2. 
The type token is immediately followed by a name token. // 3. The name token is eventually followed by an equality token. Tok::Type => { - if self.start_of_line { + if matches!( + self.position, + Position::Statement | Position::SimpleStatement + ) { let mut is_type_alias = false; if let Some(Ok((tok, _))) = self.underlying.peek() { if matches!( @@ -132,18 +140,56 @@ where } } - self.start_of_line = next.as_ref().is_some_and(|lex_result| { - lex_result.as_ref().is_ok_and(|(tok, _)| { - if matches!(tok, Tok::NonLogicalNewline | Tok::Comment { .. }) { - return self.start_of_line; + // Update the position, to track whether we're at the start of a logical line. + if let Some(lex_result) = next.as_ref() { + if let Ok((tok, _)) = lex_result.as_ref() { + match tok { + Tok::NonLogicalNewline | Tok::Comment { .. } => { + // Nothing to do. + } + Tok::StartModule | Tok::Newline | Tok::Indent | Tok::Dedent => { + self.position = Position::Statement; + } + // If we see a semicolon, assume we're at the start of a simple statement, as in: + // ```python + // type X = int; type Y = float + // ``` + Tok::Semi => { + self.position = Position::SimpleStatement; + } + // If we see a colon, and we're not in a nested context, assume we're at the + // start of a simple statement, as in: + // ```python + // class Class: type X = int + // ``` + Tok::Colon if matches!(self.position, Position::Other) => { + self.position = Position::SimpleStatement; + } + Tok::Lpar | Tok::Lsqb | Tok::Lbrace => { + self.position = if let Position::Nested(depth) = self.position { + Position::Nested(depth.saturating_add(1)) + } else { + Position::Nested(1) + }; + } + Tok::Rpar | Tok::Rsqb | Tok::Rbrace => { + self.position = if let Position::Nested(depth) = self.position { + let depth = depth.saturating_sub(1); + if depth > 0 { + Position::Nested(depth) + } else { + Position::Other + } + } else { + Position::Other + }; + } + _ => { + self.position = Position::Other; + } } - - matches!( - tok, - Tok::StartModule 
| Tok::Newline | Tok::Indent | Tok::Dedent - ) - }) - }); + } + } next } @@ -161,3 +207,19 @@ fn soft_to_name(tok: &Tok) -> Tok { name: name.to_owned(), } } + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Position { + /// The lexer is at the start of a logical line, i.e., the start of a simple or compound statement. + Statement, + /// The lexer is at the start of a simple statement, e.g., a statement following a semicolon + /// or colon, as in: + /// ```python + /// class Class: type X = int + /// ``` + SimpleStatement, + /// The lexer is within brackets, with the given bracket nesting depth. + Nested(u32), + /// The lexer is at some other location. + Other, +} From 5f5a1cebfd51adc9e9f70b929a727f02bf0cdd61 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 30 Nov 2023 14:03:00 -0500 Subject: [PATCH 2/2] Expand tests --- crates/ruff_python_parser/src/parser.rs | 7 ++++ ...er__parser__tests__type_as_identifier.snap | 40 +++++++++++++++++++ .../ruff_python_parser/src/soft_keywords.rs | 7 ++-- 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/crates/ruff_python_parser/src/parser.rs b/crates/ruff_python_parser/src/parser.rs index 557270babb1a9..963d3a33064eb 100644 --- a/crates/ruff_python_parser/src/parser.rs +++ b/crates/ruff_python_parser/src/parser.rs @@ -863,10 +863,17 @@ type ( type = 1 type = x = 1 x = type = 1 +lambda x: type "; insta::assert_debug_snapshot!(parse_suite(source, "").unwrap()); } + #[test] + fn test_invalid_type() { + assert!(parse_suite("a: type X = int", "").is_err()); + assert!(parse_suite("lambda: type X = int", "").is_err()); + } + #[test] fn numeric_literals() { let source = r"x = 123456789 diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__type_as_identifier.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__type_as_identifier.snap index ad394b4042ac6..0296e7ad0511e 100644 --- 
a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__type_as_identifier.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__type_as_identifier.snap @@ -988,4 +988,44 @@ expression: "parse_suite(source, \"\").unwrap()" ), }, ), + Expr( + StmtExpr { + range: 652..666, + value: Lambda( + ExprLambda { + range: 652..666, + parameters: Some( + Parameters { + range: 659..660, + posonlyargs: [], + args: [ + ParameterWithDefault { + range: 659..660, + parameter: Parameter { + range: 659..660, + name: Identifier { + id: "x", + range: 659..660, + }, + annotation: None, + }, + default: None, + }, + ], + vararg: None, + kwonlyargs: [], + kwarg: None, + }, + ), + body: Name( + ExprName { + range: 662..666, + id: "type", + ctx: Load, + }, + ), + }, + ), + }, + ), ] diff --git a/crates/ruff_python_parser/src/soft_keywords.rs b/crates/ruff_python_parser/src/soft_keywords.rs index 274b3182c396b..379ae1c08db38 100644 --- a/crates/ruff_python_parser/src/soft_keywords.rs +++ b/crates/ruff_python_parser/src/soft_keywords.rs @@ -32,7 +32,7 @@ where pub fn new(lexer: I, mode: Mode) -> Self { Self { underlying: lexer.multipeek(), // spell-checker:ignore multipeek - position: if matches!(mode, Mode::Expression) { + position: if mode == Mode::Expression { Position::Other } else { Position::Statement @@ -54,7 +54,6 @@ where // If the token is a soft keyword e.g. `type`, `match`, or `case`, check if it's // used as an identifier. We assume every soft keyword use is an identifier unless // a heuristic is met. - match tok { // For `match` and `case`, all of the following conditions must be met: // 1. The token is at the start of a logical line. @@ -62,7 +61,7 @@ where // inside a parenthesized expression, list, or dictionary). // 3. The top-level colon is not the immediate sibling of a `match` or `case` token. // (This is to avoid treating `match` or `case` as identifiers when annotated with - // type hints.) type hints.) 
+ // type hints.) Tok::Match | Tok::Case => { if matches!(self.position, Position::Statement) { let mut nesting = 0; @@ -162,7 +161,7 @@ where // ```python // class Class: type X = int // ``` - Tok::Colon if matches!(self.position, Position::Other) => { + Tok::Colon if self.position == Position::Other => { self.position = Position::SimpleStatement; } Tok::Lpar | Tok::Lsqb | Tok::Lbrace => {