Skip to content

Commit

Permalink
Support type alias statements in simple statement positions (#8916)
Browse files Browse the repository at this point in the history
<!--
Thank you for contributing to Ruff! To help us out with reviewing,
please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

Our `SoftKeywordTokenizer` only respected soft keywords in compound
statement positions -- for example, at the start of a logical line:

```python
type X = int
```

However, type aliases can also appear in simple statement positions,
like:

```python
class Class: type X = int
```

(Note that `match` and `case` are _not_ valid keywords in such
positions.)

This PR upgrades the tokenizer to track both kinds of valid positions.

Closes #8900.
Closes #8899.

## Test Plan

`cargo test`
  • Loading branch information
charliermarsh authored Nov 30, 2023
1 parent 073eddb commit 20782ab
Show file tree
Hide file tree
Showing 4 changed files with 224 additions and 18 deletions.
11 changes: 11 additions & 0 deletions crates/ruff_python_parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -822,6 +822,10 @@ type X \
[T] = T
type X[T] \
= T
# simple statements
type X = int; type X = str; type X = type
class X: type X = int
"#;
insta::assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
}
Expand Down Expand Up @@ -859,10 +863,17 @@ type (
type = 1
type = x = 1
x = type = 1
lambda x: type
";
insta::assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
}

#[test]
fn test_invalid_type() {
assert!(parse_suite("a: type X = int", "<test>").is_err());
assert!(parse_suite("lambda: type X = int", "<test>").is_err());
}

#[test]
fn numeric_literals() {
let source = r"x = 123456789
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -849,4 +849,98 @@ expression: "parse_suite(source, \"<test>\").unwrap()"
),
},
),
TypeAlias(
StmtTypeAlias {
range: 590..602,
name: Name(
ExprName {
range: 595..596,
id: "X",
ctx: Store,
},
),
type_params: None,
value: Name(
ExprName {
range: 599..602,
id: "int",
ctx: Load,
},
),
},
),
TypeAlias(
StmtTypeAlias {
range: 604..616,
name: Name(
ExprName {
range: 609..610,
id: "X",
ctx: Store,
},
),
type_params: None,
value: Name(
ExprName {
range: 613..616,
id: "str",
ctx: Load,
},
),
},
),
TypeAlias(
StmtTypeAlias {
range: 618..631,
name: Name(
ExprName {
range: 623..624,
id: "X",
ctx: Store,
},
),
type_params: None,
value: Name(
ExprName {
range: 627..631,
id: "type",
ctx: Load,
},
),
},
),
ClassDef(
StmtClassDef {
range: 632..653,
decorator_list: [],
name: Identifier {
id: "X",
range: 638..639,
},
type_params: None,
arguments: None,
body: [
TypeAlias(
StmtTypeAlias {
range: 641..653,
name: Name(
ExprName {
range: 646..647,
id: "X",
ctx: Store,
},
),
type_params: None,
value: Name(
ExprName {
range: 650..653,
id: "int",
ctx: Load,
},
),
},
),
],
},
),
]
Original file line number Diff line number Diff line change
Expand Up @@ -988,4 +988,44 @@ expression: "parse_suite(source, \"<test>\").unwrap()"
),
},
),
Expr(
StmtExpr {
range: 652..666,
value: Lambda(
ExprLambda {
range: 652..666,
parameters: Some(
Parameters {
range: 659..660,
posonlyargs: [],
args: [
ParameterWithDefault {
range: 659..660,
parameter: Parameter {
range: 659..660,
name: Identifier {
id: "x",
range: 659..660,
},
annotation: None,
},
default: None,
},
],
vararg: None,
kwonlyargs: [],
kwarg: None,
},
),
body: Name(
ExprName {
range: 662..666,
id: "type",
ctx: Load,
},
),
},
),
},
),
]
97 changes: 79 additions & 18 deletions crates/ruff_python_parser/src/soft_keywords.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::{lexer::LexResult, token::Tok, Mode};
use itertools::{Itertools, MultiPeek};

use crate::{lexer::LexResult, token::Tok, Mode};

/// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match`
/// `case`, and `type`).
///
Expand All @@ -21,7 +22,7 @@ where
I: Iterator<Item = LexResult>,
{
underlying: MultiPeek<I>,
start_of_line: bool,
position: Position,
}

impl<I> SoftKeywordTransformer<I>
Expand All @@ -31,7 +32,11 @@ where
pub fn new(lexer: I, mode: Mode) -> Self {
Self {
underlying: lexer.multipeek(), // spell-checker:ignore multipeek
start_of_line: !matches!(mode, Mode::Expression),
position: if mode == Mode::Expression {
Position::Other
} else {
Position::Statement
},
}
}
}
Expand All @@ -49,17 +54,16 @@ where
// If the token is a soft keyword e.g. `type`, `match`, or `case`, check if it's
// used as an identifier. We assume every soft keyword use is an identifier unless
// a heuristic is met.

match tok {
// For `match` and `case`, all of the following conditions must be met:
// 1. The token is at the start of a logical line.
// 2. The logical line contains a top-level colon (that is, a colon that is not nested
// inside a parenthesized expression, list, or dictionary).
// 3. The top-level colon is not the immediate sibling of a `match` or `case` token.
// (This is to avoid treating `match` or `case` as identifiers when annotated with
// type hints.)
Tok::Match | Tok::Case => {
if self.start_of_line {
if matches!(self.position, Position::Statement) {
let mut nesting = 0;
let mut first = true;
let mut seen_colon = false;
Expand Down Expand Up @@ -93,7 +97,10 @@ where
// 2. The type token is immediately followed by a name token.
// 3. The name token is eventually followed by an equality token.
Tok::Type => {
if self.start_of_line {
if matches!(
self.position,
Position::Statement | Position::SimpleStatement
) {
let mut is_type_alias = false;
if let Some(Ok((tok, _))) = self.underlying.peek() {
if matches!(
Expand Down Expand Up @@ -132,18 +139,56 @@ where
}
}

self.start_of_line = next.as_ref().is_some_and(|lex_result| {
lex_result.as_ref().is_ok_and(|(tok, _)| {
if matches!(tok, Tok::NonLogicalNewline | Tok::Comment { .. }) {
return self.start_of_line;
// Update the position, to track whether we're at the start of a logical line.
if let Some(lex_result) = next.as_ref() {
if let Ok((tok, _)) = lex_result.as_ref() {
match tok {
Tok::NonLogicalNewline | Tok::Comment { .. } => {
// Nothing to do.
}
Tok::StartModule | Tok::Newline | Tok::Indent | Tok::Dedent => {
self.position = Position::Statement;
}
// If we see a semicolon, assume we're at the start of a simple statement, as in:
// ```python
// type X = int; type Y = float
// ```
Tok::Semi => {
self.position = Position::SimpleStatement;
}
// If we see a colon, and we're not in a nested context, assume we're at the
// start of a simple statement, as in:
// ```python
// class Class: type X = int
// ```
Tok::Colon if self.position == Position::Other => {
self.position = Position::SimpleStatement;
}
Tok::Lpar | Tok::Lsqb | Tok::Lbrace => {
self.position = if let Position::Nested(depth) = self.position {
Position::Nested(depth.saturating_add(1))
} else {
Position::Nested(1)
};
}
Tok::Rpar | Tok::Rsqb | Tok::Rbrace => {
self.position = if let Position::Nested(depth) = self.position {
let depth = depth.saturating_sub(1);
if depth > 0 {
Position::Nested(depth)
} else {
Position::Other
}
} else {
Position::Other
};
}
_ => {
self.position = Position::Other;
}
}

matches!(
tok,
Tok::StartModule | Tok::Newline | Tok::Indent | Tok::Dedent
)
})
});
}
}

next
}
Expand All @@ -161,3 +206,19 @@ fn soft_to_name(tok: &Tok) -> Tok {
name: name.to_owned(),
}
}

/// The transformer's position within the token stream, used to decide whether a
/// soft keyword (`match`, `case`, or `type`) may be recognized as a keyword at
/// the current token (see the `Tok::Match | Tok::Case` and `Tok::Type` arms above).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Position {
    /// The lexer is at the start of a logical line, i.e., the start of a simple or compound statement.
    Statement,
    /// The lexer is at the start of a simple statement, e.g., a statement following a semicolon
    /// or colon, as in:
    /// ```python
    /// class Class: type X = int
    /// ```
    SimpleStatement,
    /// The lexer is within brackets, with the given bracket nesting depth.
    Nested(u32),
    /// The lexer is at some other location (no soft-keyword heuristic applies here).
    Other,
}

0 comments on commit 20782ab

Please sign in to comment.