From b28556d7397e6c81819df5d4b3cfa12b284f11bb Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Tue, 28 Nov 2023 18:32:35 -0600 Subject: [PATCH] Update `E402` to work at cell level for notebooks (#8872) ## Summary This PR updates the `E402` rule to work at cell level for Jupyter notebooks. This is enabled only in preview to gather feedback. The implementation basically resets the import boundary flag on the semantic model when we encounter the first statement in a cell. Another potential solution is to introduce `E403` rule that is specifically for notebooks that works at cell level while `E402` will be disabled for notebooks. ## Test Plan Add a notebook with imports in multiple cells and verify that the rule works as expected. resolves: #8669 --- .../test/fixtures/pycodestyle/E402.ipynb | 113 ++++++++++++++++++ crates/ruff_linter/src/checkers/ast/mod.rs | 16 +++ .../ruff_linter/src/rules/pycodestyle/mod.rs | 1 + .../src/rules/pycodestyle/rules/imports.rs | 24 ++-- ...__pycodestyle__tests__E402_E402.ipynb.snap | 29 +++++ crates/ruff_notebook/src/cell.rs | 28 ++++- crates/ruff_python_ast/src/lib.rs | 2 +- 7 files changed, 204 insertions(+), 9 deletions(-) create mode 100644 crates/ruff_linter/resources/test/fixtures/pycodestyle/E402.ipynb create mode 100644 crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E402_E402.ipynb.snap diff --git a/crates/ruff_linter/resources/test/fixtures/pycodestyle/E402.ipynb b/crates/ruff_linter/resources/test/fixtures/pycodestyle/E402.ipynb new file mode 100644 index 0000000000000..9f9579e77e5dd --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/pycodestyle/E402.ipynb @@ -0,0 +1,113 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "33faf7ad-a3fd-4ac4-a0c3-52e507ed49df", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "sys.path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1331140f-2741-4661-9086-0764368710c9", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4113383-725d-4f04-80b8-a3080b2b8c4b", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.path\n", + "\n", + "import pathlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5d2ef63-ae60-4311-bae3-42e845afba3f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79599475-a5ee-4f60-80d1-6efa77693da0", + "metadata": {}, + "outputs": [], + "source": [ + "import a\n", + "\n", + "try:\n", + " import b\n", + "except ImportError:\n", + " pass\n", + "else:\n", + " pass\n", + "\n", + "__some__magic = 1\n", + "\n", + "import c" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "863dcc35-5c8d-4d05-8b4a-91059e944112", + "metadata": {}, + "outputs": [], + "source": [ + "import ok\n", + "\n", + "\n", + "def foo() -> None:\n", + " import e\n", + "\n", + "\n", + "import no_ok" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b2377d0-b814-4057-83ec-d443d8e19401", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (ruff-playground)", + "language": "python", + "name": "ruff-playground" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/crates/ruff_linter/src/checkers/ast/mod.rs b/crates/ruff_linter/src/checkers/ast/mod.rs index bf0eba8e4fd86..e1b276936e5cf 100644 --- a/crates/ruff_linter/src/checkers/ast/mod.rs +++ b/crates/ruff_linter/src/checkers/ast/mod.rs @@ -107,6 +107,8 @@ pub(crate) struct Checker<'a> { pub(crate) diagnostics: Vec, /// The list of names already seen by flake8-bugbear diagnostics, to avoid duplicate violations.. pub(crate) flake8_bugbear_seen: Vec, + /// The end offset of the last visited statement. + last_stmt_end: TextSize, } impl<'a> Checker<'a> { @@ -142,6 +144,7 @@ impl<'a> Checker<'a> { diagnostics: Vec::default(), flake8_bugbear_seen: Vec::default(), cell_offsets, + last_stmt_end: TextSize::default(), } } } @@ -268,6 +271,18 @@ where // Step 0: Pre-processing self.semantic.push_node(stmt); + // For Jupyter Notebooks, we'll reset the `IMPORT_BOUNDARY` flag when + // we encounter a cell boundary. + if self.source_type.is_ipynb() + && self.semantic.at_top_level() + && self.semantic.seen_import_boundary() + && self.cell_offsets.is_some_and(|cell_offsets| { + cell_offsets.has_cell_boundary(TextRange::new(self.last_stmt_end, stmt.start())) + }) + { + self.semantic.flags -= SemanticModelFlags::IMPORT_BOUNDARY; + } + // Track whether we've seen docstrings, non-imports, etc. match stmt { Stmt::ImportFrom(ast::StmtImportFrom { module, names, .. }) => { @@ -779,6 +794,7 @@ where self.semantic.flags = flags_snapshot; self.semantic.pop_node(); + self.last_stmt_end = stmt.end(); } fn visit_annotation(&mut self, expr: &'b Expr) { diff --git a/crates/ruff_linter/src/rules/pycodestyle/mod.rs b/crates/ruff_linter/src/rules/pycodestyle/mod.rs index 112be16261155..b2a02f5e23c55 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/mod.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/mod.rs @@ -37,6 +37,7 @@ mod tests { #[test_case(Rule::MixedSpacesAndTabs, Path::new("E101.py"))] #[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E40.py"))] #[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E402.py"))] + #[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E402.ipynb"))] #[test_case(Rule::MultipleImportsOnOneLine, Path::new("E40.py"))] #[test_case(Rule::MultipleStatementsOnOneLineColon, Path::new("E70.py"))] #[test_case(Rule::MultipleStatementsOnOneLineSemicolon, Path::new("E70.py"))] diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/imports.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/imports.rs index 4a037d7d0d438..443e044498b75 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/imports.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/imports.rs @@ -1,6 +1,6 @@ use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::{Alias, Stmt}; +use ruff_python_ast::{Alias, PySourceType, Stmt}; use ruff_text_size::Ranged; use crate::checkers::ast::Checker; @@ -34,7 +34,8 @@ impl Violation for MultipleImportsOnOneLine { } /// ## What it does -/// Checks for imports that are not at the top of the file. +/// Checks for imports that are not at the top of the file. For Jupyter notebooks, this +/// checks for imports that are not at the top of the cell. /// /// ## Why is this bad? /// According to [PEP 8], "imports are always put at the top of the file, just after any @@ -61,12 +62,18 @@ impl Violation for MultipleImportsOnOneLine { /// /// [PEP 8]: https://peps.python.org/pep-0008/#imports #[violation] -pub struct ModuleImportNotAtTopOfFile; +pub struct ModuleImportNotAtTopOfFile { + source_type: PySourceType, +} impl Violation for ModuleImportNotAtTopOfFile { #[derive_message_formats] fn message(&self) -> String { - format!("Module level import not at top of file") + if self.source_type.is_ipynb() { + format!("Module level import not at top of cell") + } else { + format!("Module level import not at top of file") + } } } @@ -82,8 +89,11 @@ pub(crate) fn multiple_imports_on_one_line(checker: &mut Checker, stmt: &Stmt, n /// E402 pub(crate) fn module_import_not_at_top_of_file(checker: &mut Checker, stmt: &Stmt) { if checker.semantic().seen_import_boundary() && checker.semantic().at_top_level() { - checker - .diagnostics - .push(Diagnostic::new(ModuleImportNotAtTopOfFile, stmt.range())); + checker.diagnostics.push(Diagnostic::new( + ModuleImportNotAtTopOfFile { + source_type: checker.source_type, + }, + stmt.range(), + )); } } diff --git a/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E402_E402.ipynb.snap b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E402_E402.ipynb.snap new file mode 100644 index 0000000000000..70562983d36e0 --- /dev/null +++ b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E402_E402.ipynb.snap @@ -0,0 +1,29 @@ +--- +source: crates/ruff_linter/src/rules/pycodestyle/mod.rs +--- +E402.ipynb:9:1: E402 Module level import not at top of cell + | + 7 | os.path + 8 | + 9 | import pathlib + | ^^^^^^^^^^^^^^ E402 +10 | +11 | import a + | + +E402.ipynb:22:1: E402 Module level import not at top of cell + | +20 | __some__magic = 1 +21 | +22 | import c + | ^^^^^^^^ E402 +23 | import ok + | + +E402.ipynb:30:1: E402 Module level import not at top of cell + | +30 | import no_ok + | ^^^^^^^^^^^^ E402 + | + + diff --git a/crates/ruff_notebook/src/cell.rs b/crates/ruff_notebook/src/cell.rs index 1d039e7a67483..d80ef336de02e 100644 --- a/crates/ruff_notebook/src/cell.rs +++ b/crates/ruff_notebook/src/cell.rs @@ -175,7 +175,7 @@ impl Cell { } /// Cell offsets are used to keep track of the start and end offsets of each -/// cell in the concatenated source code. +/// cell in the concatenated source code. These offsets are in sorted order. #[derive(Clone, Debug, Default, PartialEq)] pub struct CellOffsets(Vec); @@ -186,7 +186,17 @@ impl CellOffsets { } /// Push a new offset to the end of the [`CellOffsets`]. + /// + /// # Panics + /// + /// Panics if the offset is less than the last offset pushed. pub(crate) fn push(&mut self, offset: TextSize) { + if let Some(last_offset) = self.0.last() { + assert!( + *last_offset <= offset, + "Offsets must be pushed in sorted order" + ); + } self.0.push(offset); } @@ -200,6 +210,22 @@ impl CellOffsets { } }) } + + /// Returns `true` if the given range contains a cell boundary. + pub fn has_cell_boundary(&self, range: TextRange) -> bool { + self.binary_search_by(|offset| { + if range.start() <= *offset { + if range.end() < *offset { + std::cmp::Ordering::Greater + } else { + std::cmp::Ordering::Equal + } + } else { + std::cmp::Ordering::Less + } + }) + .is_ok() + } } impl Deref for CellOffsets { diff --git a/crates/ruff_python_ast/src/lib.rs b/crates/ruff_python_ast/src/lib.rs index 210e287eed372..1fb53dc8c7bf6 100644 --- a/crates/ruff_python_ast/src/lib.rs +++ b/crates/ruff_python_ast/src/lib.rs @@ -68,7 +68,7 @@ pub enum TomlSourceType { Unrecognized, } -#[derive(Clone, Copy, Debug, Default, PartialEq, is_macro::Is)] +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, is_macro::Is)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum PySourceType { /// The source is a Python file (`.py`).