diff --git a/crates/ruff_linter/resources/test/fixtures/pycodestyle/E402.ipynb b/crates/ruff_linter/resources/test/fixtures/pycodestyle/E402.ipynb new file mode 100644 index 0000000000000..9f9579e77e5dd --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/pycodestyle/E402.ipynb @@ -0,0 +1,113 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "33faf7ad-a3fd-4ac4-a0c3-52e507ed49df", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "sys.path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1331140f-2741-4661-9086-0764368710c9", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4113383-725d-4f04-80b8-a3080b2b8c4b", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.path\n", + "\n", + "import pathlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5d2ef63-ae60-4311-bae3-42e845afba3f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79599475-a5ee-4f60-80d1-6efa77693da0", + "metadata": {}, + "outputs": [], + "source": [ + "import a\n", + "\n", + "try:\n", + " import b\n", + "except ImportError:\n", + " pass\n", + "else:\n", + " pass\n", + "\n", + "__some__magic = 1\n", + "\n", + "import c" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "863dcc35-5c8d-4d05-8b4a-91059e944112", + "metadata": {}, + "outputs": [], + "source": [ + "import ok\n", + "\n", + "\n", + "def foo() -> None:\n", + " import e\n", + "\n", + "\n", + "import no_ok" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b2377d0-b814-4057-83ec-d443d8e19401", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (ruff-playground)", + "language": "python", + "name": "ruff-playground" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/crates/ruff_linter/src/checkers/ast/mod.rs b/crates/ruff_linter/src/checkers/ast/mod.rs index bf0eba8e4fd86..e1b276936e5cf 100644 --- a/crates/ruff_linter/src/checkers/ast/mod.rs +++ b/crates/ruff_linter/src/checkers/ast/mod.rs @@ -107,6 +107,8 @@ pub(crate) struct Checker<'a> { pub(crate) diagnostics: Vec, /// The list of names already seen by flake8-bugbear diagnostics, to avoid duplicate violations.. pub(crate) flake8_bugbear_seen: Vec, + /// The end offset of the last visited statement. + last_stmt_end: TextSize, } impl<'a> Checker<'a> { @@ -142,6 +144,7 @@ impl<'a> Checker<'a> { diagnostics: Vec::default(), flake8_bugbear_seen: Vec::default(), cell_offsets, + last_stmt_end: TextSize::default(), } } } @@ -268,6 +271,18 @@ where // Step 0: Pre-processing self.semantic.push_node(stmt); + // For Jupyter Notebooks, we'll reset the `IMPORT_BOUNDARY` flag when + // we encounter a cell boundary. + if self.source_type.is_ipynb() + && self.semantic.at_top_level() + && self.semantic.seen_import_boundary() + && self.cell_offsets.is_some_and(|cell_offsets| { + cell_offsets.has_cell_boundary(TextRange::new(self.last_stmt_end, stmt.start())) + }) + { + self.semantic.flags -= SemanticModelFlags::IMPORT_BOUNDARY; + } + // Track whether we've seen docstrings, non-imports, etc. match stmt { Stmt::ImportFrom(ast::StmtImportFrom { module, names, .. }) => { @@ -779,6 +794,7 @@ where self.semantic.flags = flags_snapshot; self.semantic.pop_node(); + self.last_stmt_end = stmt.end(); } fn visit_annotation(&mut self, expr: &'b Expr) { diff --git a/crates/ruff_linter/src/rules/pycodestyle/mod.rs b/crates/ruff_linter/src/rules/pycodestyle/mod.rs index 112be16261155..b2a02f5e23c55 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/mod.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/mod.rs @@ -37,6 +37,7 @@ mod tests { #[test_case(Rule::MixedSpacesAndTabs, Path::new("E101.py"))] #[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E40.py"))] #[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E402.py"))] + #[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E402.ipynb"))] #[test_case(Rule::MultipleImportsOnOneLine, Path::new("E40.py"))] #[test_case(Rule::MultipleStatementsOnOneLineColon, Path::new("E70.py"))] #[test_case(Rule::MultipleStatementsOnOneLineSemicolon, Path::new("E70.py"))] diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/imports.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/imports.rs index 4a037d7d0d438..443e044498b75 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/imports.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/imports.rs @@ -1,6 +1,6 @@ use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::{Alias, Stmt}; +use ruff_python_ast::{Alias, PySourceType, Stmt}; use ruff_text_size::Ranged; use crate::checkers::ast::Checker; @@ -34,7 +34,8 @@ impl Violation for MultipleImportsOnOneLine { } /// ## What it does -/// Checks for imports that are not at the top of the file. +/// Checks for imports that are not at the top of the file. For Jupyter notebooks, this +/// checks for imports that are not at the top of the cell. /// /// ## Why is this bad? /// According to [PEP 8], "imports are always put at the top of the file, just after any @@ -61,12 +62,18 @@ impl Violation for MultipleImportsOnOneLine { /// /// [PEP 8]: https://peps.python.org/pep-0008/#imports #[violation] -pub struct ModuleImportNotAtTopOfFile; +pub struct ModuleImportNotAtTopOfFile { + source_type: PySourceType, +} impl Violation for ModuleImportNotAtTopOfFile { #[derive_message_formats] fn message(&self) -> String { - format!("Module level import not at top of file") + if self.source_type.is_ipynb() { + format!("Module level import not at top of cell") + } else { + format!("Module level import not at top of file") + } } } @@ -82,8 +89,11 @@ pub(crate) fn multiple_imports_on_one_line(checker: &mut Checker, stmt: &Stmt, n /// E402 pub(crate) fn module_import_not_at_top_of_file(checker: &mut Checker, stmt: &Stmt) { if checker.semantic().seen_import_boundary() && checker.semantic().at_top_level() { - checker - .diagnostics - .push(Diagnostic::new(ModuleImportNotAtTopOfFile, stmt.range())); + checker.diagnostics.push(Diagnostic::new( + ModuleImportNotAtTopOfFile { + source_type: checker.source_type, + }, + stmt.range(), + )); } } diff --git a/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E402_E402.ipynb.snap b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E402_E402.ipynb.snap new file mode 100644 index 0000000000000..70562983d36e0 --- /dev/null +++ b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E402_E402.ipynb.snap @@ -0,0 +1,29 @@ +--- +source: crates/ruff_linter/src/rules/pycodestyle/mod.rs +--- +E402.ipynb:9:1: E402 Module level import not at top of cell + | + 7 | os.path + 8 | + 9 | import pathlib + | ^^^^^^^^^^^^^^ E402 +10 | +11 | import a + | + +E402.ipynb:22:1: E402 Module level import not at top of cell + | +20 | __some__magic = 1 +21 | +22 | import c + | ^^^^^^^^ E402 +23 | import ok + | + +E402.ipynb:30:1: E402 Module level import not at top of cell + | +30 | import no_ok + | ^^^^^^^^^^^^ E402 + | + + diff --git a/crates/ruff_notebook/src/cell.rs b/crates/ruff_notebook/src/cell.rs index 1d039e7a67483..d80ef336de02e 100644 --- a/crates/ruff_notebook/src/cell.rs +++ b/crates/ruff_notebook/src/cell.rs @@ -175,7 +175,7 @@ impl Cell { } /// Cell offsets are used to keep track of the start and end offsets of each -/// cell in the concatenated source code. +/// cell in the concatenated source code. These offsets are in sorted order. #[derive(Clone, Debug, Default, PartialEq)] pub struct CellOffsets(Vec); @@ -186,7 +186,17 @@ impl CellOffsets { } /// Push a new offset to the end of the [`CellOffsets`]. + /// + /// # Panics + /// + /// Panics if the offset is less than the last offset pushed. pub(crate) fn push(&mut self, offset: TextSize) { + if let Some(last_offset) = self.0.last() { + assert!( + *last_offset <= offset, + "Offsets must be pushed in sorted order" + ); + } self.0.push(offset); } @@ -200,6 +210,22 @@ impl CellOffsets { } }) } + + /// Returns `true` if the given range contains a cell boundary. + pub fn has_cell_boundary(&self, range: TextRange) -> bool { + self.binary_search_by(|offset| { + if range.start() <= *offset { + if range.end() < *offset { + std::cmp::Ordering::Greater + } else { + std::cmp::Ordering::Equal + } + } else { + std::cmp::Ordering::Less + } + }) + .is_ok() + } } impl Deref for CellOffsets { diff --git a/crates/ruff_python_ast/src/lib.rs b/crates/ruff_python_ast/src/lib.rs index 210e287eed372..1fb53dc8c7bf6 100644 --- a/crates/ruff_python_ast/src/lib.rs +++ b/crates/ruff_python_ast/src/lib.rs @@ -68,7 +68,7 @@ pub enum TomlSourceType { Unrecognized, } -#[derive(Clone, Copy, Debug, Default, PartialEq, is_macro::Is)] +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, is_macro::Is)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum PySourceType { /// The source is a Python file (`.py`).