Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update E402 to work at cell level for notebooks #8872

Merged
merged 3 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/pycodestyle/E402.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "33faf7ad-a3fd-4ac4-a0c3-52e507ed49df",
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"\n",
"sys.path"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1331140f-2741-4661-9086-0764368710c9",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "a4113383-725d-4f04-80b8-a3080b2b8c4b",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.path\n",
"\n",
"import pathlib"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a5d2ef63-ae60-4311-bae3-42e845afba3f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "79599475-a5ee-4f60-80d1-6efa77693da0",
"metadata": {},
"outputs": [],
"source": [
"import a\n",
"\n",
"try:\n",
" import b\n",
"except ImportError:\n",
" pass\n",
"else:\n",
" pass\n",
"\n",
"__some__magic = 1\n",
"\n",
"import c"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "863dcc35-5c8d-4d05-8b4a-91059e944112",
"metadata": {},
"outputs": [],
"source": [
"import ok\n",
"\n",
"\n",
"def foo() -> None:\n",
" import e\n",
"\n",
"\n",
"import no_ok"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b2377d0-b814-4057-83ec-d443d8e19401",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (ruff-playground)",
"language": "python",
"name": "ruff-playground"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
16 changes: 16 additions & 0 deletions crates/ruff_linter/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ pub(crate) struct Checker<'a> {
pub(crate) diagnostics: Vec<Diagnostic>,
/// The list of names already seen by flake8-bugbear diagnostics, to avoid duplicate violations..
pub(crate) flake8_bugbear_seen: Vec<TextRange>,
/// The end offset of the last visited statement.
last_stmt_end: TextSize,
}

impl<'a> Checker<'a> {
Expand Down Expand Up @@ -142,6 +144,7 @@ impl<'a> Checker<'a> {
diagnostics: Vec::default(),
flake8_bugbear_seen: Vec::default(),
cell_offsets,
last_stmt_end: TextSize::default(),
}
}
}
Expand Down Expand Up @@ -268,6 +271,18 @@ where
// Step 0: Pre-processing
self.semantic.push_node(stmt);

// For Jupyter Notebooks, we'll reset the `IMPORT_BOUNDARY` flag when
// we encounter a cell boundary.
if self.source_type.is_ipynb()
&& self.semantic.at_top_level()
&& self.semantic.seen_import_boundary()
&& self.cell_offsets.is_some_and(|cell_offsets| {
cell_offsets.has_cell_boundary(TextRange::new(self.last_stmt_end, stmt.start()))
})
{
self.semantic.flags -= SemanticModelFlags::IMPORT_BOUNDARY;
}

// Track whether we've seen docstrings, non-imports, etc.
match stmt {
Stmt::ImportFrom(ast::StmtImportFrom { module, names, .. }) => {
Expand Down Expand Up @@ -779,6 +794,7 @@ where

self.semantic.flags = flags_snapshot;
self.semantic.pop_node();
self.last_stmt_end = stmt.end();
}

fn visit_annotation(&mut self, expr: &'b Expr) {
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/pycodestyle/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ mod tests {
#[test_case(Rule::MixedSpacesAndTabs, Path::new("E101.py"))]
#[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E40.py"))]
#[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E402.py"))]
#[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E402.ipynb"))]
#[test_case(Rule::MultipleImportsOnOneLine, Path::new("E40.py"))]
#[test_case(Rule::MultipleStatementsOnOneLineColon, Path::new("E70.py"))]
#[test_case(Rule::MultipleStatementsOnOneLineSemicolon, Path::new("E70.py"))]
Expand Down
24 changes: 17 additions & 7 deletions crates/ruff_linter/src/rules/pycodestyle/rules/imports.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::{Alias, Stmt};
use ruff_python_ast::{Alias, PySourceType, Stmt};
use ruff_text_size::Ranged;

use crate::checkers::ast::Checker;
Expand Down Expand Up @@ -34,7 +34,8 @@ impl Violation for MultipleImportsOnOneLine {
}

/// ## What it does
/// Checks for imports that are not at the top of the file.
/// Checks for imports that are not at the top of the file. For Jupyter notebooks, this
/// checks for imports that are not at the top of the cell.
///
/// ## Why is this bad?
/// According to [PEP 8], "imports are always put at the top of the file, just after any
Expand All @@ -61,12 +62,18 @@ impl Violation for MultipleImportsOnOneLine {
///
/// [PEP 8]: https://peps.python.org/pep-0008/#imports
#[violation]
pub struct ModuleImportNotAtTopOfFile;
pub struct ModuleImportNotAtTopOfFile {
source_type: PySourceType,
}

impl Violation for ModuleImportNotAtTopOfFile {
#[derive_message_formats]
fn message(&self) -> String {
format!("Module level import not at top of file")
if self.source_type.is_ipynb() {
format!("Module level import not at top of cell")
} else {
format!("Module level import not at top of file")
}
}
}

Expand All @@ -82,8 +89,11 @@ pub(crate) fn multiple_imports_on_one_line(checker: &mut Checker, stmt: &Stmt, n
/// E402
pub(crate) fn module_import_not_at_top_of_file(checker: &mut Checker, stmt: &Stmt) {
if checker.semantic().seen_import_boundary() && checker.semantic().at_top_level() {
checker
.diagnostics
.push(Diagnostic::new(ModuleImportNotAtTopOfFile, stmt.range()));
checker.diagnostics.push(Diagnostic::new(
ModuleImportNotAtTopOfFile {
source_type: checker.source_type,
},
stmt.range(),
));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
---
source: crates/ruff_linter/src/rules/pycodestyle/mod.rs
---
E402.ipynb:9:1: E402 Module level import not at top of cell
|
7 | os.path
8 |
9 | import pathlib
| ^^^^^^^^^^^^^^ E402
10 |
11 | import a
|

E402.ipynb:22:1: E402 Module level import not at top of cell
|
20 | __some__magic = 1
21 |
22 | import c
| ^^^^^^^^ E402
23 | import ok
|

E402.ipynb:30:1: E402 Module level import not at top of cell
|
30 | import no_ok
| ^^^^^^^^^^^^ E402
|


28 changes: 27 additions & 1 deletion crates/ruff_notebook/src/cell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ impl Cell {
}

/// Cell offsets are used to keep track of the start and end offsets of each
/// cell in the concatenated source code.
/// cell in the concatenated source code. These offsets are in sorted order.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct CellOffsets(Vec<TextSize>);

Expand All @@ -186,7 +186,17 @@ impl CellOffsets {
}

/// Push a new offset to the end of the [`CellOffsets`].
///
/// # Panics
///
/// Panics if the offset is less than the last offset pushed.
pub(crate) fn push(&mut self, offset: TextSize) {
if let Some(last_offset) = self.0.last() {
assert!(
*last_offset <= offset,
"Offsets must be pushed in sorted order"
);
}
self.0.push(offset);
}

Expand All @@ -200,6 +210,22 @@ impl CellOffsets {
}
})
}

/// Returns `true` if the given range contains a cell boundary.
pub fn has_cell_boundary(&self, range: TextRange) -> bool {
self.binary_search_by(|offset| {
if range.start() <= *offset {
if range.end() < *offset {
std::cmp::Ordering::Greater
} else {
std::cmp::Ordering::Equal
}
} else {
std::cmp::Ordering::Less
}
})
.is_ok()
}
}

impl Deref for CellOffsets {
Expand Down
2 changes: 1 addition & 1 deletion crates/ruff_python_ast/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ pub enum TomlSourceType {
Unrecognized,
}

#[derive(Clone, Copy, Debug, Default, PartialEq, is_macro::Is)]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, is_macro::Is)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum PySourceType {
/// The source is a Python file (`.py`).
Expand Down
Loading