From 9088d714a87e510bc516d9ad590fdb1bc02ac60b Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala <dhruvmanila@gmail.com>
Date: Wed, 22 Nov 2023 11:12:35 -0600
Subject: [PATCH] Avoid `E703` for last expression in a cell

---
 .../test/fixtures/pycodestyle/E703.ipynb      | 94 +++++++++++++++++++
 crates/ruff_linter/src/checkers/tokens.rs     | 16 +++-
 crates/ruff_linter/src/linter.rs              |  3 +-
 .../ruff_linter/src/rules/pycodestyle/mod.rs  |  1 +
 .../pycodestyle/rules/compound_statements.rs  | 67 ++++++++++---
 ...__pycodestyle__tests__E703_E703.ipynb.snap | 46 +++++++++
 6 files changed, 212 insertions(+), 15 deletions(-)
 create mode 100644 crates/ruff_linter/resources/test/fixtures/pycodestyle/E703.ipynb
 create mode 100644 crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E703_E703.ipynb.snap
diff --git a/crates/ruff_linter/resources/test/fixtures/pycodestyle/E703.ipynb b/crates/ruff_linter/resources/test/fixtures/pycodestyle/E703.ipynb
new file mode 100644
index 0000000000000..97606b62f3792
--- /dev/null
+++ b/crates/ruff_linter/resources/test/fixtures/pycodestyle/E703.ipynb
@@ -0,0 +1,94 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "33faf7ad-a3fd-4ac4-a0c3-52e507ed49df",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x = 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "481fb4bf-c1b9-47da-927f-3cfdfe4b49ec",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Simple case\n",
+    "x;"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "2f0c65a5-0a0e-4080-afce-5a8ed0d706df",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Only skip the last expression\n",
+    "x;  # E703\n",
+    "x;"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "5a3fd75d-26d9-44f7-b013-1684aabfd0ae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Nested expressions isn't relevant\n",
+    "if True:\n",
+    "    x;"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "05eab5b9-e2ba-4954-8ef3-b035a79573fe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Semicolons with multiple expressions\n",
+    "x; x;"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "9cbbddc5-83fc-4fdb-81ab-53a3912ae898",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Comments, newlines and whitespace\n",
+    "x; # comment\n",
+    "\n",
+    "# another comment"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (ruff-playground)",
+   "language": "python",
+   "name": "ruff-playground"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs
index b0f563c23b91f..e9d23ed91c626 100644
--- a/crates/ruff_linter/src/checkers/tokens.rs
+++ b/crates/ruff_linter/src/checkers/tokens.rs
@@ -2,6 +2,8 @@
 
 use std::path::Path;
 
+use ruff_notebook::CellOffsets;
+use ruff_python_ast::PySourceType;
 use ruff_python_parser::lexer::LexResult;
 use ruff_python_parser::Tok;
 
@@ -25,7 +27,8 @@ pub(crate) fn check_tokens(
     locator: &Locator,
     indexer: &Indexer,
     settings: &LinterSettings,
-    is_stub: bool,
+    source_type: PySourceType,
+    cell_offsets: Option<&CellOffsets>,
 ) -> Vec<Diagnostic> {
     let mut diagnostics: Vec<Diagnostic> = vec![];
 
@@ -112,7 +115,14 @@ pub(crate) fn check_tokens(
         Rule::MultipleStatementsOnOneLineSemicolon,
         Rule::UselessSemicolon,
     ]) {
-        pycodestyle::rules::compound_statements(&mut diagnostics, tokens, locator, indexer);
+        pycodestyle::rules::compound_statements(
+            &mut diagnostics,
+            tokens,
+            locator,
+            indexer,
+            source_type,
+            cell_offsets,
+        );
     }
 
     if settings.rules.enabled(Rule::AvoidableEscapedQuote) && settings.flake8_quotes.avoid_escape {
@@ -156,7 +166,7 @@ pub(crate) fn check_tokens(
         pyupgrade::rules::extraneous_parentheses(&mut diagnostics, tokens, locator);
     }
 
-    if is_stub && settings.rules.enabled(Rule::TypeCommentInStub) {
+    if source_type.is_stub() && settings.rules.enabled(Rule::TypeCommentInStub) {
         flake8_pyi::rules::type_comment_in_stub(&mut diagnostics, locator, indexer);
     }
 
diff --git a/crates/ruff_linter/src/linter.rs b/crates/ruff_linter/src/linter.rs
index d14ef022217b2..f6382e8b30349 100644
--- a/crates/ruff_linter/src/linter.rs
+++ b/crates/ruff_linter/src/linter.rs
@@ -108,7 +108,8 @@ pub fn check_path(
             locator,
             indexer,
             settings,
-            source_type.is_stub(),
+            source_type,
+            source_kind.as_ipy_notebook().map(Notebook::cell_offsets),
         ));
     }
 
diff --git a/crates/ruff_linter/src/rules/pycodestyle/mod.rs b/crates/ruff_linter/src/rules/pycodestyle/mod.rs
index 583e696a36b0c..112be16261155 100644
--- a/crates/ruff_linter/src/rules/pycodestyle/mod.rs
+++ b/crates/ruff_linter/src/rules/pycodestyle/mod.rs
@@ -53,6 +53,7 @@ mod tests {
     #[test_case(Rule::TrueFalseComparison, Path::new("E712.py"))]
     #[test_case(Rule::TypeComparison, Path::new("E721.py"))]
     #[test_case(Rule::UselessSemicolon, Path::new("E70.py"))]
+    #[test_case(Rule::UselessSemicolon, Path::new("E703.ipynb"))]
     fn rules(rule_code: Rule, path: &Path) -> Result<()> {
         let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
         let diagnostics = test_path(
diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs
index 6dcdd19c8b4ec..2b7434dd23c17 100644
--- a/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs
+++ b/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs
@@ -1,6 +1,8 @@
+use ruff_notebook::CellOffsets;
+use ruff_python_ast::PySourceType;
 use ruff_python_parser::lexer::LexResult;
 use ruff_python_parser::Tok;
-use ruff_text_size::TextRange;
+use ruff_text_size::{TextRange, TextSize};
 
 use ruff_diagnostics::{AlwaysFixableViolation, Violation};
 use ruff_diagnostics::{Diagnostic, Edit, Fix};
@@ -101,6 +103,8 @@ pub(crate) fn compound_statements(
     lxr: &[LexResult],
     locator: &Locator,
     indexer: &Indexer,
+    source_type: PySourceType,
+    cell_offsets: Option<&CellOffsets>,
 ) {
     // Track the last seen instance of a variety of tokens.
     let mut colon = None;
@@ -127,7 +131,13 @@ pub(crate) fn compound_statements(
     let mut sqb_count = 0u32;
     let mut brace_count = 0u32;
 
-    for &(ref tok, range) in lxr.iter().flatten() {
+    // Track indentation.
+    let mut indent = 0u32;
+
+    // Keep the token iterator to perform lookaheads.
+    let mut tokens = lxr.iter().flatten();
+
+    while let Some(&(ref tok, range)) = tokens.next() {
         match tok {
             Tok::Lpar => {
                 par_count = par_count.saturating_add(1);
@@ -153,6 +163,12 @@ pub(crate) fn compound_statements(
                     continue;
                 }
             }
+            Tok::Indent => {
+                indent = indent.saturating_add(1);
+            }
+            Tok::Dedent => {
+                indent = indent.saturating_sub(1);
+            }
             _ => {}
         }
 
@@ -163,15 +179,24 @@ pub(crate) fn compound_statements(
         match tok {
             Tok::Newline => {
                 if let Some((start, end)) = semi {
-                    let mut diagnostic =
-                        Diagnostic::new(UselessSemicolon, TextRange::new(start, end));
-                    diagnostic.set_fix(Fix::safe_edit(Edit::deletion(
-                        indexer
-                            .preceded_by_continuations(start, locator)
-                            .unwrap_or(start),
-                        end,
-                    )));
-                    diagnostics.push(diagnostic);
+                    if !(source_type.is_ipynb()
+                        && indent == 0
+                        && cell_offsets
+                            .and_then(|cell_offsets| cell_offsets.containing_range(range.start()))
+                            .is_some_and(|cell_range| {
+                                !has_non_trivia_tokens_till(tokens.clone(), cell_range.end())
+                            }))
+                    {
+                        let mut diagnostic =
+                            Diagnostic::new(UselessSemicolon, TextRange::new(start, end));
+                        diagnostic.set_fix(Fix::safe_edit(Edit::deletion(
+                            indexer
+                                .preceded_by_continuations(start, locator)
+                                .unwrap_or(start),
+                            end,
+                        )));
+                        diagnostics.push(diagnostic);
+                    }
                 }
 
                 // Reset.
@@ -309,3 +334,23 @@ pub(crate) fn compound_statements(
         };
     }
 }
+
+/// Returns `true` if there are any non-trivia tokens from the given token
+/// iterator till the given end offset.
+fn has_non_trivia_tokens_till<'a>(
+    tokens: impl Iterator<Item = &'a (Tok, TextRange)>,
+    cell_end: TextSize,
+) -> bool {
+    for &(ref tok, tok_range) in tokens {
+        if tok_range.start() >= cell_end {
+            return false;
+        }
+        if !matches!(
+            tok,
+            Tok::Newline | Tok::Comment(_) | Tok::EndOfFile | Tok::NonLogicalNewline
+        ) {
+            return true;
+        }
+    }
+    false
+}
diff --git a/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E703_E703.ipynb.snap b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E703_E703.ipynb.snap
new file mode 100644
index 0000000000000..068f245637b5c
--- /dev/null
+++ b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E703_E703.ipynb.snap
@@ -0,0 +1,46 @@
+---
+source: crates/ruff_linter/src/rules/pycodestyle/mod.rs
+---
+E703.ipynb:5:2: E703 [*] Statement ends with an unnecessary semicolon
+  |
+3 | x;
+4 | # Only skip the last expression
+5 | x;  # E703
+  |  ^ E703
+6 | x;
+7 | # Nested expressions isn't relevant
+  |
+  = help: Remove unnecessary semicolon
+
+ℹ Safe fix
+2 2 | # Simple case
+3 3 | x;
+4 4 | # Only skip the last expression
+5   |-x;  # E703
+  5 |+x  # E703
+6 6 | x;
+7 7 | # Nested expressions isn't relevant
+8 8 | if True:
+
+E703.ipynb:9:6: E703 [*] Statement ends with an unnecessary semicolon
+   |
+ 7 | # Nested expressions isn't relevant
+ 8 | if True:
+ 9 |     x;
+   |      ^ E703
+10 | # Semicolons with multiple expressions
+11 | x; x;
+   |
+   = help: Remove unnecessary semicolon
+
+ℹ Safe fix
+6  6  | x;
+7  7  | # Nested expressions isn't relevant
+8  8  | if True:
+9     |-    x;
+   9  |+    x
+10 10 | # Semicolons with multiple expressions
+11 11 | x; x;
+12 12 | # Comments, newlines and whitespace
+
+