Skip to content

Commit

Permalink
Consider VS Code cell metadata for Jupyter Notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
dhruvmanila committed Aug 13, 2024
1 parent 7027344 commit 8e977dd
Show file tree
Hide file tree
Showing 11 changed files with 207 additions and 20 deletions.
5 changes: 3 additions & 2 deletions crates/red_knot_server/src/edit/notebook.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use anyhow::Ok;
use lsp_types::NotebookCellKind;
use ruff_notebook::CellMetadata;
use rustc_hash::{FxBuildHasher, FxHashMap};

use crate::{PositionEncoding, TextDocument};
Expand Down Expand Up @@ -65,7 +66,7 @@ impl NotebookDocument {
NotebookCellKind::Code => ruff_notebook::Cell::Code(ruff_notebook::CodeCell {
execution_count: None,
id: None,
metadata: serde_json::Value::Null,
metadata: CellMetadata::default(),
outputs: vec![],
source: ruff_notebook::SourceValue::String(
cell.document.contents().to_string(),
Expand All @@ -75,7 +76,7 @@ impl NotebookDocument {
ruff_notebook::Cell::Markdown(ruff_notebook::MarkdownCell {
attachments: None,
id: None,
metadata: serde_json::Value::Null,
metadata: CellMetadata::default(),
source: ruff_notebook::SourceValue::String(
cell.document.contents().to_string(),
),
Expand Down
17 changes: 17 additions & 0 deletions crates/ruff_linter/src/linter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,23 @@ mod tests {
Ok(())
}

#[test]
fn test_vscode_language_id() -> Result<()> {
let actual = notebook_path("vscode_language_id.ipynb");
let expected = notebook_path("vscode_language_id_expected.ipynb");
let TestedNotebook {
messages,
source_notebook,
..
} = assert_notebook_path(
&actual,
expected,
&settings::LinterSettings::for_rule(Rule::UnusedImport),
)?;
assert_messages!(messages, actual, source_notebook);
Ok(())
}

#[test_case(Path::new("before_fix.ipynb"), true; "trailing_newline")]
#[test_case(Path::new("no_trailing_newline.ipynb"), false; "no_trailing_newline")]
fn test_trailing_newline(path: &Path, trailing_newline: bool) -> Result<()> {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
source: crates/ruff_linter/src/linter.rs
---
vscode_language_id.ipynb:cell 3:1:8: F401 [*] `os` imported but unused
|
1 | import os
| ^^ F401
2 |
3 | print("hello world")
|
= help: Remove unused import: `os`

Safe fix
1 |-import os
2 1 |
3 2 | print("hello world")
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"execution_count": null,
"cell_type": "code",
"id": "1",
"metadata": {
"vscode": {
"languageId": "javascript"
}
},
"outputs": [],
"source": []
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"execution_count": null,
"cell_type": "code",
"id": "1",
"metadata": {
"vscode": {
"languageId": "python"
}
},
"outputs": [],
"source": []
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# VS Code `languageId`\n",
"\n",
"This is a test notebook for VS Code specific cell metadata.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "javascript"
}
},
"outputs": [],
"source": [
"function add(x, y) {\n",
" return x + y;\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"print(\"hello world\")"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# VS Code `languageId`\n",
"\n",
"This is a test notebook for VS Code specific cell metadata.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "javascript"
}
},
"outputs": [],
"source": [
"function add(x, y) {\n",
" return x + y;\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"print(\"hello world\")"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
21 changes: 16 additions & 5 deletions crates/ruff_notebook/src/cell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use itertools::Itertools;
use ruff_text_size::{TextRange, TextSize};

use crate::schema::{Cell, SourceValue};
use crate::CellMetadata;

impl fmt::Display for SourceValue {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
Expand Down Expand Up @@ -35,7 +36,7 @@ impl Cell {
matches!(self, Cell::Code(_))
}

pub fn metadata(&self) -> &serde_json::Value {
pub fn metadata(&self) -> &CellMetadata {
match self {
Cell::Code(cell) => &cell.metadata,
Cell::Markdown(cell) => &cell.metadata,
Expand All @@ -54,11 +55,21 @@ impl Cell {

/// Return `true` if it's a valid code cell.
///
/// A valid code cell is a cell where the cell type is [`Cell::Code`] and the
/// source doesn't contain a cell magic.
pub(crate) fn is_valid_code_cell(&self) -> bool {
/// A valid code cell is a cell where:
/// 1. The cell type is [`Cell::Code`]
/// 2. The source doesn't contain a cell magic
/// 3. If the language id is set, it should be `python`
pub(crate) fn is_valid_python_code_cell(&self) -> bool {
let source = match self {
Cell::Code(cell) => &cell.source,
Cell::Code(cell)
if cell
.metadata
.vscode
.as_ref()
.map_or(true, |vscode| vscode.language_id == "python") =>
{
&cell.source
}
_ => return false,
};
// Ignore cells containing cell magic as they act on the entire cell
Expand Down
16 changes: 8 additions & 8 deletions crates/ruff_notebook/src/notebook.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use ruff_text_size::TextSize;
use crate::cell::CellOffsets;
use crate::index::NotebookIndex;
use crate::schema::{Cell, RawNotebook, SortAlphabetically, SourceValue};
use crate::{schema, RawNotebookMetadata};
use crate::{schema, CellMetadata, RawNotebookMetadata};

/// Run round-trip source code generation on a given Jupyter notebook file path.
pub fn round_trip(path: &Path) -> anyhow::Result<String> {
Expand Down Expand Up @@ -131,7 +131,7 @@ impl Notebook {
.cells
.iter()
.enumerate()
.filter(|(_, cell)| cell.is_valid_code_cell())
.filter(|(_, cell)| cell.is_valid_python_code_cell())
.map(|(cell_index, _)| u32::try_from(cell_index).unwrap())
.collect::<Vec<_>>();

Expand Down Expand Up @@ -205,16 +205,14 @@ impl Notebook {
})
}

/// Creates an empty notebook.
///
///
/// Creates an empty notebook with a single code cell.
pub fn empty() -> Self {
Self::from_raw_notebook(
RawNotebook {
cells: vec![schema::Cell::Code(schema::CodeCell {
execution_count: None,
id: None,
metadata: serde_json::Value::default(),
metadata: CellMetadata::default(),
outputs: vec![],
source: schema::SourceValue::String(String::default()),
})],
Expand Down Expand Up @@ -507,7 +505,9 @@ mod tests {
#[test_case("automagic_before_code", false)]
#[test_case("automagic_after_code", true)]
#[test_case("unicode_magic_gh9145", true)]
fn test_is_valid_code_cell(cell: &str, expected: bool) -> Result<()> {
#[test_case("vscode_language_id_python", true)]
#[test_case("vscode_language_id_javascript", false)]
fn test_is_valid_python_code_cell(cell: &str, expected: bool) -> Result<()> {
/// Read a Jupyter cell from the `resources/test/fixtures/jupyter/cell` directory.
fn read_jupyter_cell(path: impl AsRef<Path>) -> Result<Cell> {
let path = notebook_path("cell").join(path);
Expand All @@ -516,7 +516,7 @@ mod tests {
}

assert_eq!(
read_jupyter_cell(format!("{cell}.json"))?.is_valid_code_cell(),
read_jupyter_cell(format!("{cell}.json"))?.is_valid_python_code_cell(),
expected
);
Ok(())
Expand Down
30 changes: 27 additions & 3 deletions crates/ruff_notebook/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ pub struct RawCell {
/// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
pub id: Option<String>,
/// Cell-level metadata.
pub metadata: Value,
pub metadata: CellMetadata,
pub source: SourceValue,
}

Expand All @@ -137,7 +137,7 @@ pub struct MarkdownCell {
/// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
pub id: Option<String>,
/// Cell-level metadata.
pub metadata: Value,
pub metadata: CellMetadata,
pub source: SourceValue,
}

Expand All @@ -153,12 +153,36 @@ pub struct CodeCell {
#[serde(skip_serializing_if = "Option::is_none")]
pub id: Option<String>,
/// Cell-level metadata.
pub metadata: Value,
pub metadata: CellMetadata,
/// Execution, display, or stream outputs.
pub outputs: Vec<Value>,
pub source: SourceValue,
}

/// Cell-level metadata.
#[skip_serializing_none]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct CellMetadata {
/// VS Code specific cell metadata.
///
/// This is [`Some`] only if the cell's preferred language is different from the notebook's
/// preferred language.
/// https://github.com/microsoft/vscode/blob/e6c009a3d4ee60f352212b978934f52c4689fbd9/extensions/ipynb/src/serializers.ts#L117-L122
pub vscode: Option<CodeCellMetadataVSCode>,
/// Catch-all for metadata that isn't required by Ruff.
#[serde(flatten)]
pub extra: BTreeMap<String, Value>,
}

/// VS Code specific cell metadata.
/// https://github.com/microsoft/vscode/blob/e6c009a3d4ee60f352212b978934f52c4689fbd9/extensions/ipynb/src/serializers.ts#L104-L107
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct CodeCellMetadataVSCode {
/// https://code.visualstudio.com/docs/languages/identifiers
pub language_id: String,
}

/// Notebook root-level metadata.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Default)]
Expand Down
5 changes: 3 additions & 2 deletions crates/ruff_server/src/edit/notebook.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use anyhow::Ok;
use lsp_types::NotebookCellKind;
use ruff_notebook::CellMetadata;
use rustc_hash::{FxBuildHasher, FxHashMap};

use crate::{PositionEncoding, TextDocument};
Expand Down Expand Up @@ -65,7 +66,7 @@ impl NotebookDocument {
NotebookCellKind::Code => ruff_notebook::Cell::Code(ruff_notebook::CodeCell {
execution_count: None,
id: None,
metadata: serde_json::Value::Null,
metadata: CellMetadata::default(),
outputs: vec![],
source: ruff_notebook::SourceValue::String(
cell.document.contents().to_string(),
Expand All @@ -75,7 +76,7 @@ impl NotebookDocument {
ruff_notebook::Cell::Markdown(ruff_notebook::MarkdownCell {
attachments: None,
id: None,
metadata: serde_json::Value::Null,
metadata: CellMetadata::default(),
source: ruff_notebook::SourceValue::String(
cell.document.contents().to_string(),
),
Expand Down

0 comments on commit 8e977dd

Please sign in to comment.