Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't force an UTF encoding for non-ascii Python notebooks #915

Merged
merged 2 commits into from
Feb 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Jupytext ChangeLog
**Changed**
- We have updated the pre-commit hooks and in particular we switched to the first stable version of `black==22.1.0`.
- We require `pandoc==2.16.2` for testing. The representation for code cells changed from ` ``` {.python}` to ` ``` python` in that version of Pandoc ([#906](https://github.com/mwouts/jupytext/issues/906)). We don't use `pandoc>=2.17` in tests at the moment because of the introduction of cell ids that cannot be filtered.

- Jupytext no longer adds a UTF-8 encoding line to Python scripts when the notebook contains non-ASCII characters ([#907](https://github.com/mwouts/jupytext/issues/907))

1.13.6 (2022-01-11)
-------------------
Expand Down
8 changes: 6 additions & 2 deletions jupytext/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@
from nbformat.v4.nbbase import new_raw_cell
from yaml.representer import SafeRepresenter

from .languages import _SCRIPT_EXTENSIONS, comment_lines
from .languages import (
_SCRIPT_EXTENSIONS,
comment_lines,
default_language_from_metadata_and_ext,
)
from .metadata_filter import _DEFAULT_NOTEBOOK_METADATA, filter_metadata
from .pep8 import pep8_lines_between_cells
from .version import __version__
Expand Down Expand Up @@ -58,7 +62,7 @@ def encoding_and_executable(notebook, metadata, ext):
if comment is not None:
if "encoding" in jupytext_metadata:
lines.append(jupytext_metadata.pop("encoding"))
else:
elif default_language_from_metadata_and_ext(metadata, ext) != "python":
for cell in notebook.cells:
try:
cell.source.encode("ascii")
Expand Down
44 changes: 44 additions & 0 deletions tests/test_remove_encoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import jupytext


def test_remove_encoding_907(tmp_path, python_notebook):
    """Check that an encoding line follows the paired script through save/reload (#907).

    The notebook is paired to a py:percent script through the contents manager:
    no encoding line is written on the first save; a line added by hand to the
    script is loaded into the jupytext metadata and kept on the next save; once
    the line is removed from the script it disappears from the metadata and
    must not be written back when the notebook is saved again.
    """
    # Pair all notebooks to py:percent files
    (tmp_path / "jupytext.toml").write_text('formats="ipynb,py:percent"')

    # Create a contents manager
    cm = jupytext.TextFileContentsManager()
    cm.root_dir = str(tmp_path)

    # Save the notebook in Jupyter
    cm.save(dict(type="notebook", content=python_notebook), path="nb.ipynb")

    # No encoding is present in the py file
    py = (tmp_path / "nb.py").read_text()
    assert "coding" not in py

    # Add the encoding line
    py = "# -*- coding: utf-8 -*-\n" + py
    (tmp_path / "nb.py").write_text(py)

    # Reload the notebook
    nb = cm.get("nb.ipynb")["content"]
    assert "encoding" in nb.metadata["jupytext"]

    # Save the notebook
    cm.save(dict(type="notebook", content=nb), path="nb.ipynb")

    # The encoding is still present in the py file
    py = (tmp_path / "nb.py").read_text()
    assert py.startswith("# -*- coding: utf-8 -*-")

    # Remove the encoding (mock ipyupgrade)
    py = "\n".join(py.splitlines()[1:])
    (tmp_path / "nb.py").write_text(py)

    # Reload the notebook - the encoding is not there anymore
    nb = cm.get("nb.ipynb")["content"]
    assert "encoding" not in nb.metadata["jupytext"]

    # Save the notebook - the encoding is not there anymore.
    # The save must actually happen here: without it the assertion below would
    # only re-read the file written by the test itself a few lines above.
    cm.save(dict(type="notebook", content=nb), path="nb.ipynb")
    py = (tmp_path / "nb.py").read_text()
    assert "coding" not in py
33 changes: 32 additions & 1 deletion tests/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,47 @@ def test_write_non_ascii(tmpdir):
jupytext.write(nb, str(tmpdir.join("notebook.ipynb")))


def test_no_encoding_in_python_scripts(no_jupytext_version_number):
    """No UTF-8 encoding line should be added to Python scripts"""
    nb = new_notebook(
        cells=[new_markdown_cell("α")],
        metadata={
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3",
            },
        },
    )

    # Writing the notebook to a py script must not add an encoding line,
    # even though the markdown cell contains a non-ascii character
    py_light = jupytext.writes(nb, "py:light")
    compare(
        py_light,
        """# ---
# jupyter:
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
# ---

# α
""",
    )


def test_encoding_in_scripts_only(no_jupytext_version_number):
"""UTF encoding should not be added to markdown files"""
nb = new_notebook(
cells=[new_markdown_cell("α")],
metadata={
"encoding": "# -*- coding: utf-8 -*-",
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3",
}
},
},
)

Expand Down