diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index fad271949..cca7e0dec 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -5,7 +5,12 @@ Jupytext ChangeLog ----------------------- **Added** -- The `py:percent` format can encode Markdown cells as raw strings ([#836](https://github.com/mwouts/jupytext/issues/836)) +- The Jupytext CLI has a new `--diff` command to show the differences between two notebooks (to see the changes in a file being updated by Jupytext, use the `--show-changes` option, formerly named `--diff`) ([#799](https://github.com/mwouts/jupytext/issues/799)) +- Jupyter will show the diff between text and `ipynb` paired notebooks when it cannot open a paired notebook because the `ipynb` version is more recent. Also, if the inputs in the two files are identical then the notebook will open with no error ([#799](https://github.com/mwouts/jupytext/issues/799)) +- The `py:percent` format will use raw strings when encoding Markdown cells as strings, if they contain backslash characters ([#836](https://github.com/mwouts/jupytext/issues/836)) + +**Fixed** +- We have upgraded the jupyterlab extension dependencies and especially `ansi-regex` to fix a security vulnerability ([#857](https://github.com/mwouts/jupytext/issues/857)) **Changed** - The Jupytext configuration file is reloaded only when a notebook is opened, saved, or when a different folder is explored ([#797](https://github.com/mwouts/jupytext/issues/797)) diff --git a/jupytext/cli.py b/jupytext/cli.py index 0fee009d6..e7e757034 100644 --- a/jupytext/cli.py +++ b/jupytext/cli.py @@ -228,6 +228,16 @@ def parse_jupytext_args(args=None): "write the text representation of the notebook, e.g.: " "jupytext notebook.ipynb --pipe 'black {}'", ) + parser.add_argument( + "--diff", + "-d", + action="store_true", + help="Show the differences between (the inputs) of two notebooks", + ) + parser.add_argument( + "--diff-format", + help="The text format used to show differences in --diff", + ) parser.add_argument( "--check", 
action="append", @@ -281,8 +291,7 @@ def parse_jupytext_args(args=None): help="Quiet mode: do not comment about files being updated or created", ) parser.add_argument( - "--diff", - "-d", + "--show-changes", action="store_true", help="Display the diff for each output file", ) @@ -379,6 +388,7 @@ def log(text): and not args.output and not args.sync and not args.pipe + and not args.diff and not args.check and not args.update_metadata and not args.format_options @@ -386,10 +396,54 @@ def log(text): and not args.execute ): raise ValueError( - "Please provide one of --to, --output, --set-formats, --sync, --pipe, " + "Please provide one of --to, --output, --set-formats, --sync, --pipe, --diff, " "--check, --update-metadata, --format-options, --set-kernel or --execute" ) + if args.diff: + if ( + len(args.notebooks) != 2 + or args.output_format + or args.output + or args.sync + or args.pipe + or args.check + or args.update_metadata + or args.format_options + or args.set_kernel + or args.execute + ): + raise ValueError( + "Please provide two notebooks after 'jupytext --diff'.\n" + "NB: Use --show-changes if you wish to see the changes in " + "a notebook being updated by Jupytext." 
+ ) + + nb_file1, nb_file2 = args.notebooks + nb1 = read(nb_file1) + nb2 = read(nb_file2) + + def fmt_if_not_ipynb(nb): + fmt = nb.metadata["jupytext"]["text_representation"] + if fmt["extension"] == ".ipynb": + return None + return short_form_one_format(fmt) + + diff_fmt = ( + args.diff_format or fmt_if_not_ipynb(nb1) or fmt_if_not_ipynb(nb2) or "md" + ) + + diff = compare( + writes(nb2, diff_fmt), + writes(nb1, diff_fmt), + nb_file2, + nb_file1, + return_diff=True, + ) + sys.stdout.write(diff) + + return + if args.output and len(args.notebooks) != 1: raise ValueError("Please input a single notebook when using --output") @@ -756,7 +810,7 @@ def lazy_write(path, fmt=None, action=None, update_timestamp_only=False): with open(path, encoding="utf-8") as fp: current_content = fp.read() modified = new_content != current_content - if modified and args.diff: + if modified and args.show_changes: diff = compare( new_content, current_content, diff --git a/jupytext/contentsmanager.py b/jupytext/contentsmanager.py index 13ed16f88..3b6c11d5e 100644 --- a/jupytext/contentsmanager.py +++ b/jupytext/contentsmanager.py @@ -39,7 +39,7 @@ full_path, paired_paths, ) -from .pairs import latest_inputs_and_outputs, read_pair, write_pair +from .pairs import PairedFilesDiffer, latest_inputs_and_outputs, read_pair, write_pair def build_jupytext_contents_manager_class(base_contents_manager_class): @@ -302,6 +302,7 @@ def read_one_file(alt_path, alt_fmt): # Before we combine the two files, we make sure we're not overwriting ipynb cells # with an outdated text file + content = None try: if ( outputs.timestamp @@ -309,35 +310,64 @@ def read_one_file(alt_path, alt_fmt): > inputs.timestamp + timedelta(seconds=config.outdated_text_notebook_margin) ): - raise HTTPError( - 400, - """{out} (last modified {out_last}) - seems more recent than {src} (last modified {src_last}) - Please either: - - open {src} in a text editor, make sure it is up to date, and save it, - - or delete {src} if not up to date, 
- - or increase check margin by adding, say, - outdated_text_notebook_margin = 5 # default is 1 (second) - to your jupytext.toml file - """.format( + ts_mismatch = ( + "{out} (last modified {out_last}) is more recent than " + "{src} (last modified {src_last})".format( src=inputs.path, src_last=inputs.timestamp, out=outputs.path, out_last=outputs.timestamp, - ), + ) ) + self.log.warning(ts_mismatch) + + try: + content = read_pair( + inputs, outputs, read_one_file, must_match=True + ) + self.log.warning( + "The inputs in {src} and {out} are identical, " + "so the mismatch in timestamps was ignored".format( + src=inputs.path, out=outputs.path + ) + ) + except HTTPError: + raise + except PairedFilesDiffer as diff: + raise HTTPError( + 400, + """{ts_mismatch} + +Differences (jupytext --diff {src} {out}) are: +{diff} +Please either: +- open {src} in a text editor, make sure it is up to date, and save it, +- or delete {src} if not up to date, +- or increase check margin by adding, say, +outdated_text_notebook_margin = 5 # default is 1 (second) +to your jupytext.toml file + """.format( + ts_mismatch=ts_mismatch, + src=inputs.path, + out=outputs.path, + diff=diff, + ), + ) except OverflowError: pass - try: - model["content"] = read_pair(inputs, outputs, read_one_file) - except HTTPError: - raise - except Exception as err: - self.log.error( - u"Error while reading file: %s %s", path, err, exc_info=True - ) - raise HTTPError(500, str(err)) + if content is not None: + model["content"] = content + else: + try: + model["content"] = read_pair(inputs, outputs, read_one_file) + except HTTPError: + raise + except Exception as err: + self.log.error( + u"Error while reading file: %s %s", path, err, exc_info=True + ) + raise HTTPError(500, str(err)) if not outputs.timestamp: set_kernelspec_from_language(model["content"]) diff --git a/jupytext/pairs.py b/jupytext/pairs.py index 8fa8105e2..d9ca44bef 100644 --- a/jupytext/pairs.py +++ b/jupytext/pairs.py @@ -2,7 +2,10 @@ from collections 
import namedtuple +import jupytext + from .combine import combine_inputs_with_outputs +from .compare import compare from .formats import ( check_file_version, long_form_multiple_formats, @@ -13,6 +16,10 @@ NotebookFile = namedtuple("notebook_file", "path fmt timestamp") +class PairedFilesDiffer(ValueError): + """An error when the two representations of a paired notebook differ""" + + def write_pair(path, formats, write_one_file): """ Call the function 'write_one_file' on each of the paired path/formats @@ -106,7 +113,7 @@ def latest_inputs_and_outputs( ) -def read_pair(inputs, outputs, read_one_file): +def read_pair(inputs, outputs, read_one_file, must_match=False): """Read a notebook given its inputs and outputs path and formats""" if not outputs.path or outputs.path == inputs.path: return read_one_file(inputs.path, inputs.fmt) @@ -114,7 +121,17 @@ def read_pair(inputs, outputs, read_one_file): notebook = read_one_file(inputs.path, inputs.fmt) check_file_version(notebook, inputs.path, outputs.path) - outputs = read_one_file(outputs.path, outputs.fmt) - notebook = combine_inputs_with_outputs(notebook, outputs, fmt=inputs.fmt) + notebook_with_outputs = read_one_file(outputs.path, outputs.fmt) + + if must_match: + in_text = jupytext.writes(notebook, inputs.fmt) + out_text = jupytext.writes(notebook_with_outputs, inputs.fmt) + diff = compare(out_text, in_text, outputs.path, inputs.path, return_diff=True) + if diff: + raise PairedFilesDiffer(diff) + + notebook = combine_inputs_with_outputs( + notebook, notebook_with_outputs, fmt=inputs.fmt + ) return notebook diff --git a/tests/test_cli.py b/tests/test_cli.py index 5bd040613..e7b66250d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1183,7 +1183,16 @@ def test_diff(tmpdir, cwd_tmpdir, capsys): write(new_notebook(cells=[new_code_cell("1 + 1")]), "test.ipynb") write(new_notebook(cells=[new_code_cell("2 + 2")]), "test.py", fmt="py:percent") - jupytext(["--to", "py:percent", "test.ipynb", "--diff"]) + 
jupytext(["--diff", "test.py", "test.ipynb"]) + captured = capsys.readouterr() + assert "-2 + 2\n+1 + 1" in captured.out + + +def test_show_changes(tmpdir, cwd_tmpdir, capsys): + write(new_notebook(cells=[new_code_cell("1 + 1")]), "test.ipynb") + write(new_notebook(cells=[new_code_cell("2 + 2")]), "test.py", fmt="py:percent") + + jupytext(["--to", "py:percent", "test.ipynb", "--show-changes"]) captured = capsys.readouterr() assert "-2 + 2\n+1 + 1" in captured.out @@ -1359,7 +1368,7 @@ def test_use_source_timestamp(tmpdir, cwd_tmpdir, python_notebook, capsys, forma if formats == "ipynb,py": from tornado.web import HTTPError - with pytest.raises(HTTPError, match="seems more recent than test.py"): + with pytest.raises(HTTPError, match="is more recent than test.py"): cm.get("test.ipynb") else: cm.get("test.ipynb") diff --git a/tests/test_contentsmanager.py b/tests/test_contentsmanager.py index 7be9e2403..8ef641044 100644 --- a/tests/test_contentsmanager.py +++ b/tests/test_contentsmanager.py @@ -463,9 +463,6 @@ def test_load_save_rename_non_ascii_path(nb_file, tmpdir): @pytest.mark.parametrize("nb_file", list_notebooks("ipynb_py")[:1]) def test_outdated_text_notebook(nb_file, tmpdir): # 1. write py ipynb - tmp_ipynb = u"notebook.ipynb" - tmp_nbpy = u"notebook.py" - cm = jupytext.TextFileContentsManager() cm.formats = "py,ipynb" cm.outdated_text_notebook_margin = 0 @@ -473,9 +470,9 @@ def test_outdated_text_notebook(nb_file, tmpdir): # open ipynb, save py, reopen nb = jupytext.read(nb_file) - cm.save(model=notebook_model(nb), path=tmp_nbpy) - model_py = cm.get(tmp_nbpy, load_alternative_format=False) - model_ipynb = cm.get(tmp_ipynb, load_alternative_format=False) + cm.save(model=notebook_model(nb), path="notebook.py") + model_py = cm.get("notebook.py", load_alternative_format=False) + model_ipynb = cm.get("notebook.ipynb", load_alternative_format=False) # 2. 
check that time of ipynb <= py assert model_ipynb["last_modified"] <= model_py["last_modified"] @@ -483,21 +480,98 @@ def test_outdated_text_notebook(nb_file, tmpdir): # 3. wait some time time.sleep(0.5) - # 4. touch ipynb - with open(str(tmpdir.join(tmp_ipynb)), "a"): - os.utime(str(tmpdir.join(tmp_ipynb)), None) + # 4. modify ipynb + nb.cells.append(new_markdown_cell("New cell")) + write(nb, str(tmpdir.join("notebook.ipynb"))) # 5. test error with pytest.raises(HTTPError): - cm.get(tmp_nbpy) + cm.get("notebook.py") # 6. test OK with cm.outdated_text_notebook_margin = 1.0 - cm.get(tmp_nbpy) + cm.get("notebook.py") # 7. test OK with cm.outdated_text_notebook_margin = float("inf") - cm.get(tmp_nbpy) + cm.get("notebook.py") + + +def test_outdated_text_notebook_no_diff_ok(tmpdir, python_notebook): + # 1. write py ipynb + cm = jupytext.TextFileContentsManager() + cm.formats = "py,ipynb" + cm.outdated_text_notebook_margin = 0 + cm.root_dir = str(tmpdir) + + # open ipynb, save py, reopen + nb = python_notebook + cm.save(model=notebook_model(nb), path="notebook.py") + model_py = cm.get("notebook.py", load_alternative_format=False) + model_ipynb = cm.get("notebook.ipynb", load_alternative_format=False) + + # 2. check that time of ipynb <= py + assert model_ipynb["last_modified"] <= model_py["last_modified"] + + # 3. wait some time + time.sleep(0.5) + + # 4. touch ipynb + with open(tmpdir / "notebook.ipynb", "a"): + os.utime(tmpdir / "notebook.ipynb", None) + + # 5. No error since both files correspond to the same notebook #799 + cm.get("notebook.py") + + +def test_outdated_text_notebook_diff_is_shown(tmpdir, python_notebook): + # 1. 
write py ipynb + cm = jupytext.TextFileContentsManager() + cm.formats = "py,ipynb" + cm.outdated_text_notebook_margin = 0 + cm.root_dir = str(tmpdir) + + # open ipynb, save py, reopen + nb = python_notebook + nb.cells = [new_markdown_cell("Text version 1.0")] + cm.save(model=notebook_model(nb), path="notebook.py") + model_py = cm.get("notebook.py", load_alternative_format=False) + model_ipynb = cm.get("notebook.ipynb", load_alternative_format=False) + + # 2. check that time of ipynb <= py + assert model_ipynb["last_modified"] <= model_py["last_modified"] + + # 3. wait some time + time.sleep(0.5) + + # 4. modify ipynb + nb.cells = [new_markdown_cell("Text version 2.0")] + jupytext.write(nb, str(tmpdir / "notebook.ipynb")) + + # 5. The diff is shown in the error + with pytest.raises(HTTPError) as excinfo: + cm.get("notebook.py") + + diff = excinfo.value.log_message + + diff = diff[diff.find("Differences") : diff.rfind("Please")] + + compare( + # In the reference below, lines with a single space + # have been stripped by the pre-commit hook + diff.replace("\n \n", "\n\n"), + """Differences (jupytext --diff notebook.py notebook.ipynb) are: +--- notebook.py ++++ notebook.ipynb +@@ -12,5 +12,5 @@ + # name: python_kernel + # --- + +-# Text version 1.0 ++# Text version 2.0 + +""", + ) @pytest.mark.parametrize("nb_file", list_notebooks("ipynb_py")[:1]) diff --git a/tests/test_pre_commit_4_sync_execute.py b/tests/test_pre_commit_4_sync_execute.py index 7b64516f7..0fc232b5a 100644 --- a/tests/test_pre_commit_4_sync_execute.py +++ b/tests/test_pre_commit_4_sync_execute.py @@ -31,7 +31,7 @@ def test_pre_commit_hook_sync_execute( rev: {jupytext_repo_rev} hooks: - id: jupytext - args: [--sync, --execute, --diff] + args: [--sync, --execute, --show-changes] additional_dependencies: - nbconvert """ diff --git a/tests/test_pre_commit_5_reformat_markdown.py b/tests/test_pre_commit_5_reformat_markdown.py index f542b19cf..52a2c31d1 100644 --- 
a/tests/test_pre_commit_5_reformat_markdown.py +++ b/tests/test_pre_commit_5_reformat_markdown.py @@ -36,11 +36,11 @@ def test_pre_commit_hook_sync_reformat_code_and_markdown( rev: {jupytext_repo_rev} hooks: - id: jupytext - args: [--sync, --pipe-fmt, ipynb, --pipe, 'pandoc --from ipynb --to ipynb --markdown-headings=atx', --diff] + args: [--sync, --pipe-fmt, ipynb, --pipe, 'pandoc --from ipynb --to ipynb --markdown-headings=atx', --show-changes] additional_dependencies: - nbformat==5.0.8 # because pandoc 2.11.4 does not preserve yet the new cell ids - id: jupytext - args: [--sync, --pipe, black, --diff] + args: [--sync, --pipe, black, --show-changes] additional_dependencies: - black==20.8b1 # Matches black hook below - nbformat==5.0.8 # for compatibility with the pandoc hook above diff --git a/tests/test_pre_commit_mode.py b/tests/test_pre_commit_mode.py index b802ec05a..a552b4360 100644 --- a/tests/test_pre_commit_mode.py +++ b/tests/test_pre_commit_mode.py @@ -154,7 +154,9 @@ def test_pre_commit_local_config(tmpdir, cwd_tmpdir, tmp_repo, python_notebook, tmp_repo.git.add(".") capsys.readouterr() - exit_code = jupytext(["--pre-commit-mode", "--sync", "test.ipynb", "--diff"]) + exit_code = jupytext( + ["--pre-commit-mode", "--sync", "test.ipynb", "--show-changes"] + ) out, err = capsys.readouterr() assert not err, err