diff --git a/jupytext/cli.py b/jupytext/cli.py index ef82d3b10..a1555c3dd 100644 --- a/jupytext/cli.py +++ b/jupytext/cli.py @@ -11,7 +11,9 @@ from .languages import _SCRIPT_EXTENSIONS -def convert_notebook_files(nb_files, fmt, input_format=None, output=None, test_round_trip=False, preserve_outputs=True): +def convert_notebook_files(nb_files, fmt, input_format=None, output=None, + test_round_trip=False, test_round_trip_strict=False, stop_on_first_error=True, + update=True): """ Export R markdown notebooks, python or R scripts, or Jupyter notebooks, to the opposite format @@ -20,7 +22,9 @@ def convert_notebook_files(nb_files, fmt, input_format=None, output=None, test_r :param fmt: destination format, e.g. "py:percent" :param output: None, destination file, or '-' for stdout :param test_round_trip: should round trip conversion be tested? - :param preserve_outputs: preserve the current outputs of .ipynb file + :param test_round_trip_strict: should round trip conversion be tested, with strict notebook comparison? + :param stop_on_first_error: when testing, should we stop on first error, or compare the full notebook? + :param update: preserve the current outputs of .ipynb file when possible :return: """ @@ -64,9 +68,11 @@ def convert_notebook_files(nb_files, fmt, input_format=None, output=None, test_r if not notebook: notebook = readf(nb_file, format_name=format_name) - if test_round_trip: + if test_round_trip or test_round_trip_strict: try: - test_round_trip_conversion(notebook, ext, format_name, preserve_outputs) + test_round_trip_conversion(notebook, ext, format_name, update, + allow_expected_differences=not test_round_trip_strict, + stop_on_first_error=stop_on_first_error) except NotebookDifference as error: notebooks_in_error += 1 print('{}: {}'.format(nb_file, str(error))) @@ -82,7 +88,7 @@ def convert_notebook_files(nb_files, fmt, input_format=None, output=None, test_r raise TypeError('Destination extension {} is not consistent' 'with format {} '.format(dest_ext, ext)) - save_notebook_as(notebook, nb_file, dest + ext, format_name, preserve_outputs) + save_notebook_as(notebook, nb_file, dest + ext, format_name, update) if notebooks_in_error: exit(notebooks_in_error) @@ -155,9 +161,15 @@ def cli_jupytext(args=None): parser.add_argument('--update', action='store_true', help='Preserve outputs of .ipynb destination ' '(when file exists and inputs match)') - parser.add_argument('--test', dest='test', action='store_true', - help='Test that notebook is stable under ' - 'round trip conversion') + test = parser.add_mutually_exclusive_group() + test.add_argument('--test', dest='test', action='store_true', + help='Test that notebook is stable under ' + 'round trip conversion, up to expected changes') + test.add_argument('--test-strict', dest='test_strict', action='store_true', + help='Test that notebook is strictly stable under ' + 'round trip conversion') + parser.add_argument('-x', '--stop', dest='stop_on_first_error', action='store_true', + help='Stop on first round trip conversion error, and report stack traceback') args = parser.parse_args(args) args.to = canonize_format(args.to, args.output) @@ -171,8 +183,8 @@ def cli_jupytext(args=None): if not args.notebooks: raise ValueError('Please specificy either --from or notebooks') - if args.update and args.to != 'ipynb': - raise ValueError('--update works exclusively with --to notebook') + if args.update and not (args.test or args.test_strict) and args.to != 'ipynb': + raise ValueError('--update works exclusively with --to notebook ') return args @@ -186,7 +198,9 @@ def jupytext(args=None): input_format=args.input_format, output=args.output, test_round_trip=args.test, - preserve_outputs=args.update) + test_round_trip_strict=args.test_strict, + stop_on_first_error=args.stop_on_first_error, + update=args.update) except ValueError as err: # (ValueError, TypeError, IOError) as err: print('jupytext: error: ' + str(err)) exit(1) diff --git a/jupytext/compare.py b/jupytext/compare.py index 2a67aa041..5460ed095 100644 --- a/jupytext/compare.py +++ b/jupytext/compare.py @@ -1,10 +1,13 @@ """Compare two Jupyter notebooks""" +import re from testfixtures import compare from .cell_metadata import _IGNORE_METADATA from .jupytext import reads, writes from .combine import combine_inputs_with_outputs +_BLANK_LINE = re.compile(r'^\s*$') + def filtered_cell(cell, preserve_outputs): """Cell type, metadata and source from given cell""" @@ -30,6 +33,27 @@ class NotebookDifference(Exception): pass +def same_content(ref_source, test_source, allow_removed_final_blank_line): + """Is the content of two cells the same, except for an optional final blank line?""" + if ref_source == test_source: + return True + + if not allow_removed_final_blank_line: + return False + + # Is ref identical to test, plus one blank line? + ref_source = ref_source.splitlines() + test_source = test_source.splitlines() + + if not ref_source: + return False + + if ref_source[:-1] != test_source: + return False + + return _BLANK_LINE.match(ref_source[-1]) + + def compare_notebooks(notebook_expected, notebook_actual, ext=None, @@ -51,6 +75,7 @@ def compare_notebooks(notebook_expected, # Compare cells type and content test_cell_iter = iter(notebook_actual.cells) modified_cells = set() + modified_cell_metadata = set() for i, ref_cell in enumerate(notebook_expected.cells, 1): try: test_cell = next(test_cell_iter) @@ -62,7 +87,7 @@ def compare_notebooks(notebook_expected, modified_cells.update(range(i, len(notebook_expected.cells) + 1)) break - ref_lines = [line for line in ref_cell.source.splitlines() if line != ''] + ref_lines = [line for line in ref_cell.source.splitlines() if not _BLANK_LINE.match(line)] test_lines = [] while True: @@ -89,12 +114,16 @@ def compare_notebooks(notebook_expected, try: compare(ref_cell.metadata, test_cell.metadata) except AssertionError as error: - raise NotebookDifference("Metadata differ on {} cell #{}: {}" - .format(test_cell.cell_type, i, str(error))) + raise NotebookDifference("Metadata differ on {} cell #{}: {}\nCell content:\n{}" + .format(test_cell.cell_type, i, str(error), ref_cell.source)) else: - modified_cells.add(i) + modified_cell_metadata.update(set(test_cell.metadata).difference(ref_cell.metadata)) + modified_cell_metadata.update(set(ref_cell.metadata).difference(test_cell.metadata)) + for key in set(ref_cell.metadata).intersection(test_cell.metadata): + if ref_cell.metadata[key] != test_cell.metadata[key]: + modified_cell_metadata.add(key) - test_lines.extend([line for line in test_cell.source.splitlines() if line != '']) + test_lines.extend([line for line in test_cell.source.splitlines() if not _BLANK_LINE.match(line)]) if ref_cell.cell_type != 'markdown': break @@ -123,10 +152,7 @@ def compare_notebooks(notebook_expected, # 3. bis test entire cell content if ref_cell.cell_type != 'markdown' or not allow_splitted_markdown_cells: - if (not allow_removed_final_blank_line and ref_cell.source != test_cell.source) or \ - (allow_removed_final_blank_line and - ref_cell.source != test_cell.source and - ref_cell.source != test_cell.source + '\n'): + if not same_content(ref_cell.source, test_cell.source, allow_removed_final_blank_line): try: compare(ref_cell.source, test_cell.source) except AssertionError as error: @@ -184,6 +210,8 @@ def compare_notebooks(notebook_expected, if modified_cells: error.append('Cells {} differ ({}/{})'.format(','.join([str(i) for i in modified_cells]), len(modified_cells), len(notebook_expected.cells))) + if modified_cell_metadata: + error.append("Cell metadata '{}' differ".format("', '".join([str(i) for i in modified_cell_metadata]))) if modified_metadata: error.append('Notebook metadata differ') @@ -191,13 +219,15 @@ def compare_notebooks(notebook_expected, raise NotebookDifference(' | '.join(error)) -def test_round_trip_conversion(notebook, ext, format_name, test_outputs): +def test_round_trip_conversion(notebook, ext, format_name, update, + allow_expected_differences=True, + stop_on_first_error=True): """Test round trip conversion for a Jupyter notebook""" text = writes(notebook, ext=ext, format_name=format_name) round_trip = reads(text, ext=ext, format_name=format_name) - compare_notebooks(notebook, round_trip, ext=ext, format_name=format_name) - - if test_outputs: + if update: combine_inputs_with_outputs(round_trip, notebook) - compare_notebooks(notebook, round_trip, ext=ext, format_name=format_name) + + compare_notebooks(notebook, round_trip, ext, format_name, allow_expected_differences, + raise_on_first_difference=stop_on_first_error) diff --git a/tests/test_compare.py b/tests/test_compare.py index 18acab3f8..e5069f505 100644 --- a/tests/test_compare.py +++ b/tests/test_compare.py @@ -64,7 +64,7 @@ def test_raise_on_different_cell_metadata(): def test_does_not_raise_on_blank_line_removed(): - ref = new_notebook(cells=[new_code_cell('1+1\n')]) + ref = new_notebook(cells=[new_code_cell('1+1\n ')]) test = new_notebook(cells=[new_code_cell('1+1')]) compare_notebooks(ref, test, ext='.py', format_name='light') @@ -125,4 +125,4 @@ def test_test_round_trip_conversion(): } ])], metadata={'main_language': 'python'}) - round_trip_conversion(notebook, '.py', None, test_outputs=True) + round_trip_conversion(notebook, '.py', None, update=True)