From bced502a97665bff0e0cbb09b38145c46011e47f Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 23 Oct 2018 22:46:33 +0200 Subject: [PATCH] Cell and notebook metadata filter #105 #106 #110 --- jupytext/cell_metadata.py | 10 ++------ jupytext/cell_to_text.py | 27 +++++++++++---------- jupytext/combine.py | 48 +++++++++++++++++++++++++------------ jupytext/compare.py | 32 ++++++++++++++++++------- jupytext/header.py | 8 +++---- jupytext/jupytext.py | 7 +++--- tests/test_cell_metadata.py | 11 +++++---- tests/test_header.py | 4 ++-- 8 files changed, 88 insertions(+), 59 deletions(-) diff --git a/jupytext/cell_metadata.py b/jupytext/cell_metadata.py index ce19d4dac..b49879acb 100644 --- a/jupytext/cell_metadata.py +++ b/jupytext/cell_metadata.py @@ -26,14 +26,14 @@ _BOOLEAN_OPTIONS_DICTIONARY = [('hide_input', 'echo', True), ('hide_output', 'include', True)] -_IGNORE_METADATA = [ +_IGNORE_CELL_METADATA = '-' + ','.join([ # Frequent cell metadata that should not enter the text representation # (these metadata are preserved in the paired Jupyter notebook). 'autoscroll', 'collapsed', 'scrolled', 'trusted', 'ExecuteTime', # Pre-jupytext metadata 'skipline', 'noskipline', # Jupytext metadata - 'lines_to_next_cell', 'lines_to_end_of_cell_marker'] + 'lines_to_next_cell', 'lines_to_end_of_cell_marker']) _PERCENT_CELL = re.compile( r'(# |#)%%([^\{\[]*)(|\[raw\]|\[markdown\])([^\{\[]*)(|\{.*\})\s*$') @@ -68,7 +68,6 @@ def metadata_to_rmd_options(language, metadata): :return: """ options = (language or 'R').lower() - metadata = filter_metadata(metadata) if 'name' in metadata: options += ' ' + metadata['name'] + ',' del metadata['name'] @@ -304,11 +303,6 @@ def json_options_to_metadata(options, add_brackets=True): return {} -def filter_metadata(metadata): - """Filter technical metadata""" - return {k: metadata[k] for k in metadata if k not in _IGNORE_METADATA} - - def metadata_to_json_options(metadata): """Represent metadata as json text""" return json.dumps(metadata) diff --git a/jupytext/cell_to_text.py b/jupytext/cell_to_text.py index 6dc12c1e0..c5ae835f5 100644 --- a/jupytext/cell_to_text.py +++ b/jupytext/cell_to_text.py @@ -3,9 +3,9 @@ import re from copy import copy from .languages import cell_language -from .cell_metadata import filter_metadata, is_active, \ - metadata_to_rmd_options, metadata_to_json_options, \ - metadata_to_double_percent_options +from .cell_metadata import is_active, _IGNORE_CELL_METADATA +from .cell_metadata import metadata_to_rmd_options, metadata_to_json_options, metadata_to_double_percent_options +from .metadata_filter import filter_metadata from .magics import comment_magic, escape_code_start from .cell_reader import LightScriptCellReader from .languages import _SCRIPT_EXTENSIONS @@ -32,11 +32,12 @@ class BaseCellExporter(object): """A class that represent a notebook cell as text""" default_comment_magics = None - def __init__(self, cell, default_language, ext, comment_magics=None): + def __init__(self, cell, default_language, ext, comment_magics=None, cell_metadata_filter=None): self.ext = ext self.cell_type = cell.cell_type self.source = cell_source(cell) - self.metadata = filter_metadata(cell.metadata) + self.metadata = copy(cell.metadata) + filter_metadata(self.metadata, cell_metadata_filter, _IGNORE_CELL_METADATA) self.language = cell_language(self.source) or default_language self.default_language = default_language self.comment = _SCRIPT_EXTENSIONS.get(ext, {}).get('comment', '#') @@ -96,8 +97,8 @@ class MarkdownCellExporter(BaseCellExporter): """A class that represent a notebook cell as Markdown""" default_comment_magics = False - def __init__(self, cell, default_language, ext, comment_magics=None): - BaseCellExporter.__init__(self, cell, default_language, ext, comment_magics) + def __init__(self, *args, **kwargs): + BaseCellExporter.__init__(self, *args, **kwargs) self.comment = '' def code_to_text(self): @@ -119,8 +120,8 @@ class RMarkdownCellExporter(BaseCellExporter): """A class that represent a notebook cell as Markdown""" default_comment_magics = True - def __init__(self, cell, default_language, ext, comment_magics=None): - BaseCellExporter.__init__(self, cell, default_language, ext, comment_magics) + def __init__(self, *args, **kwargs): + BaseCellExporter.__init__(self, *args, **kwargs) self.comment = '' def code_to_text(self): @@ -232,8 +233,8 @@ class RScriptCellExporter(BaseCellExporter): """A class that can represent a notebook cell as a R script""" default_comment_magics = True - def __init__(self, cell, default_language, ext, comment_magics=None): - BaseCellExporter.__init__(self, cell, default_language, ext, comment_magics) + def __init__(self, *args, **kwargs): + BaseCellExporter.__init__(self, *args, **kwargs) self.comment = "#'" def code_to_text(self): @@ -303,8 +304,8 @@ class SphinxGalleryCellExporter(BaseCellExporter): default_cell_marker = '#' * 79 default_comment_magics = True - def __init__(self, cell, default_language, ext, comment_magics=None): - BaseCellExporter.__init__(self, cell, default_language, ext, comment_magics) + def __init__(self, *args, **kwargs): + BaseCellExporter.__init__(self, *args, **kwargs) self.comment = '#' def code_to_text(self): diff --git a/jupytext/combine.py b/jupytext/combine.py index 95d564021..84cb5654b 100644 --- a/jupytext/combine.py +++ b/jupytext/combine.py @@ -1,9 +1,9 @@ """Combine source and outputs from two notebooks """ import re -import copy -from .cell_metadata import _IGNORE_METADATA -from .header import _DEFAULT_METADATA +from copy import copy +from .cell_metadata import _IGNORE_CELL_METADATA +from .header import _DEFAULT_NOTEBOOK_METADATA from .metadata_filter import filter_metadata _BLANK_LINE = re.compile(r'^\s*$') @@ -27,13 +27,13 @@ def combine_inputs_with_outputs(nb_source, nb_outputs): ext = text_representation.get('extension') format_name = text_representation.get('format_name') - nb_outputs_metadata = copy.deepcopy(nb_outputs.metadata) - nb_outputs_metadata = filter_metadata(nb_outputs_metadata, - nb_source.metadata.get('jupytext', {}).get('metadata', {}).get('notebook'), - _DEFAULT_METADATA) + nb_outputs_filtered_metadata = copy(nb_outputs.metadata) + filter_metadata(nb_outputs_filtered_metadata, + nb_source.metadata.get('jupytext', {}).get('metadata_filter', {}).get('notebook'), + _DEFAULT_NOTEBOOK_METADATA) for key in nb_outputs.metadata: - if key not in nb_outputs_metadata: + if key not in nb_outputs_filtered_metadata: nb_source.metadata[key] = nb_outputs.metadata[key] for cell in nb_source.cells: @@ -48,18 +48,36 @@ def combine_inputs_with_outputs(nb_source, nb_outputs): cell.execution_count = ocell.execution_count cell.outputs = ocell.outputs - ometadata = ocell.metadata - cell.metadata.update(ometadata if (ext and ext.endswith('.md')) or format_name == 'sphinx' else - {k: ometadata[k] for k in ometadata if k in _IGNORE_METADATA}) + # Append cell metadata that was filtered + if (ext and ext.endswith('.md')) or format_name == 'sphinx': + ocell_filtered_metadata = {} + else: + ocell_filtered_metadata = copy(ocell.metadata) + filter_metadata(ocell_filtered_metadata, + nb_source.metadata.get('jupytext', {}).get('metadata_filter', {}).get('cells'), + _IGNORE_CELL_METADATA) + + for key in ocell.metadata: + if key not in ocell_filtered_metadata: + cell.metadata[key] = ocell.metadata[key] + output_code_cells = output_code_cells[(i + 1):] break else: for i, ocell in enumerate(output_other_cells): if cell.cell_type == ocell.cell_type and same_content(cell.source, ocell.source): - ometadata = ocell.metadata - cell.metadata.update(ometadata if ext and (ext.endswith('.md') or ext.endswith('.Rmd') or - format_name in ['spin', 'sphinx', 'sphinx']) else - {k: ometadata[k] for k in ometadata if k in _IGNORE_METADATA}) + if (ext and (ext.endswith('.md') or ext.endswith('.Rmd'))) \ + or format_name in ['spin', 'sphinx', 'sphinx']: + ocell_filtered_metadata = {} + else: + ocell_filtered_metadata = copy(ocell.metadata) + filter_metadata(ocell_filtered_metadata, + nb_source.metadata.get('jupytext', {}).get('metadata_filter', {}).get('cells'), + _IGNORE_CELL_METADATA) + + for key in ocell.metadata: + if key not in ocell_filtered_metadata: + cell.metadata[key] = ocell.metadata[key] output_other_cells = output_other_cells[(i + 1):] break diff --git a/jupytext/compare.py b/jupytext/compare.py index cde7fd0e7..8c77e23c9 100644 --- a/jupytext/compare.py +++ b/jupytext/compare.py @@ -1,20 +1,25 @@ """Compare two Jupyter notebooks""" import re +from copy import copy from testfixtures import compare -from .cell_metadata import _IGNORE_METADATA -from .header import _DEFAULT_METADATA +from .cell_metadata import _IGNORE_CELL_METADATA +from .header import _DEFAULT_NOTEBOOK_METADATA +from .metadata_filter import filter_metadata from .jupytext import reads, writes from .combine import combine_inputs_with_outputs _BLANK_LINE = re.compile(r'^\s*$') -def filtered_cell(cell, preserve_outputs): +def filtered_cell(cell, preserve_outputs, cell_metadata_filter): """Cell type, metadata and source from given cell""" + metadata = copy(cell.metadata) + filter_metadata(metadata, cell_metadata_filter, _IGNORE_CELL_METADATA) + filtered = {'cell_type': cell.cell_type, 'source': cell.source, - 'metadata': {key: cell.metadata[key] for key in cell.metadata if key not in _IGNORE_METADATA}} + 'metadata': metadata} if preserve_outputs: for key in ['execution_count', 'outputs']: @@ -26,7 +31,10 @@ def filtered_cell(cell, preserve_outputs): def filtered_notebook_metadata(notebook): """Notebook metadata, filtered for metadata added by Jupytext itself""" - return {key: notebook.metadata[key] for key in notebook.metadata if key != 'jupytext' and key in _DEFAULT_METADATA} + metadata = copy(notebook.metadata) + return filter_metadata(metadata, + notebook.metadata.get('jupytext', {}).get('metadata_filter', {}).get('notebook'), + _DEFAULT_NOTEBOOK_METADATA.replace('jupytext,', '')) class NotebookDifference(Exception): @@ -73,6 +81,8 @@ def compare_notebooks(notebook_expected, or format_name in ['sphinx', 'spin']) allow_removed_final_blank_line = allow_expected_differences + cell_metadata_filter = notebook_actual.get('jupytext', {}).get('metadata_filter', {}).get('cells') + if format_name == 'sphinx' and notebook_actual.cells and notebook_actual.cells[0].source == '%matplotlib inline': notebook_actual.cells = notebook_actual.cells[1:] @@ -109,9 +119,9 @@ def compare_notebooks(notebook_expected, if allow_filtered_cell_metadata: ref_cell.metadata = {key: ref_cell.metadata[key] for key in ref_cell.metadata - if key not in _IGNORE_METADATA} + if key not in _IGNORE_CELL_METADATA} test_cell.metadata = {key: test_cell.metadata[key] for key in test_cell.metadata - if key not in _IGNORE_METADATA} + if key not in _IGNORE_CELL_METADATA} if ref_cell.metadata != test_cell.metadata: if raise_on_first_difference: @@ -172,8 +182,12 @@ def compare_notebooks(notebook_expected, if ref_cell.cell_type != 'code': continue - ref_cell = filtered_cell(ref_cell, preserve_outputs=compare_outputs) - test_cell = filtered_cell(test_cell, preserve_outputs=compare_outputs) + ref_cell = filtered_cell(ref_cell, + preserve_outputs=compare_outputs, + cell_metadata_filter=cell_metadata_filter) + test_cell = filtered_cell(test_cell, + preserve_outputs=compare_outputs, + cell_metadata_filter=cell_metadata_filter) try: compare(ref_cell, test_cell) diff --git a/jupytext/header.py b/jupytext/header.py index 76d9dcd6e..84ca3d40c 100644 --- a/jupytext/header.py +++ b/jupytext/header.py @@ -19,13 +19,13 @@ _LEFTSPACE_RE = re.compile(r"^\s") _UTF8_HEADER = ' -*- coding: utf-8 -*-' -_DEFAULT_METADATA = [ +_DEFAULT_NOTEBOOK_METADATA = ','.join([ # Preserve Jupytext section 'jupytext', # Preserve kernel specs and language_info 'kernelspec', 'language_info', # Kernel_info found in Nteract notebooks - 'kernel_info'] + 'kernel_info']) # Change this to False in tests INSERT_AND_CHECK_VERSION_NUMBER = True @@ -109,8 +109,8 @@ def metadata_and_cell_to_header(notebook, text_format, ext): if 'jupytext' in metadata and not metadata['jupytext']: del metadata['jupytext'] - notebook_metadata_filter = metadata.get('jupytext', {}).get('metadata', {}).get('notebook') - metadata = filter_metadata(metadata, notebook_metadata_filter, _DEFAULT_METADATA) + notebook_metadata_filter = metadata.get('jupytext', {}).get('metadata_filter', {}).get('notebook') + metadata = filter_metadata(metadata, notebook_metadata_filter, _DEFAULT_NOTEBOOK_METADATA) if metadata: header.extend(yaml.safe_dump({'jupyter': metadata}, default_flow_style=False).splitlines()) diff --git a/jupytext/jupytext.py b/jupytext/jupytext.py index 04a796932..cb8625f18 100644 --- a/jupytext/jupytext.py +++ b/jupytext/jupytext.py @@ -58,9 +58,9 @@ def reads(self, s, **_): lines = lines[pos:] if not metadata and self.format.format_name in ['markdown', 'light', 'sphinx', 'sphinx-rst2md']: - metadata['jupytext'] = {'metadata': {'notebook': False}} + metadata['jupytext'] = {'metadata_filter': {'notebook': False}} if not cell_metadata: - metadata['jupytext']['metadata']['cell'] = False + metadata['jupytext']['metadata_filter']['cells'] = False set_main_and_cell_language(metadata, cells, self.format.extension) @@ -92,6 +92,7 @@ def writes(self, nb, **kwargs): nb = deepcopy(nb) default_language = default_language_from_metadata_and_ext(nb, self.format.extension) comment_magics = nb.metadata.get('jupytext', {}).get('comment_magics') + cell_metadata_filter = nb.metadata.get('jupytext', {}).get('metadata_filter', {}).get('cells') if 'main_language' in nb.metadata.get('jupytext', {}): del nb.metadata['jupytext']['main_language'] @@ -107,7 +108,7 @@ def writes(self, nb, **kwargs): looking_for_first_markdown_cell = False cell_exporters.append(self.format.cell_exporter_class( - cell, default_language, self.format.extension, comment_magics)) + cell, default_language, self.format.extension, comment_magics, cell_metadata_filter)) texts = [cell.cell_to_text() for cell in cell_exporters] diff --git a/tests/test_cell_metadata.py b/tests/test_cell_metadata.py index 87039def1..a87028446 100644 --- a/tests/test_cell_metadata.py +++ b/tests/test_cell_metadata.py @@ -1,8 +1,8 @@ import pytest -from jupytext.cell_metadata import rmd_options_to_metadata, \ - metadata_to_rmd_options, parse_rmd_options, RMarkdownOptionParsingError, \ - try_eval_metadata, json_options_to_metadata, metadata_to_json_options, \ - md_options_to_metadata, filter_metadata +from jupytext.cell_metadata import rmd_options_to_metadata, metadata_to_rmd_options, parse_rmd_options +from jupytext.cell_metadata import _IGNORE_CELL_METADATA, RMarkdownOptionParsingError, try_eval_metadata +from jupytext.cell_metadata import json_options_to_metadata, metadata_to_json_options, md_options_to_metadata +from jupytext.metadata_filter import filter_metadata from .utils import skip_if_dict_is_not_ordered SAMPLES = [('r', ('R', {})), @@ -63,11 +63,12 @@ def test_parsing_error(options): def test_ignore_metadata(): metadata = {'trusted': True, 'hide_input': True} + metadata = filter_metadata(metadata, None, _IGNORE_CELL_METADATA) assert metadata_to_rmd_options('R', metadata) == 'r echo=FALSE' def test_filter_metadata(): - assert filter_metadata({'scrolled': True}) == {} + assert filter_metadata({'scrolled': True}, None, _IGNORE_CELL_METADATA) == {} def test_try_eval_metadata(): diff --git a/tests/test_header.py b/tests/test_header.py index 9ba9a5542..88a48705c 100644 --- a/tests/test_header.py +++ b/tests/test_header.py @@ -95,8 +95,8 @@ def test_notebook_from_plain_script_has_metadata_filter(script="""print('Hello w """): with mock.patch('jupytext.header.INSERT_AND_CHECK_VERSION_NUMBER', True): nb = jupytext.reads(script, '.py') - assert nb.metadata.get('jupytext', {}).get('metadata', {}).get('notebook') is False - assert nb.metadata.get('jupytext', {}).get('metadata', {}).get('cell') is False + assert nb.metadata.get('jupytext', {}).get('metadata_filter', {}).get('notebook') is False + assert nb.metadata.get('jupytext', {}).get('metadata_filter', {}).get('cells') is False with mock.patch('jupytext.header.INSERT_AND_CHECK_VERSION_NUMBER', True): scripts2 = jupytext.writes(nb, '.py')