Metadata filter on text documents

Former option freeze_metadata was removed. Now, opening a text document with no jupyter metadata always creates a metadata filter in the notebook. #124
mwouts · Dec 14, 2018 · 2af2377 · 2af2377
1 parent 7637dfc
commit 2af2377
Show file tree

Hide file tree

Showing 28 changed files with 43 additions and 99 deletions.
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -8,8 +8,8 @@ Release History
 
 **Improvements**
 
-- TODO: By default, Jupytext does not add metadata to a Python script or Markdown document that had no metadata (#124)
-- TODO: File names with dot are supported (#138)
+- Python scripts or Markdown documents that have no Jupyter metadata receive a metadata filter that ensures that metadata is not exported back to the text representation (#124)
+- Metadata filters are represented as strings rather than dictionaries. Previous syntax from #105 is still supported.
 - TODO: First version of a Jupyter notebook extension (#86)
 
 **BugFixes**

diff --git a/README.md b/README.md
@@ -133,18 +133,12 @@ c.ContentsManager.default_notebook_metadata_filter = "all,-widgets,-varInspector
 c.ContentsManager.default_cell_metadata_filter = "ExecuteTime,autoscroll,-hide_output"
 ```
 
-If you want that the text files created by Jupytext have no metadata, you may use the global metadata filters below. Please note that with this setting, the metadata is only preserved in the `.ipynb` file &mdash; be sure to open that file in Jupyter, and not the text file which will miss the pairing information.
+If you want the text files created by Jupytext to have no metadata, you may use the global metadata filters below. Please note that with this setting, the metadata is only preserved in the `.ipynb` file.
 ```python
 c.ContentsManager.default_notebook_metadata_filter = "-all"
 c.ContentsManager.default_cell_metadata_filter = "-all"
 ```
 
-Finally, if you want that Jupytext exports no other metadata that the one already present in pre-existing scripts or markdowns files, use:
-```python
-# Do not add new metadata when editing a markdown document or a script
-c.ContentsManager.freeze_metadata = True
-```
-
 NB: All these global options (and more) are documented [here](https://github.com/mwouts/jupytext/blob/master/jupytext/contentsmanager.py).
 
 ## Command line conversion
@@ -213,7 +207,8 @@ jupytext --test --update notebook.ipynb --to py:percent
 Note that `jupytext --test` compares the resulting notebooks according to its expectations. If you wish to proceed to a strict comparison of the two notebooks, use `jupytext --test-strict`, and use the flag `-x` to report with more details on the first difference, if any.
 
 Please note that
-- When you associate a Jupyter kernel with your text notebook, that information goes to a YAML header at the top of your script or Markdown document. And Jupytext itself may create a `jupytext` entry in the notebook metadata. Have a look at the [`freeze_metadata` option](#default-metadata-filtering) if you want to avoid this.
+- Scripts opened with Jupyter have a default [metadata filter](#default-metadata-filtering) that prevents additional notebook or cell
+metadata from being added back to the script. Remove the filter if you want to store Jupytext's settings, or the kernel information, in the text file.
 - Cell metadata are available in `light` and `percent` formats for all cell types. Sphinx Gallery scripts in `sphinx` format do not support cell metadata. R Markdown and R scripts in `spin` format support cell metadata for code cells only. Markdown documents do not support cell metadata.
 - By default, a few cell metadata are not included in the text representation of the notebook. And only the most standard notebook metadata are exported. Learn more on this in the sections for [notebook specific](#-per-notebook-configuration) and [global settings](#default-metadata-filtering) for metadata filtering.
 - Representing a Jupyter notebook as a Markdown or R Markdown document has the effect of splitting markdown cells with two consecutive blank lines into multiple cells (as the two blank line pattern is used to separate cells).

diff --git a/jupytext/cli.py b/jupytext/cli.py
@@ -16,7 +16,7 @@
 
 def convert_notebook_files(nb_files, fmt, input_format=None, output=None, pre_commit=False,
                            test_round_trip=False, test_round_trip_strict=False, stop_on_first_error=True,
-                           update=True, freeze_metadata=False, comment_magics=None):
+                           update=True, comment_magics=None):
     """
     Export R markdown notebooks, python or R scripts, or Jupyter notebooks,
     to the opposite format
@@ -29,7 +29,6 @@ def convert_notebook_files(nb_files, fmt, input_format=None, output=None, pre_co
     :param test_round_trip_strict: should round trip conversion be tested, with strict notebook comparison?
     :param stop_on_first_error: when testing, should we stop on first error, or compare the full notebook?
     :param update: preserve the current outputs of .ipynb file
-    :param freeze_metadata: set metadata filters equal to the current script metadata
     :param comment_magics: comment, or not, Jupyter magics
     when possible
     :return:
@@ -71,8 +70,7 @@ def convert_notebook_files(nb_files, fmt, input_format=None, output=None, pre_co
         if nb_file == sys.stdin:
             dest = None
             current_ext, _ = parse_one_format(input_format)
-            notebook = reads(nb_file.read(), ext=current_ext, format_name=format_name,
-                             freeze_metadata=freeze_metadata)
+            notebook = reads(nb_file.read(), ext=current_ext, format_name=format_name)
         else:
             dest, current_ext = os.path.splitext(nb_file)
             notebook = None
@@ -90,8 +88,7 @@ def convert_notebook_files(nb_files, fmt, input_format=None, output=None, pre_co
             input_format = None
 
         if not notebook:
-            notebook = readf(nb_file, format_name=format_name,
-                             freeze_metadata=freeze_metadata)
+            notebook = readf(nb_file, format_name=format_name)
 
         if test_round_trip or test_round_trip_strict:
             try:
@@ -240,10 +237,6 @@ def cli_jupytext(args=None):
                         nargs='?',
                         default=None,
                         help='Should Jupyter magic commands be commented? (Y)es/(T)rue/(N)o/(F)alse/(D)efault')
-    parser.add_argument('--freeze-metadata', action='store_true',
-                        help='Set a metadata filter (unless one exists already) '
-                             'equal to the current metadata of the notebook. Use this '
-                             'to avoid creating a YAML header when editing text files.')
     test = parser.add_mutually_exclusive_group()
     test.add_argument('--test', dest='test', action='store_true',
                       help='Test that notebook is stable under '
@@ -301,7 +294,6 @@ def jupytext(args=None):
                                test_round_trip_strict=args.test_strict,
                                stop_on_first_error=args.stop_on_first_error,
                                update=args.update,
-                               freeze_metadata=args.freeze_metadata,
                                comment_magics=args.comment_magics)
     except (ValueError, TypeError, IOError) as err:
         print('jupytext: error: ' + str(err))

diff --git a/jupytext/contentsmanager.py b/jupytext/contentsmanager.py
@@ -42,11 +42,9 @@ def _writes(nbk, version=nbformat.NO_CONVERT, **kwargs):
     return _writes
 
 
-def _jupytext_reads(ext, format_name, rst2md, freeze_metadata):
+def _jupytext_reads(ext, format_name, rst2md):
     def _reads(text, as_version, **kwargs):
-        return jupytext.reads(text, ext=ext, format_name=format_name, rst2md=rst2md,
-                              freeze_metadata=freeze_metadata,
-                              as_version=as_version, **kwargs)
+        return jupytext.reads(text, ext=ext, format_name=format_name, rst2md=rst2md, as_version=as_version, **kwargs)
 
     return _reads
 
@@ -164,12 +162,6 @@ def all_nb_extensions(self):
              "Examples: 'all', 'hide_input,hide_output'",
         config=True)
 
-    freeze_metadata = Bool(
-        False,
-        help='Filter notebook and cell metadata that are not in the text notebook. '
-             'Use this to avoid creating a YAML header when editing text files.',
-        config=True)
-
     comment_magics = Enum(
         values=[True, False],
         allow_none=True,
@@ -273,9 +265,7 @@ def _read_notebook(self, os_path, as_version=4):
         _, fmt, ext = file_fmt_ext(os_path)
         if ext in self.nb_extensions:
             format_name = self.preferred_format(fmt, self.preferred_jupytext_formats_read)
-            with mock.patch('nbformat.reads', _jupytext_reads(fmt, format_name,
-                                                              self.sphinx_convert_rst2md,
-                                                              self.freeze_metadata)):
+            with mock.patch('nbformat.reads', _jupytext_reads(fmt, format_name, self.sphinx_convert_rst2md)):
                 return super(TextFileContentsManager, self)._read_notebook(os_path, as_version)
         else:
             return super(TextFileContentsManager, self)._read_notebook(os_path, as_version)

diff --git a/jupytext/formats.py b/jupytext/formats.py
@@ -150,9 +150,9 @@ def read_metadata(text, ext):
     else:
         comment = _SCRIPT_EXTENSIONS.get(ext, {}).get('comment', '#')
 
-    metadata, _, _ = header_to_metadata_and_cell(lines, comment)
+    metadata, _, _, _ = header_to_metadata_and_cell(lines, comment)
     if ext in ['.r', '.R'] and not metadata:
-        metadata, _, _ = header_to_metadata_and_cell(lines, "#'")
+        metadata, _, _, _ = header_to_metadata_and_cell(lines, "#'")
 
     return metadata
 
@@ -170,7 +170,8 @@ def guess_format(text, ext):
 
     metadata = read_metadata(text, ext)
 
-    if ('jupytext' in metadata and set(metadata['jupytext']).difference(['encoding', 'main_language'])) or \
+    if ('jupytext' in metadata and set(metadata['jupytext'])
+            .difference(['encoding', 'executable', 'main_language'])) or \
             set(metadata).difference(['jupytext']):
         return format_name_for_ext(metadata, ext)
 

diff --git a/jupytext/header.py b/jupytext/header.py
@@ -128,7 +128,8 @@ def metadata_and_cell_to_header(notebook, text_format, ext):
 
 def header_to_metadata_and_cell(lines, header_prefix):
     """
-    Return the metadata, first cell of notebook, and next loc in text
+    Return the metadata, a boolean to indicate if a jupyter section was found,
+     the first cell of notebook if some metadata is found outside of the jupyter section, and next loc in text
     """
 
     header = []
@@ -203,6 +204,6 @@ def header_to_metadata_and_cell(lines, header_prefix):
         else:
             cell = None
 
-        return metadata, cell, i + 1
+        return metadata, jupyter, cell, i + 1
 
-    return metadata, None, start
+    return metadata, False, None, start
diff --git a/jupytext/jupytext.py b/jupytext/jupytext.py
@@ -26,17 +26,16 @@
 class TextNotebookReader(NotebookReader):
     """Text notebook reader"""
 
-    def __init__(self, ext, format_name=None, freeze_metadata=False):
+    def __init__(self, ext, format_name=None):
         self.ext = ext
         self.format = get_format(ext, format_name)
-        self.freeze_metadata = freeze_metadata
 
     def reads(self, s, **_):
         """Read a notebook from text"""
         lines = s.splitlines()
 
         cells = []
-        metadata, header_cell, pos = header_to_metadata_and_cell(lines, self.format.header_prefix)
+        metadata, jupyter_md, header_cell, pos = header_to_metadata_and_cell(lines, self.format.header_prefix)
         comment_magics = metadata.get('jupytext', {}).get('comment_magics')
 
         if header_cell:
@@ -58,11 +57,11 @@ def reads(self, s, **_):
                 raise Exception('Blocked at lines ' + '\n'.join(lines[:6]))
             lines = lines[pos:]
 
-        if self.freeze_metadata and 'metadata_filter' not in metadata.get('jupytext', {}):
+        if not jupyter_md:
+            # Set a metadata filter equal to the current metadata in script
             cell_metadata = [m for m in cell_metadata if m not in _JUPYTEXT_CELL_METADATA]
             metadata.setdefault('jupytext', {})['metadata_filter'] = {
-                'notebook': ','.join(list(metadata.keys()) + ['-all']),
-                'cells': ','.join(cell_metadata + ['-all'])}
+                'notebook': '-all', 'cells': ','.join(cell_metadata + ['-all'])}
 
         set_main_and_cell_language(metadata, cells, self.format.extension)
 
@@ -139,8 +138,7 @@ def writes(self, nb, **kwargs):
         return '\n'.join(lines)
 
 
-def reads(text, ext, format_name=None,
-          rst2md=False, freeze_metadata=False, as_version=4, **kwargs):
+def reads(text, ext, format_name=None, rst2md=False, as_version=4, **kwargs):
     """Read a notebook from a string"""
     if ext.endswith('.ipynb'):
         return nbformat.reads(text, as_version, **kwargs)
@@ -152,7 +150,7 @@ def reads(text, ext, format_name=None,
         if format_name == 'sphinx' and rst2md:
             format_name = 'sphinx-rst2md'
 
-    reader = TextNotebookReader(ext, format_name, freeze_metadata)
+    reader = TextNotebookReader(ext, format_name)
     notebook = reader.reads(text, **kwargs)
     transition_to_jupytext_section_in_metadata(notebook.metadata, False)
 
@@ -166,24 +164,21 @@ def reads(text, ext, format_name=None,
     return notebook
 
 
-def read(file_or_stream, ext, format_name=None,
-         freeze_metadata=False, as_version=4, **kwargs):
+def read(file_or_stream, ext, format_name=None, as_version=4, **kwargs):
     """Read a notebook from a file"""
     if ext.endswith('.ipynb'):
         notebook = nbformat.read(file_or_stream, as_version, **kwargs)
         transition_to_jupytext_section_in_metadata(notebook.metadata, True)
         return notebook
 
-    return reads(file_or_stream.read(), ext=ext, format_name=format_name,
-                 freeze_metadata=freeze_metadata, **kwargs)
+    return reads(file_or_stream.read(), ext=ext, format_name=format_name, **kwargs)
 
 
-def readf(nb_file, format_name=None, freeze_metadata=False):
+def readf(nb_file, format_name=None):
     """Read a notebook from the file with given name"""
     _, ext = os.path.splitext(nb_file)
     with io.open(nb_file, encoding='utf-8') as stream:
-        return read(stream, as_version=4, ext=ext, format_name=format_name,
-                    freeze_metadata=freeze_metadata)
+        return read(stream, as_version=4, ext=ext, format_name=format_name)
 
 
 def writes(notebook, ext, format_name=None,

diff --git a/tests/notebooks/mirror/sphinx-rst2md_to_ipynb/plot_notebook.ipynb b/tests/notebooks/mirror/sphinx-rst2md_to_ipynb/plot_notebook.ipynb
@@ -208,7 +208,11 @@
  "metadata": {
   "jupytext": {
    "encoding": "# -*- coding: utf-8 -*-",
-   "main_language": "python"
+   "main_language": "python",
+   "metadata_filter": {
+    "cells": "-all",
+    "notebook": "-all"
+   }
   }
  },
  "nbformat": 4,

diff --git a/tests/test_compare.py b/tests/test_compare.py
@@ -1,10 +1,7 @@
 import pytest
 from nbformat.v4.nbbase import new_notebook, new_markdown_cell, new_code_cell, new_raw_cell
-import jupytext
 from jupytext.compare import compare_notebooks, NotebookDifference, test_round_trip_conversion as round_trip_conversion
 
-jupytext.header.INSERT_AND_CHECK_VERSION_NUMBER = False
-
 
 def test_raise_on_different_metadata():
     ref = new_notebook(metadata={'kernelspec': {'language': 'python', 'name': 'python', 'display_name': 'Python'}},

diff --git a/tests/test_contentsmanager.py b/tests/test_contentsmanager.py
@@ -117,7 +117,6 @@ def test_load_save_py_freeze_metadata(script, tmpdir):
     tmp_nbpy = 'notebook.py'
 
     cm = jupytext.TextFileContentsManager()
-    cm.freeze_metadata = True
     cm.root_dir = str(tmpdir)
 
     # read original file
@@ -307,6 +306,7 @@ def test_load_save_percent_format(nb_file, tmpdir):
     # open python, save
     with mock.patch('jupytext.header.INSERT_AND_CHECK_VERSION_NUMBER', True):
         nb = cm.get(tmp_py)['content']
+        del nb.metadata['jupytext']['metadata_filter']
         cm.save(model=dict(type='notebook', content=nb), path=tmp_py)
 
     # compare the new file with original one
@@ -345,7 +345,7 @@ def test_save_to_percent_format(nb_file, tmpdir):
         text_jl = stream.read()
 
     # Parse the YAML header
-    metadata, _, _ = header_to_metadata_and_cell(text_jl.splitlines(), '#')
+    metadata, _, _, _ = header_to_metadata_and_cell(text_jl.splitlines(), '#')
     assert metadata['jupytext']['formats'] == 'ipynb,jl:percent'
 
 

diff --git a/tests/test_header.py b/tests/test_header.py
@@ -22,7 +22,7 @@ def test_header_to_metadata_and_cell_blank_line():
 Header is followed by a blank line
 """
     lines = text.splitlines()
-    metadata, cell, pos = header_to_metadata_and_cell(lines, '')
+    metadata, _, cell, pos = header_to_metadata_and_cell(lines, '')
 
     assert metadata == {}
     assert cell.cell_type == 'raw'
@@ -40,7 +40,7 @@ def test_header_to_metadata_and_cell_no_blank_line():
 Header is not followed by a blank line
 """
     lines = text.splitlines()
-    metadata, cell, pos = header_to_metadata_and_cell(lines, '')
+    metadata, _, cell, pos = header_to_metadata_and_cell(lines, '')
 
     assert metadata == {}
     assert cell.cell_type == 'raw'
@@ -59,7 +59,7 @@ def test_header_to_metadata_and_cell_metadata():
 ---
 """
     lines = text.splitlines()
-    metadata, cell, pos = header_to_metadata_and_cell(lines, '')
+    metadata, _, cell, pos = header_to_metadata_and_cell(lines, '')
 
     assert metadata == {'mainlanguage': 'python'}
     assert cell.cell_type == 'raw'
@@ -94,7 +94,7 @@ def test_metadata_and_cell_to_header2():
 def test_notebook_from_plain_script_has_metadata_filter(script="""print('Hello world")
 """):
     with mock.patch('jupytext.header.INSERT_AND_CHECK_VERSION_NUMBER', True):
-        nb = jupytext.reads(script, '.py', freeze_metadata=True)
+        nb = jupytext.reads(script, '.py')
     assert nb.metadata.get('jupytext', {}).get('metadata_filter', {}).get('notebook') == '-all'
     assert nb.metadata.get('jupytext', {}).get('metadata_filter', {}).get('cells') == '-all'
     with mock.patch('jupytext.header.INSERT_AND_CHECK_VERSION_NUMBER', True):

diff --git a/tests/test_ipynb_to_R.py b/tests/test_ipynb_to_R.py
@@ -5,8 +5,6 @@
 from jupytext.compare import compare_notebooks
 from .utils import list_notebooks
 
-jupytext.header.INSERT_AND_CHECK_VERSION_NUMBER = False
-
 
 @pytest.mark.parametrize('nb_file,ext', itertools.product(list_notebooks('ipynb_R'), ['.r', '.R']))
 def test_identity_source_write_read(nb_file, ext):

diff --git a/tests/test_ipynb_to_py.py b/tests/test_ipynb_to_py.py
@@ -4,8 +4,6 @@
 from jupytext.compare import compare_notebooks
 from .utils import list_notebooks
 
-jupytext.header.INSERT_AND_CHECK_VERSION_NUMBER = False
-
 
 @pytest.mark.parametrize('nb_file', list_notebooks('ipynb_py'))
 def test_identity_source_write_read(nb_file):

diff --git a/tests/test_ipynb_to_rmd.py b/tests/test_ipynb_to_rmd.py
@@ -4,8 +4,6 @@
 from jupytext.compare import compare_notebooks
 from .utils import list_notebooks
 
-jupytext.header.INSERT_AND_CHECK_VERSION_NUMBER = False
-
 
 @pytest.mark.parametrize('nb_file', list_notebooks(skip='66'))
 def test_identity_source_write_read(nb_file):

diff --git a/tests/test_knitr_spin.py b/tests/test_knitr_spin.py
@@ -2,8 +2,6 @@
 import jupytext
 from .utils import list_notebooks, skip_if_dict_is_not_ordered
 
-jupytext.header.INSERT_AND_CHECK_VERSION_NUMBER = False
-
 
 @skip_if_dict_is_not_ordered
 @pytest.mark.parametrize('r_file', list_notebooks('R'))

diff --git a/tests/test_load_multiple.py b/tests/test_load_multiple.py
@@ -4,6 +4,8 @@
 from nbformat.v4.nbbase import new_notebook
 import jupytext
 
+jupytext.header.INSERT_AND_CHECK_VERSION_NUMBER = False
+
 
 def test_combine_same_version_ok(tmpdir):
     tmp_ipynb = 'notebook.ipynb'

diff --git a/tests/test_preserve_empty_cells.py b/tests/test_preserve_empty_cells.py
@@ -4,8 +4,6 @@
 import jupytext
 from jupytext.compare import compare_notebooks
 
-jupytext.header.INSERT_AND_CHECK_VERSION_NUMBER = False
-
 
 @pytest.mark.parametrize('blank_lines', range(1, 6))
 def test_file_with_blank_lines(blank_lines):