From 3efaf67f592591f7ff5aa2033a00188b5562e3aa Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 13 Jul 2018 14:42:27 +0200 Subject: [PATCH] Load cell inputs from nbrmd_sourceonly_format extension #12 --- README.md | 51 ++++++++------- nbrmd/__init__.py | 5 +- nbrmd/cm.py | 128 +++++++++++++++++++++++++++++++------ nbrmd/combine.py | 13 +++- nbrmd/hooks.py | 47 -------------- tests/test_jupyter_hook.py | 16 +++-- 6 files changed, 157 insertions(+), 103 deletions(-) delete mode 100644 nbrmd/hooks.py diff --git a/README.md b/README.md index 003ce7fa0..213d0b392 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,9 @@ You will be interested in this if R markdown (extension `.Rmd`) is a well established markdown [notebook format](https://rmarkdown.rstudio.com/). As the name states, R markdown was designed in the R community, but it actually support [many languages](https://yihui.name/knitr/demo/engines/). A few months back, the support for python significantly improved with the arrival of the [`reticulate`](https://github.com/rstudio/reticulate) package. -R markdown is almost identical to markdown export of Jupyter notebooks. For reference, Jupyter notebooks are exported to markdown using either +R markdown is a source only format for notebooks. It is almost identical to +markdown export of Jupyter notebooks with outputs filtered. For +reference, Jupyter notebooks are exported to markdown using either - _Download as Markdown (.md)_ in Jupyter's interface, - or `nbconvert notebook.ipynb --to markdown`. @@ -52,30 +54,25 @@ jupyter notebook ``` Now you can open your `.md` and `.Rmd` files as notebooks in Jupyter, -and save your jupyter notebooks in R markdown format. +and save your jupyter notebooks in R markdown format (see below). 
Rmd notebook in jupyter | Rmd notebook as text :--------------------------:|:-----------------------: ![](https://raw.githubusercontent.com/mwouts/nbrmd/master/img/rmd_notebook.png) | ![](https://raw.githubusercontent.com/mwouts/nbrmd/master/img/rmd_in_text_editor.png) -When a file with an identical name and a `.ipynb` extension is found, -`nbrmd` loads the outputs from that file. This way, you can put the `.Rmd` -file under version control, and preserve the outputs that match unchanged -inputs. ## Can I save my Jupyter notebook as both R markdown and ipynb? -Yes. That's useful if you want to preserve the outputs locally, or if you want -to share the `.ipynb` version. By default, the opened notebook in jupyter, plus -its `.ipynb` version, are updated when a notebook is saved. +Yes. That's even the recommended setting for the notebooks you want to +put under *version control*. -If you prefer a different setting, we offer both per-notebook, and global configuration. +You need to choose whether to configure this per notebook, or globally. ### Per-notebook configuration The R markdown content manager includes a pre-save hook that will keep up-to date versions of your notebook under the file extensions specified in the `nbrmd_formats` metadata. Edit the notebook metadata in Jupyter and -append a list for the desired format, like this: +append a list for the desired formats, like this: ``` { "kernelspec": { @@ -85,12 +82,11 @@ append a list for the desired format, like this: "language_info": { (...) }, - "nbrmd_formats": [".ipynb", ".Rmd"] + "nbrmd_formats": [".ipynb", ".Rmd"], + "nbrmd_sourceonly_format": ".Rmd" } ``` -Accepted formats are: `.ipynb`, `.Rmd` and `.md`. 
- ### Global configuration If you want every notebook to be saved as both `.Rmd` and `.ipynb` files, then change your jupyter config to @@ -99,17 +95,28 @@ c.NotebookApp.contents_manager_class = 'nbrmd.RmdFileContentsManager' c.ContentsManager.default_nbrmd_formats = ['.ipynb', '.Rmd'] ``` -If you prefer to update just `.Rmd`, change the above accordingly. - -:warning: Be careful not to open twice a notebook with two distinct extensions! You should _shutdown_ the notebooks -with the extension you are not currently editing (list your open notebooks with the _running_ tab in Jupyter). +If you prefer to update just `.Rmd`, change the above accordingly (you will +still be able to open regular `.ipynb` notebooks). ## Recommendations for version control -I recommend that you only add the R markdown file to version control. When you integrate a change -on that file that was not done through your Jupyter editor, you should be careful to re-open the -`.Rmd` file, not the `.ipynb` one. As mentionned above, outputs that corresponds to -unchanged inputs will be loaded from the `.ipynb` file. +I recommend that you set `nbrmd_formats` to `[".ipynb", ".Rmd"]`, either +in the default configuration, or in the notebook metadata (see above). + +When you save your notebook, two files are generated, +with `.Rmd` and `.ipynb` extensions. Then, when you reopen +either one or the other, +- cell inputs are taken from the _source only_ format, here the `.Rmd` file +- cell outputs are taken from the `.ipynb` file. + +This way, you can put the `.Rmd` file under version control, and still have +the convenience of having cell outputs stored in the `.ipynb` file. When +the `.Rmd` file is updated outside of Jupyter, then you simply reload the +notebook, and benefit from the updates. + +:warning: Be careful not to open a notebook twice with two distinct +extensions! 
You should _shutdown_ the notebooks with the extension you are not +currently editing (list your open notebooks with the _running_ tab in Jupyter). ## How do I use the converter? diff --git a/nbrmd/__init__.py b/nbrmd/__init__.py index f8e72aaad..2e18d8c9b 100644 --- a/nbrmd/__init__.py +++ b/nbrmd/__init__.py @@ -3,16 +3,13 @@ Use this module to read or write Jupyter notebooks as R Markdown documents (methods 'read', 'reads', 'write', 'writes') -Use the jupyter pre-save hooks (see the documentation) to automatically -dump your Jupyter notebooks as a Rmd file, in addition to the ipynb file -(or the opposite) +Use the RmdFileContentsManager to open Rmd and Jupyter notebooks in Jupyter Use the 'nbrmd' conversion script to convert Jupyter notebooks from/to R Markdown notebooks. """ from .nbrmd import read, reads, readf, write, writes, writef -from .hooks import update_alternative_formats try: from .rmarkdownexporter import RMarkdownExporter diff --git a/nbrmd/cm.py b/nbrmd/cm.py index 796e2fe07..ed803dfdd 100644 --- a/nbrmd/cm.py +++ b/nbrmd/cm.py @@ -1,14 +1,61 @@ import notebook.transutils from notebook.services.contents.filemanager import FileContentsManager from tornado.web import HTTPError -import hooks -import combine import os import nbrmd import nbformat import mock +from . import combine + + +def update_alternative_formats(model, path, contents_manager=None, **kwargs): + """ + A pre-save hook for jupyter that saves the notebooks + under the alternative form. 
Target extensions are taken from + notebook metadata 'nbrmd_formats', or when not available, + from contents_manager.default_nbrmd_formats + :param model: data model, that may contain the notebook + :param path: full name for ipython notebook + :param contents_manager: ContentsManager instance + :param kwargs: not used + :return: + """ + + # only run on notebooks + if model['type'] != 'notebook': + return + + # only run on nbformat v4 + nb = model['content'] + if nb['nbformat'] != 4: + return + + if isinstance(contents_manager, RmdFileContentsManager): + formats = contents_manager.default_nbrmd_formats + else: + formats = ['.ipynb'] + + formats = nb.get('metadata', {}).get('nbrmd_formats', formats) + + if not isinstance(formats, list) or not set(formats).issubset( + ['.Rmd', '.md', '.ipynb']): + raise TypeError(u"Notebook metadata 'nbrmd_formats' " + u"should be subset of ['.Rmd', '.md', '.ipynb']") + + os_path = contents_manager._get_os_path(path) if contents_manager else path + file, ext = os.path.splitext(path) + os_file, ext = os.path.splitext(os_path) + + for alt_ext in formats: + if ext != alt_ext: + if contents_manager: + contents_manager.log.info( + u"Saving file at /%s", file + alt_ext) + nbrmd.writef(nbformat.notebooknode.from_dict(nb), + os_file + alt_ext) + def _nbrmd_writes(nb, version=nbformat.NO_CONVERT, **kwargs): return nbrmd.writes(nb, **kwargs) @@ -34,38 +81,79 @@ class RmdFileContentsManager(FileContentsManager): """ nb_extensions = ['.ipynb', '.Rmd', '.md'] default_nbrmd_formats = ['.ipynb'] + default_nbrmd_sourceonly_format = None def __init__(self, **kwargs): - self.pre_save_hook = hooks.update_alternative_formats + self.pre_save_hook = update_alternative_formats super(RmdFileContentsManager, self).__init__(**kwargs) - def _read_notebook(self, os_path, as_version=4): + def _read_notebook(self, os_path, as_version=4, + load_alternative_format=True): """Read a notebook from an os path.""" file, ext = os.path.splitext(os_path) - if ext == 
'.ipynb': - return super(RmdFileContentsManager, self) \ - ._read_notebook(os_path, as_version) - if ext == '.Rmd': with mock.patch('nbformat.reads', _nbrmd_reads): nb = super(RmdFileContentsManager, self) \ ._read_notebook(os_path, as_version) - else: # ext == '.md': + elif ext == '.md': with mock.patch('nbformat.reads', _nbrmd_md_reads): nb = super(RmdFileContentsManager, self) \ ._read_notebook(os_path, as_version) + else: # ext == '.ipynb': + nb = super(RmdFileContentsManager, self) \ + ._read_notebook(os_path, as_version) - # Read outputs from .ipynb version if available - if ext != '.ipynb': - os_path_ipynb = file + '.ipynb' - try: - nb_outputs = self._read_notebook( - os_path_ipynb, as_version=as_version) - combine.combine_inputs_with_outputs(nb, nb_outputs) - if self.notary.check_signature(nb_outputs): - self.notary.sign(nb) - except HTTPError: - pass + if not load_alternative_format: + return nb + + # Notebook formats: default, notebook metadata, or current extension + nbrmd_formats = nb.metadata.get('nbrmd_formats') or \ + self.default_nbrmd_formats + + if ext not in nbrmd_formats: + nbrmd_formats.append(ext) + + # Source format is taken in metadata, contentsmanager, or is current + # ext, or is first non .ipynb format that is found on disk + source_format = nb.metadata.get('nbrmd_sourceonly_format') or \ + self.default_nbrmd_sourceonly_format + + if source_format is None: + if ext != '.ipynb': + source_format = ext + else: + for fmt in nbrmd_formats: + if fmt != '.ipynb' and os.path.isfile(file + fmt): + source_format = fmt + break + + nb_outputs = None + if source_format is not None and ext != source_format: + self.log.info('Reading source from {} and outputs from {}' \ + .format(file + source_format, os_path)) + nb_outputs = nb + nb = self._read_notebook(file + source_format, + as_version=as_version, + load_alternative_format=False) + elif ext != '.ipynb' and '.ipynb' in nbrmd_formats \ + and os.path.isfile(file + '.ipynb'): + self.log.info('Reading 
source from {} and outputs from {}' \ + .format(os_path, file + '.ipynb')) + nb_outputs = self._read_notebook(file + '.ipynb', + as_version=as_version, + load_alternative_format=False) + + # We store in the metadata the alternative and sourceonly formats + trusted = self.notary.check_signature(nb) + nb.metadata['nbrmd_formats'] = nbrmd_formats + nb.metadata['nbrmd_sourceonly_format'] = source_format + + if nb_outputs is not None: + combine.combine_inputs_with_outputs(nb, nb_outputs) + trusted = self.notary.check_signature(nb_outputs) + + if trusted: + self.notary.sign(nb) return nb diff --git a/nbrmd/combine.py b/nbrmd/combine.py index 4774e2146..f263e94f3 100644 --- a/nbrmd/combine.py +++ b/nbrmd/combine.py @@ -7,10 +7,17 @@ def combine_inputs_with_outputs(nb_source, nb_outputs): remaining_output_cells = nb_outputs.cells for cell in nb_source.cells: + if cell.cell_type != 'code': + continue + + # Remove outputs to warranty that trust of returned + # notebook is that of second notebook + cell.execution_count = None + cell.outputs = [] + + # Fill outputs with that of second notebook for i, ocell in enumerate(remaining_output_cells): - if cell.cell_type == 'code' \ - and ocell.cell_type == 'code' \ - and cell.source == ocell.source: + if ocell.cell_type == 'code' and cell.source == ocell.source: cell.execution_count = ocell.execution_count cell.outputs = ocell.outputs diff --git a/nbrmd/hooks.py b/nbrmd/hooks.py deleted file mode 100644 index 3ba5139af..000000000 --- a/nbrmd/hooks.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -import nbrmd -import nbformat -import cm - - -def update_alternative_formats(model, path, contents_manager=None, **kwargs): - """ - A pre-save hook for jupyter that saves the notebooks - under the alternative form. 
Target extensions are taken from - notebook metadata 'nbrmd_formats', or when not available, - from contents_manager.default_nbrmd_formats - :param model: data model, that may contain the notebook - :param path: full name for ipython notebook - :param contents_manager: ContentsManager instance - :param kwargs: not used - :return: - """ - - # only run on notebooks - if model['type'] != 'notebook': - return - - # only run on nbformat v4 - nb = model['content'] - if nb['nbformat'] != 4: - return - - formats = contents_manager.default_nbrmd_formats \ - if isinstance(contents_manager, cm.RmdFileContentsManager) else ['.ipynb'] - formats = nb.get('metadata', {}).get('nbrmd_formats', formats) - if not isinstance(formats, list) or not set(formats).issubset( - ['.Rmd', '.md', '.ipynb']): - raise TypeError(u"Notebook metadata 'nbrmd_formats' " - u"should be subset of ['.Rmd', '.md', '.ipynb']") - - os_path = contents_manager._get_os_path(path) if contents_manager else path - file, ext = os.path.splitext(path) - os_file, ext = os.path.splitext(os_path) - - for alt_ext in formats: - if ext != alt_ext: - if contents_manager: - contents_manager.log.info( - u"Saving file at /%s", file + alt_ext) - nbrmd.writef(nbformat.notebooknode.from_dict(nb), - os_file + alt_ext) diff --git a/tests/test_jupyter_hook.py b/tests/test_jupyter_hook.py index b2d54bb72..6dfe355d8 100644 --- a/tests/test_jupyter_hook.py +++ b/tests/test_jupyter_hook.py @@ -3,6 +3,7 @@ import nbrmd from .utils import list_all_notebooks, remove_outputs, \ remove_outputs_and_header +from nbrmd.cm import update_alternative_formats @pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) @@ -12,7 +13,7 @@ def test_rmd_is_ok(nb_file, tmpdir): tmp_rmd = str(tmpdir.join('notebook.Rmd')) nb.metadata['nbrmd_formats'] = ['.Rmd'] - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=nb), path=tmp_ipynb) @@ -27,7 +28,7 @@ def test_ipynb_is_ok(nb_file, tmpdir): tmp_ipynb = 
str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=nb), path=tmp_rmd) @@ -44,7 +45,7 @@ def test_all_files_created(nb_file, tmpdir): tmp_rmd = str(tmpdir.join('notebook.Rmd')) nb.metadata['nbrmd_formats'] = ['.Rmd', '.ipynb', '.md'] - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=nb), path=tmp_ipynb) @@ -60,7 +61,7 @@ def test_no_files_created_on_no_format(tmpdir): tmp_md = str(tmpdir.join('notebook.md')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=dict(nbformat=4, metadata=dict())), path=tmp_ipynb) @@ -73,7 +74,7 @@ def test_raise_on_wrong_format(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) with pytest.raises(TypeError): - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=dict(nbformat=4, metadata=dict(nbrmd_formats=['.doc']))), @@ -84,7 +85,8 @@ def test_no_rmd_on_not_notebook(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_alternative_formats(model=dict(type='not notebook'), path=tmp_ipynb) + update_alternative_formats(model=dict(type='not notebook'), + path=tmp_ipynb) assert not os.path.isfile(tmp_rmd) @@ -92,7 +94,7 @@ def test_no_rmd_on_not_v4(tmpdir): tmp_ipynb = str(tmpdir.join('notebook.ipynb')) tmp_rmd = str(tmpdir.join('notebook.Rmd')) - nbrmd.update_alternative_formats( + update_alternative_formats( model=dict(type='notebook', content=dict(nbformat=3)), path=tmp_rmd)