Skip to content

Commit

Permalink
Initial support for the pandoc format for Jupyter notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
mwouts committed Mar 27, 2019
1 parent 78032c8 commit 9800c20
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 0 deletions.
24 changes: 24 additions & 0 deletions jupytext/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from .metadata_filter import metadata_filter_as_string
from .stringparser import StringParser
from .languages import _SCRIPT_EXTENSIONS, _COMMENT_CHARS
from .pandoc import pandoc_version


class JupytextFormatError(ValueError):
Expand Down Expand Up @@ -134,6 +135,21 @@ def __init__(self,
current_version_number='1.1')
]


def pandoc_format():
    """Build the description of the 'pandoc' flavour of Markdown ('.md') files"""
    # Resolved first because it runs the pandoc executable and may raise
    installed_version = pandoc_version()

    return NotebookFormatDescription(format_name='pandoc',
                                     extension='.md',
                                     header_prefix='',
                                     cell_reader_class=None,
                                     cell_exporter_class=None,
                                     current_version_number=installed_version)


try:
    # pandoc_format() runs the pandoc executable to find its version. A missing
    # executable (FileNotFoundError) or an unsuitable one (PandocError, derived
    # from ChildProcessError) are both OSError subclasses. In that case the
    # pandoc format is simply not registered, rather than making this module
    # impossible to import; get_format_implementation then asks the user to
    # install pandoc when the 'pandoc' format is requested.
    JUPYTEXT_FORMATS.append(pandoc_format())
except OSError:
    pass

NOTEBOOK_EXTENSIONS = list(dict.fromkeys(['.ipynb'] + [fmt.extension for fmt in JUPYTEXT_FORMATS]))
EXTENSION_PREFIXES = ['.lgt', '.spx', '.pct', '.hyd', '.nb']

Expand All @@ -151,6 +167,9 @@ def get_format_implementation(ext, format_name=None):
formats_for_extension.append(fmt.format_name)

if formats_for_extension:
if ext == '.md' and format_name == 'pandoc':
raise JupytextFormatError('Please install pandoc>=2.7.1')

raise JupytextFormatError("Format '{}' is not associated to extension '{}'. "
"Please choose one of: {}.".format(format_name, ext,
', '.join(formats_for_extension)))
Expand Down Expand Up @@ -237,6 +256,11 @@ def guess_format(text, ext):
if rspin_comment_count >= 1:
return 'spin'

if ext == '.md':
for line in lines:
if line.startswith(':::'): # Pandoc div
return 'pandoc'

# Default format
return get_format_implementation(ext).format_name

Expand Down
8 changes: 8 additions & 0 deletions jupytext/jupytext.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from .metadata_filter import update_metadata_filters
from .languages import default_language_from_metadata_and_ext, set_main_and_cell_language
from .pep8 import pep8_lines_between_cells
from .pandoc import md_to_notebook, notebook_to_md


class TextNotebookConverter(NotebookReader, NotebookWriter):
Expand All @@ -38,6 +39,9 @@ def update_fmt_with_notebook_options(self, metadata):

def reads(self, s, **_):
"""Read a notebook represented as text"""
if self.fmt.get('format_name') == 'pandoc':
return md_to_notebook(s)

lines = s.splitlines()

cells = []
Expand Down Expand Up @@ -94,6 +98,10 @@ def writes(self, nb, metadata=None, **kwargs):
if 'main_language' in metadata.get('jupytext', {}):
del metadata['jupytext']['main_language']

if self.fmt.get('format_name') == 'pandoc':
# TODO: filter notebook metadata, cell metadata, and remove outputs (optional)
return notebook_to_md(nb)

header = encoding_and_executable(nb, metadata, self.ext)
header_content, header_lines_to_next_cell = metadata_and_cell_to_header(nb, metadata,
self.implementation, self.ext)
Expand Down
42 changes: 42 additions & 0 deletions jupytext/pandoc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Jupyter notebook to Markdown and back, using Pandoc"""

import subprocess
import packaging.version
import nbformat


class PandocError(ChildProcessError):
    """Raised when running pandoc fails, or when the pandoc found is not suitable"""


def pandoc(args, text=''):
    """Execute pandoc with the given arguments and return its standard output

    :param args: pandoc's command line arguments, as one string that is split on whitespace
    :param text: the text sent to pandoc's standard input (encoded as UTF-8)
    :return: what pandoc printed on its standard output, decoded as UTF-8
    :raise PandocError: when pandoc exits with a non-zero return code. The message
        includes what pandoc wrote on its standard error.
    """
    cmd = [u'pandoc'] + args.split()
    # stderr has to be piped too: otherwise communicate() returns None for it,
    # and the error message below could not tell why pandoc failed
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate(text.encode('utf-8'))
    if proc.returncode:
        # 'replace' so that an undecodable diagnostic never hides the actual failure
        raise PandocError('pandoc exited with return code {}\n{}'.format(
            proc.returncode, err.decode('utf-8', 'replace')))
    return out.decode('utf-8')


def pandoc_version():
    """Return the version of pandoc, and raise a PandocError if it is older than 2.7.1"""
    # The version is taken as the second word on the first line of 'pandoc --version'
    first_line = pandoc(u'--version').splitlines()[0]
    version = first_line.split()[1]

    found = packaging.version.parse(version)
    required = packaging.version.parse('2.7.1')
    if found < required:
        raise PandocError('Please install pandoc>=2.7.1 (found version {})'.format(version))

    return version


def md_to_notebook(text):
    """Turn a Markdown document into a Jupyter notebook (nbformat version 4) with Pandoc"""
    # Pandoc returns the notebook as ipynb text, which nbformat then parses
    ipynb_text = pandoc(u'--from markdown --to ipynb', text)
    notebook = nbformat.reads(ipynb_text, as_version=4)
    return notebook


def notebook_to_md(notebook):
    """Export a notebook as Markdown: serialize it with nbformat, then convert it with Pandoc"""
    return pandoc(u'--from ipynb --to markdown', nbformat.writes(notebook))
34 changes: 34 additions & 0 deletions tests/test_read_simple_pandoc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from testfixtures import compare
import jupytext


def test_pandoc_implicit(markdown='''# Lorem ipsum
**Lorem ipsum** dolor sit amet, consectetur adipiscing elit. Nunc luctus
bibendum felis dictum sodales.
``` code
print("hello")
```
'''):
    """Plain Markdown read as 'md:pandoc' is stable once written to and read back from 'md'"""
    notebook = jupytext.reads(markdown, 'md:pandoc')
    exported = jupytext.writes(notebook, 'md')

    # Reading the exported text must give back the same notebook...
    reloaded = jupytext.reads(exported, 'md')
    compare(notebook, reloaded)

    # ...and exporting that notebook again must give back the same text
    exported_again = jupytext.writes(reloaded, 'md')
    compare(exported, exported_again)


def test_pandoc_explicit(markdown='''::: {.cell .markdown}
Lorem
=====
**Lorem ipsum** dolor sit amet, consectetur adipiscing elit. Nunc luctus
bibendum felis dictum sodales.
:::
'''):
    """Markdown that contains a Pandoc div is unchanged by a read/write round trip as 'md'"""
    notebook = jupytext.reads(markdown, 'md')
    exported = jupytext.writes(notebook, 'md')
    # Line endings are normalized before comparing with the input
    normalized = exported.replace('\r\n', '\n')
    compare(markdown, normalized)

0 comments on commit 9800c20

Please sign in to comment.