Skip to content

Commit

Permalink
Encode raw cells in Markdown files using HTML comments
Browse files Browse the repository at this point in the history
Jupytext Markdown format in version 1.2 - #321
Raw cells are encoded using HTML comments (``<!-- #raw -->`` and ``<!-- #endraw -->``) in Markdown files.
Code blocks from Markdown files, when they don't have an explicit language, are displayed as Markdown cells in Jupyter
  • Loading branch information
mwouts committed Sep 21, 2019
1 parent efaac99 commit 75c48fd
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 19 deletions.
1 change: 1 addition & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Release History

**Improvements**

- Raw cells are now encoded using HTML comments (``<!-- #raw -->`` and ``<!-- #endraw -->``) in Markdown files. Code blocks in Markdown files that have no explicit language are now displayed as Markdown cells in Jupyter (#321)
- ``jupytext notebook.py --to ipynb`` updates the timestamp of ``notebook.py`` so that the paired notebook still works in Jupyter (#335, #254)
**BugFixes**

Expand Down
31 changes: 26 additions & 5 deletions jupytext/cell_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ def __init__(self, fmt=None, default_language=None):
fmt = {}
self.ext = fmt.get('extension')
self.default_language = default_language or _SCRIPT_EXTENSIONS.get(self.ext, {}).get('language', 'python')
self.comment_magics = fmt['comment_magics'] if 'comment_magics' in fmt else self.default_comment_magics
self.comment_magics = fmt.get('comment_magics', self.default_comment_magics)
self.format_version = fmt.get('format_version')
self.metadata = None
self.org_content = []
self.content = []
Expand Down Expand Up @@ -194,7 +195,14 @@ def find_cell_content(self, lines):
(self.ext in ['.md', '.markdown'] and self.cell_type == 'code' and self.language is None):
if self.metadata.get('active') == '':
del self.metadata['active']
self.cell_type = 'raw'
# Is this a Jupytext document in the Markdown format >= 1.2 ?
if self.ext in ['.md', '.markdown'] and self.format_version not in ['1.0', '1.1']:
self.cell_type = 'markdown'
self.explicit_eoc = False
cell_end_marker += 1
self.content = lines[:cell_end_marker]
else:
self.cell_type = 'raw'

# Explicit end of cell marker?
if (next_cell_start + 1 < len(lines) and
Expand Down Expand Up @@ -228,17 +236,25 @@ class MarkdownCellReader(BaseCellReader):
end_code_re = re.compile(r"^```\s*$")
start_region_re = re.compile(r"^<!--\s*#region(.*)-->\s*$")
end_region_re = re.compile(r"^<!--\s*#endregion\s*-->\s*$")
start_raw_re = re.compile(r"^<!--\s*#raw(.*)-->\s*$")
end_raw_re = re.compile(r"^<!--\s*#endraw\s*-->\s*$")
default_comment_magics = False

def __init__(self, fmt=None, default_language=None):
    """Initialise the base reader, then the Markdown-specific parsing state.

    :param fmt: optional format dictionary; only 'split_at_heading' is read here
        (defaults to False), the rest is handled by the parent constructor
    :param default_language: passed unchanged to the parent constructor
    """
    super(MarkdownCellReader, self).__init__(fmt, default_language)
    options = fmt if fmt else {}
    self.split_at_heading = options.get('split_at_heading', False)
    # Flags switched on when a '<!-- #raw' / '<!-- #region' opening comment is matched
    self.in_raw = False
    self.in_region = False

def metadata_and_language_from_option_line(self, line):
region = self.start_region_re.match(line)
if region:
self.in_region = True
raw = self.start_raw_re.match(line)
if region or raw:
if region:
self.in_region = True
else:
self.in_raw = True
region = raw
options = region.groups()[0].strip()
if options:
start = options.find('{')
Expand Down Expand Up @@ -267,6 +283,11 @@ def find_cell_end(self, lines):
for i, line in enumerate(lines):
if self.end_region_re.match(line):
return i, i + 1, True
if self.in_raw:
self.cell_type = 'raw'
for i, line in enumerate(lines):
if self.end_raw_re.match(line):
return i, i + 1, True
elif self.metadata is None:
# default markdown: (last) two consecutive blank lines, except when in code blocks
self.cell_type = 'markdown'
Expand Down Expand Up @@ -295,7 +316,7 @@ def find_cell_end(self, lines):
if in_indented_code_block or in_explicit_code_block:
continue

if self.start_code_re.match(line) or self.start_region_re.match(line):
if self.start_code_re.match(line) or self.start_region_re.match(line) or self.start_raw_re.match(line):
if i > 1 and prev_blank:
return i - 1, i, False
return i, i, False
Expand Down
28 changes: 17 additions & 11 deletions jupytext/cell_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,22 +113,25 @@ def __init__(self, *args, **kwargs):
BaseCellExporter.__init__(self, *args, **kwargs)
self.comment = ''

def html_comment(self, metadata, code='region'):
    """Protect the cell source with a pair of HTML comments.

    The opening comment is ``<!-- #<code> [title] <json metadata> -->`` when
    metadata is non-empty, and ``<!-- #<code> -->`` otherwise. The closing
    comment is always ``<!-- #end<code> -->``.

    :param metadata: cell metadata to encode in the opening comment. It is
        left untouched: the title is extracted from a copy.
    :param code: marker name, e.g. 'region' (default) or 'raw'
    :return: list of text lines: opening comment, ``self.source``, closing comment
    """
    if metadata:
        # Work on a shallow copy: popping the title below must not remove it
        # from the caller's dictionary (which may be the cell's own metadata).
        metadata = dict(metadata)
        region_start = ['<!-- #' + code]
        # A title containing '{' cannot be written as plain text in front of the
        # JSON payload, so in that case it stays inside the JSON.
        if 'title' in metadata and '{' not in metadata['title']:
            region_start.append(metadata.pop('title'))
        region_start.append(json.dumps(metadata))
        region_start.append('-->')
        region_start = ' '.join(region_start)
    else:
        region_start = '<!-- #{} -->'.format(code)

    return [region_start] + self.source + ['<!-- #end{} -->'.format(code)]

def cell_to_text(self):
"""Return the text representation of a cell"""
if self.cell_type == 'markdown':
# Is an explicit region required?
if self.metadata or self.cell_reader(self.fmt).read(self.source)[1] < len(self.source):
if self.metadata:
region_start = ['<!-- #region']
if 'title' in self.metadata and '{' not in self.metadata['title']:
region_start.append(self.metadata.pop('title'))
region_start.append(json.dumps(self.metadata))
region_start.append('-->')
region_start = ' '.join(region_start)
else:
region_start = '<!-- #region -->'

return [region_start] + self.source + ['<!-- #endregion -->']
return self.html_comment(self.metadata)
return self.source

return self.code_to_text()
Expand All @@ -148,6 +151,9 @@ def code_to_text(self):
if filtered_metadata:
options.append(metadata_to_md_options(filtered_metadata))

if self.cell_type == 'raw':
return self.html_comment(filtered_metadata, 'raw')

return ['```{}'.format(' '.join(options))] + source + ['```']


Expand Down
8 changes: 5 additions & 3 deletions jupytext/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def __init__(self,
cell_exporter_class=MarkdownCellExporter,
# Version 1.0 on 2018-08-31 - jupytext v0.6.0 : Initial version
# Version 1.1 on 2019-03-24 - jupytext v1.1.0 : Markdown regions and cell metadata
current_version_number='1.1',
# Version 1.2 on 2019-09-21 - jupytext v1.3.0 : Raw regions are now marked with HTML comments
current_version_number='1.2',
min_readable_version_number='1.0'),

NotebookFormatDescription(
Expand All @@ -62,7 +63,7 @@ def __init__(self,
header_prefix='',
cell_reader_class=MarkdownCellReader,
cell_exporter_class=MarkdownCellExporter,
current_version_number='1.1',
current_version_number='1.2',
min_readable_version_number='1.0'),

NotebookFormatDescription(
Expand All @@ -73,7 +74,8 @@ def __init__(self,
cell_exporter_class=RMarkdownCellExporter,
# Version 1.0 on 2018-08-22 - jupytext v0.5.2 : Initial version
# Version 1.1 on 2019-03-24 - jupytext v1.1.0 : Markdown regions and cell metadata
current_version_number='1.1',
# Version 1.2 on 2019-09-21 - jupytext v1.3.0 : Raw regions are now marked with HTML comments
current_version_number='1.2',
min_readable_version_number='1.0')] + \
[
NotebookFormatDescription(
Expand Down
8 changes: 8 additions & 0 deletions jupytext/jupytext.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ def update_fmt_with_notebook_options(self, metadata):
if opt in self.fmt:
metadata.setdefault('jupytext', {}).setdefault(opt, self.fmt[opt])

# Is this format the same as that documented in the YAML header? If so, we want to know the format version
file_fmt = metadata.get('jupytext', {}).get('text_representation', {})
if self.fmt.get('extension') == file_fmt.get('extension') and \
self.fmt.get('format_name') == file_fmt.get('format_name'):
self.fmt.update(file_fmt)

# rST to md conversion should happen only once
if metadata.get('jupytext', {}).get('rst2md') is True:
metadata['jupytext']['rst2md'] = False
Expand Down Expand Up @@ -165,6 +171,8 @@ def writes(self, nb, metadata=None, **kwargs):
if (i + 1 < len(cell_exporters) and not cell_exporters[i + 1].is_code() and
not texts[i][0].startswith('<!-- #region') and
not texts[i + 1][0].startswith('<!-- #region') and
not texts[i][0].startswith('```') and
not texts[i + 1][0].startswith('```') and
(not split_at_heading or not (texts[i + 1] and texts[i + 1][0].startswith('#')))):
text.append('')

Expand Down

0 comments on commit 75c48fd

Please sign in to comment.