From e87433542f9f488057f8011a8a572d2525b1bf42 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Sat, 21 Sep 2019 19:56:19 +0200 Subject: [PATCH] Encode raw cells in Markdown files using HTML comments Jupytext Markdown format in version 1.2 - #321 Raw cells are encoded using HTML comments (``<!-- #raw -->`` and ``<!-- #endraw -->``) in Markdown files. Code blocks from Markdown files, when they don't have an explicit language, are displayed as Markdown cells in Jupyter --- HISTORY.rst | 1 + jupytext/cell_reader.py | 31 ++++++++++++++++++++++++++----- jupytext/cell_to_text.py | 28 +++++++++++++++++----------- jupytext/formats.py | 8 +++++--- jupytext/jupytext.py | 8 ++++++++ 5 files changed, 57 insertions(+), 19 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index b57ca15e5..69cbe2372 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -8,6 +8,7 @@ Release History **Improvements** +- Raw cells are now encoded using HTML comments (``<!-- #raw -->`` and ``<!-- #endraw -->``) in Markdown files. And code blocks from Markdown files, when they don't have an explicit language, are displayed as Markdown cells in Jupyter (#321) - ``jupytext notebook.py --to ipynb`` updates the timestamp of ``notebook.py`` so that the paired notebook still works in Jupyter (#335, #254) **BugFixes** diff --git a/jupytext/cell_reader.py b/jupytext/cell_reader.py index b4e9e305b..4d61ef84d 100644 --- a/jupytext/cell_reader.py +++ b/jupytext/cell_reader.py @@ -95,7 +95,8 @@ def __init__(self, fmt=None, default_language=None): fmt = {} self.ext = fmt.get('extension') self.default_language = default_language or _SCRIPT_EXTENSIONS.get(self.ext, {}).get('language', 'python') - self.comment_magics = fmt['comment_magics'] if 'comment_magics' in fmt else self.default_comment_magics + self.comment_magics = fmt.get('comment_magics', self.default_comment_magics) + self.format_version = fmt.get('format_version') self.metadata = None self.org_content = [] self.content = [] @@ -194,7 +195,14 @@ def find_cell_content(self, lines): (self.ext in ['.md', '.markdown'] and 
self.cell_type == 'code' and self.language is None): if self.metadata.get('active') == '': del self.metadata['active'] - self.cell_type = 'raw' + # Is this a Jupytext document in the Markdown format >= 1.2 ? + if self.ext in ['.md', '.markdown'] and self.format_version not in ['1.0', '1.1']: + self.cell_type = 'markdown' + self.explicit_eoc = False + cell_end_marker += 1 + self.content = lines[:cell_end_marker] + else: + self.cell_type = 'raw' # Explicit end of cell marker? if (next_cell_start + 1 < len(lines) and @@ -228,17 +236,25 @@ class MarkdownCellReader(BaseCellReader): end_code_re = re.compile(r"^```\s*$") start_region_re = re.compile(r"^\s*$") end_region_re = re.compile(r"^\s*$") + start_raw_re = re.compile(r"^\s*$") + end_raw_re = re.compile(r"^\s*$") default_comment_magics = False def __init__(self, fmt=None, default_language=None): super(MarkdownCellReader, self).__init__(fmt, default_language) self.split_at_heading = (fmt or {}).get('split_at_heading', False) self.in_region = False + self.in_raw = False def metadata_and_language_from_option_line(self, line): region = self.start_region_re.match(line) - if region: - self.in_region = True + raw = self.start_raw_re.match(line) + if region or raw: + if region: + self.in_region = True + else: + self.in_raw = True + region = raw options = region.groups()[0].strip() if options: start = options.find('{') @@ -267,6 +283,11 @@ def find_cell_end(self, lines): for i, line in enumerate(lines): if self.end_region_re.match(line): return i, i + 1, True + if self.in_raw: + self.cell_type = 'raw' + for i, line in enumerate(lines): + if self.end_raw_re.match(line): + return i, i + 1, True elif self.metadata is None: # default markdown: (last) two consecutive blank lines, except when in code blocks self.cell_type = 'markdown' @@ -295,7 +316,7 @@ def find_cell_end(self, lines): if in_indented_code_block or in_explicit_code_block: continue - if self.start_code_re.match(line) or self.start_region_re.match(line): + if 
self.start_code_re.match(line) or self.start_region_re.match(line) or self.start_raw_re.match(line): if i > 1 and prev_blank: return i - 1, i, False return i, i, False diff --git a/jupytext/cell_to_text.py b/jupytext/cell_to_text.py index 63b9ba57e..4147a312c 100644 --- a/jupytext/cell_to_text.py +++ b/jupytext/cell_to_text.py @@ -113,22 +113,25 @@ def __init__(self, *args, **kwargs): BaseCellExporter.__init__(self, *args, **kwargs) self.comment = '' + def html_comment(self, metadata, code='region'): + if metadata: + region_start = ['') + region_start = ' '.join(region_start) + else: + region_start = ''.format(code) + + return [region_start] + self.source + [''.format(code)] + def cell_to_text(self): """Return the text representation of a cell""" if self.cell_type == 'markdown': # Is an explicit region required? if self.metadata or self.cell_reader(self.fmt).read(self.source)[1] < len(self.source): - if self.metadata: - region_start = ['') - region_start = ' '.join(region_start) - else: - region_start = '' - - return [region_start] + self.source + [''] + return self.html_comment(self.metadata) return self.source return self.code_to_text() @@ -148,6 +151,9 @@ def code_to_text(self): if filtered_metadata: options.append(metadata_to_md_options(filtered_metadata)) + if self.cell_type == 'raw': + return self.html_comment(filtered_metadata, 'raw') + return ['```{}'.format(' '.join(options))] + source + ['```'] diff --git a/jupytext/formats.py b/jupytext/formats.py index d5e2f0b79..5c2ca3999 100644 --- a/jupytext/formats.py +++ b/jupytext/formats.py @@ -53,7 +53,8 @@ def __init__(self, cell_exporter_class=MarkdownCellExporter, # Version 1.0 on 2018-08-31 - jupytext v0.6.0 : Initial version # Version 1.1 on 2019-03-24 - jupytext v1.1.0 : Markdown regions and cell metadata - current_version_number='1.1', + # Version 1.2 on 2019-09-21 - jupytext v1.3.0 : Raw regions are now marked with HTML comments + current_version_number='1.2', min_readable_version_number='1.0'), 
NotebookFormatDescription( @@ -62,7 +63,7 @@ def __init__(self, header_prefix='', cell_reader_class=MarkdownCellReader, cell_exporter_class=MarkdownCellExporter, - current_version_number='1.1', + current_version_number='1.2', min_readable_version_number='1.0'), NotebookFormatDescription( @@ -73,7 +74,8 @@ def __init__(self, cell_exporter_class=RMarkdownCellExporter, # Version 1.0 on 2018-08-22 - jupytext v0.5.2 : Initial version # Version 1.1 on 2019-03-24 - jupytext v1.1.0 : Markdown regions and cell metadata - current_version_number='1.1', + # Version 1.2 on 2019-09-21 - jupytext v1.3.0 : Raw regions are now marked with HTML comments + current_version_number='1.2', min_readable_version_number='1.0')] + \ [ NotebookFormatDescription( diff --git a/jupytext/jupytext.py b/jupytext/jupytext.py index 9cd5b307d..96b388a22 100644 --- a/jupytext/jupytext.py +++ b/jupytext/jupytext.py @@ -39,6 +39,12 @@ def update_fmt_with_notebook_options(self, metadata): if opt in self.fmt: metadata.setdefault('jupytext', {}).setdefault(opt, self.fmt[opt]) + # Is this format the same as that documented in the YAML header? If so, we want to know the format version + file_fmt = metadata.get('jupytext', {}).get('text_representation', {}) + if self.fmt.get('extension') == file_fmt.get('extension') and \ + self.fmt.get('format_name') == file_fmt.get('format_name'): + self.fmt.update(file_fmt) + # rST to md conversion should happen only once if metadata.get('jupytext', {}).get('rst2md') is True: metadata['jupytext']['rst2md'] = False @@ -165,6 +171,8 @@ def writes(self, nb, metadata=None, **kwargs): if (i + 1 < len(cell_exporters) and not cell_exporters[i + 1].is_code() and not texts[i][0].startswith('