From ad2134d0415387d2e0c27dc3579bbb1bf8a851dd Mon Sep 17 00:00:00 2001
From: Marc Wouts <marc.wouts@gmail.com>
Date: Wed, 27 Mar 2019 05:49:14 +0100
Subject: [PATCH] Use HTML comment to protect Markdown cells

#66 #111 #188
---
 README.md                                     |  9 +++++----
 jupytext/cell_reader.py                       | 19 +++++++++++--------
 jupytext/cell_to_text.py                      | 12 ++++++++----
 jupytext/jupytext.py                          |  3 ++-
 .../Notebook with metadata and long cells.Rmd | 10 ++++------
 .../Notebook with metadata and long cells.md  | 15 ++++++---------
 .../ipynb_to_md/sample_rise_notebook_66.md    | 10 ++++------
 tests/test_read_simple_markdown.py            | 10 ++++------
 8 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/README.md b/README.md
index c1bad737c..3cbbb6edc 100755
--- a/README.md
+++ b/README.md
@@ -334,10 +334,11 @@ Save Jupyter notebooks as [Markdown](https://daringfireball.net/projects/markdow
 
 [R Markdown](https://rmarkdown.rstudio.com/authoring_quick_tour.html) is [RStudio](https://www.rstudio.com/)'s format for notebooks, with support for R, Python, and many [other languages](https://bookdown.org/yihui/rmarkdown/language-engines.html).
 
-Our implementation for Jupyter notebooks as [Markdown](https://daringfireball.net/projects/markdown/syntax) or [R Markdown](https://rmarkdown.rstudio.com/authoring_quick_tour.html) documents is straightforward:
-- A YAML header contains the notebook metadata (Jupyter kernel, etc)
-- Markdown cells are inserted verbatim and separated with two blank lines. When required (cells with metadata, cells that contain two blank lines or code blocks), Jupytext inserts explicit start and end region markers in the form of Markdown comments: `[region]: #` and `[endregion]: #`.
-- Code and raw cells start with triple backticks collated with cell language, and end with triple backticks. Cell metadata are encoded in JSON format. The [code cell options](https://yihui.name/knitr/options/) in the R Markdown format are mapped to the corresponding Jupyter cell metadata options, when available.
+
+Jupytext's implementation for Jupyter notebooks as [Markdown](https://daringfireball.net/projects/markdown/syntax) or [R Markdown](https://rmarkdown.rstudio.com/authoring_quick_tour.html) documents is as follows:
+- The notebook metadata (Jupyter kernel, etc) goes to a YAML header
+- Code and raw cells are encoded as Markdown code blocks with triple backticks. In a Python notebook, a code cell starts with ` ```python` and ends with ` ``` `. Cell metadata are found after the language information, with a `key=value` syntax, where `value` is encoded in JSON format (Markdown) or R format (R Markdown). R Markdown [code cell options](https://yihui.name/knitr/options/) are mapped to the corresponding Jupyter cell metadata options, when available.
+- Markdown cells are inserted verbatim and separated with two blank lines. When required (cells with metadata, cells that contain two blank lines or code blocks), Jupytext protects the cell boundary with HTML comments: `<!-- #region -->` and `<!-- #endregion -->`. Cells with explicit boundaries are [foldable](https://code.visualstudio.com/docs/editor/codebasics#_folding) in VS Code, and can accept a title and/or metadata in JSON format: `<!-- #region This is the title for my protected cell {"key": "value"} -->`.
 
 See how our `World population.ipynb` notebook in the [demo folder](https://github.com/mwouts/jupytext/tree/master/demo) is represented in [Markdown](https://github.com/mwouts/jupytext/blob/master/demo/World%20population.md) or [R Markdown](https://github.com/mwouts/jupytext/blob/master/demo/World%20population.Rmd).
 
diff --git a/jupytext/cell_reader.py b/jupytext/cell_reader.py
index ebb15bd72..88da6f42a 100644
--- a/jupytext/cell_reader.py
+++ b/jupytext/cell_reader.py
@@ -228,11 +228,6 @@ def find_cell_content(self, lines):
             if lines_to_end_of_cell_marker != (0 if pep8_lines == 1 else 2):
                 self.metadata['lines_to_end_of_cell_marker'] = lines_to_end_of_cell_marker
 
-        # Exactly one empty line at the end of markdown cell?
-        if self.ext in ['.md', '.Rmd'] and _BLANK_LINE.match(source[-1]) and \
-                cell_end_marker < len(lines) and MarkdownCellReader.end_region_re.match(lines[cell_end_marker]):
-            source = source[:-1]
-
         if not is_active(self.ext, self.metadata) or \
                 ('active' not in self.metadata and self.language and self.language != self.default_language):
             self.content = uncomment(source, self.comment if self.ext not in ['.r', '.R'] else '#')
@@ -276,8 +271,8 @@ class MarkdownCellReader(BaseCellReader):
     start_code_re = re.compile(r"^```(.*)")
     non_jupyter_code_re = re.compile(r"^```\{")
     end_code_re = re.compile(r"^```\s*$")
-    start_region_re = re.compile(r"^\[region(.*)\]:\s*#\s*$")
-    end_region_re = re.compile(r"^\[endregion\]:\s*#\s*$")
+    start_region_re = re.compile(r"^<!--\s*#region(.*)-->\s*$")
+    end_region_re = re.compile(r"^<!--\s*#endregion\s*-->\s*$")
     default_comment_magics = False
 
     def __init__(self, fmt=None, default_language=None):
@@ -291,8 +286,16 @@ def metadata_and_language_from_option_line(self, line):
             self.in_region = True
             options = region.groups()[0].strip()
             if options:
-                options = re.sub(r'\\\[', u'[', re.sub(r'\\\]', u']', options))
+                start = options.find('{')
+                if start >= 0:
+                    title = options[:start].strip()
+                    options = options[start:]
+                else:
+                    title = options.strip()
+                    options = "{}"
                 self.metadata = json.loads(options)
+                if title:
+                    self.metadata['title'] = title
             else:
                 self.metadata = {}
         elif self.start_code_re.match(line):
diff --git a/jupytext/cell_to_text.py b/jupytext/cell_to_text.py
index 4bc82e1df..77d0b6d32 100644
--- a/jupytext/cell_to_text.py
+++ b/jupytext/cell_to_text.py
@@ -119,12 +119,16 @@ def cell_to_text(self):
             # Is an explicit region required?
             if self.metadata or self.cell_reader(self.fmt).read(self.source)[1] < len(self.source):
                 if self.metadata:
-                    region_start = '[region {}]: #'.format(
-                        re.sub(r'\[', u'\\[', re.sub(r'\]', u'\\]', json.dumps(self.metadata))))
+                    region_start = ['<!-- #region']
+                    if 'title' in self.metadata and '{' not in self.metadata['title']:
+                        region_start.append(self.metadata.pop('title'))
+                    region_start.append(json.dumps(self.metadata))
+                    region_start.append('-->')
+                    region_start = ' '.join(region_start)
                 else:
-                    region_start = '[region]: #'
+                    region_start = '<!-- #region -->'
 
-                return [region_start] + self.source + ['', '[endregion]: #']
+                return [region_start] + self.source + ['<!-- #endregion -->']
             return self.source
 
         return self.code_to_text()
diff --git a/jupytext/jupytext.py b/jupytext/jupytext.py
index c1e1165c4..8a9b6e199 100644
--- a/jupytext/jupytext.py
+++ b/jupytext/jupytext.py
@@ -132,7 +132,8 @@ def writes(self, nb, metadata=None, **kwargs):
             # two blank lines between markdown cells in Rmd when those do not have explicit region markers
             if self.ext in ['.Rmd', '.md'] and not cell.is_code():
                 if (i + 1 < len(cell_exporters) and not cell_exporters[i + 1].is_code() and
-                        not texts[i][0].startswith('[region') and not texts[i + 1][0].startswith('[region') and
+                        not texts[i][0].startswith('<!-- #region') and
+                        not texts[i + 1][0].startswith('<!-- #region') and
                         (not split_at_heading or not (texts[i + 1] and texts[i + 1][0].startswith('#')))):
                     text.append('')
 
diff --git a/tests/notebooks/mirror/ipynb_to_Rmd/Notebook with metadata and long cells.Rmd b/tests/notebooks/mirror/ipynb_to_Rmd/Notebook with metadata and long cells.Rmd
index 7cc0e468d..e03e69905 100644
--- a/tests/notebooks/mirror/ipynb_to_Rmd/Notebook with metadata and long cells.Rmd	
+++ b/tests/notebooks/mirror/ipynb_to_Rmd/Notebook with metadata and long cells.Rmd	
@@ -8,13 +8,12 @@ jupyter:
 
 # Part one - various cells
 
-[region]: #
+<!-- #region -->
 Here we have a markdown cell
 
 
 with two blank lines
-
-[endregion]: #
+<!-- #endregion -->
 
 Now we have a markdown cell
 with a code block inside it
@@ -42,10 +41,9 @@ of the raw cell
 
 # Part two - cell metadata
 
-[region {"key": "value"}]: #
+<!-- #region {"key": "value"} -->
 This is a markdown cell with cell metadata `{"key": "value"}`
-
-[endregion]: #
+<!-- #endregion -->
 
 ```{python .class=None, tags=c("parameters")}
 """This is a code cell with metadata `{"tags":["parameters"], ".class":null}`"""
diff --git a/tests/notebooks/mirror/ipynb_to_md/Notebook with metadata and long cells.md b/tests/notebooks/mirror/ipynb_to_md/Notebook with metadata and long cells.md
index cba9d98e6..de273fc00 100644
--- a/tests/notebooks/mirror/ipynb_to_md/Notebook with metadata and long cells.md	
+++ b/tests/notebooks/mirror/ipynb_to_md/Notebook with metadata and long cells.md	
@@ -8,15 +8,14 @@ jupyter:
 
 # Part one - various cells
 
-[region]: #
+<!-- #region -->
 Here we have a markdown cell
 
 
 with two blank lines
+<!-- #endregion -->
 
-[endregion]: #
-
-[region]: #
+<!-- #region -->
 Now we have a markdown cell
 with a code block inside it
 
@@ -25,8 +24,7 @@ with a code block inside it
 ```
 
 After that cell we'll have a code cell
-
-[endregion]: #
+<!-- #endregion -->
 
 ```python
 2 + 2
@@ -45,10 +43,9 @@ of the raw cell
 
 # Part two - cell metadata
 
-[region {"key": "value"}]: #
+<!-- #region {"key": "value"} -->
 This is a markdown cell with cell metadata `{"key": "value"}`
-
-[endregion]: #
+<!-- #endregion -->
 
 ```python .class tags=["parameters"]
 """This is a code cell with metadata `{"tags":["parameters"], ".class":null}`"""
diff --git a/tests/notebooks/mirror/ipynb_to_md/sample_rise_notebook_66.md b/tests/notebooks/mirror/ipynb_to_md/sample_rise_notebook_66.md
index 45a69ef3c..3b3d4f2d6 100644
--- a/tests/notebooks/mirror/ipynb_to_md/sample_rise_notebook_66.md
+++ b/tests/notebooks/mirror/ipynb_to_md/sample_rise_notebook_66.md
@@ -6,16 +6,14 @@ jupyter:
     name: python3
 ---
 
-[region {"slideshow": {"slide_type": "slide"}}]: #
+<!-- #region {"slideshow": {"slide_type": "slide"}} -->
 A markdown cell
-
-[endregion]: #
+<!-- #endregion -->
 
 ```python slideshow={"slide_type": ""}
 1+1
 ```
 
-[region {"cell_style": "center", "slideshow": {"slide_type": "fragment"}}]: #
+<!-- #region {"cell_style": "center", "slideshow": {"slide_type": "fragment"}} -->
 Markdown cell two
-
-[endregion]: #
+<!-- #endregion -->
diff --git a/tests/test_read_simple_markdown.py b/tests/test_read_simple_markdown.py
index 8a6b4cb70..28859fe76 100644
--- a/tests/test_read_simple_markdown.py
+++ b/tests/test_read_simple_markdown.py
@@ -126,13 +126,12 @@ def test_raw_cell_with_metadata(markdown="""```key="value"
     compare(markdown, markdown2)
 
 
-def test_markdown_cell_with_metadata(markdown="""[region {"key": "value"}]: #
+def test_markdown_cell_with_metadata(markdown="""<!-- #region {"key": "value"} -->
 A long
 
 
 markdown cell
-
-[endregion]: #
+<!-- #endregion -->
 """):
     nb = jupytext.reads(markdown, 'md')
     compare(nb.cells[0], new_markdown_cell(source='A long\n\n\nmarkdown cell',
@@ -143,13 +142,12 @@ def test_markdown_cell_with_metadata(markdown="""[region {"key": "value"}]: #
 
 def test_two_markdown_cells(markdown="""# A header
 
-[region]: #
+<!-- #region -->
 A long
 
 
 markdown cell
-
-[endregion]: #
+<!-- #endregion -->
 """):
     nb = jupytext.reads(markdown, 'md')
     compare(nb.cells[0], new_markdown_cell(source='# A header'))