Ignore string delimiters in commented text

Fixes #307
mwouts · Sep 1, 2019 · 66dbe50
1 parent 412293f
commit 66dbe50
Show file tree

Hide file tree

Showing 5 changed files with 52 additions and 2 deletions.
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -17,6 +17,7 @@ Release History
 
 - Fix the usability of the `fmt` argument in `jupytext.read` (#312)
 - Fix the download notebook error when `c.notebook_extensions` has a custom value (#318)
+- Ignore string delimiters in commented text (#307)
 
 1.2.1 (2019-07-20)
 ++++++++++++++++++++++

diff --git a/jupytext/languages.py b/jupytext/languages.py
@@ -26,6 +26,8 @@
 _COMMENT_CHARS = [_SCRIPT_EXTENSIONS[ext]['comment'] for ext in _SCRIPT_EXTENSIONS if
                   _SCRIPT_EXTENSIONS[ext]['comment'] != '#']
 
+_COMMENT = {_SCRIPT_EXTENSIONS[ext]['language']: _SCRIPT_EXTENSIONS[ext]['comment'] for ext in _SCRIPT_EXTENSIONS}
+
 _JUPYTER_LANGUAGES = _JUPYTER_LANGUAGES + [
     _SCRIPT_EXTENSIONS[ext]['language'] for ext in _SCRIPT_EXTENSIONS if
     _SCRIPT_EXTENSIONS[ext]['language'] not in _JUPYTER_LANGUAGES]

diff --git a/jupytext/magics.py b/jupytext/magics.py
@@ -2,7 +2,7 @@
 
 import re
 from .stringparser import StringParser
-from .languages import _SCRIPT_EXTENSIONS
+from .languages import _SCRIPT_EXTENSIONS, _COMMENT
 
 # A magic expression is a line or cell or metakernel magic (#94, #61) escaped zero, or multiple times
 _MAGIC_RE = {_SCRIPT_EXTENSIONS[ext]['language']: re.compile(
@@ -14,7 +14,6 @@
     r"^({0} |{0})*(%|%%|%%%)[a-zA-Z](.*){0}\s*noescape".format(
         _SCRIPT_EXTENSIONS[ext]['comment'])) for ext in _SCRIPT_EXTENSIONS}
 _LINE_CONTINUATION_RE = re.compile(r'.*\\\s*$')
-_COMMENT = {_SCRIPT_EXTENSIONS[ext]['language']: _SCRIPT_EXTENSIONS[ext]['comment'] for ext in _SCRIPT_EXTENSIONS}
 
 # Commands starting with a question or exclamation mark have to be escaped
 _PYTHON_HELP_OR_BASH_CMD = re.compile(r"^(# |#)*(\?|!)\s*[A-Za-z]")

diff --git a/jupytext/stringparser.py b/jupytext/stringparser.py
@@ -1,5 +1,6 @@
 """A simple file parser that can tell whether the first character of a line
 is quoted or not"""
+from .languages import _COMMENT
 
 
 class StringParser:
@@ -11,6 +12,7 @@ class StringParser:
     def __init__(self, language):
         self.ignore = language is None
         self.python = language != 'R'
+        self.comment = _COMMENT.get(language)
 
     def is_quoted(self):
         """Is the next line quoted?"""
@@ -23,6 +25,10 @@ def read_line(self, line):
         if self.ignore:
             return
 
+        # Do not search for quotes when the line is commented out (and not quoted)
+        if not self.is_quoted() and self.comment is not None and line.startswith(self.comment):
+            return
+
         for i, char in enumerate(line):
             if char not in ['"', "'"]:
                 continue

diff --git a/tests/test_read_simple_python.py b/tests/test_read_simple_python.py
@@ -667,6 +667,48 @@ def test_notebook_one_blank_line_before_first_markdown_cell(script="""
             assert lines[-1]
 
 
+def test_read_markdown_cell_with_triple_quote_307(
+        script="""# This script test that commented triple quotes '''
+# do not impede the correct identification of Markdown cells
+
+# Here is Markdown cell number 2 '''
+"""):
+    notebook = jupytext.reads(script, 'py')
+    assert len(notebook.cells) == 2
+    assert notebook.cells[0].cell_type == 'markdown'
+    assert notebook.cells[0].source == """This script test that commented triple quotes '''
+do not impede the correct identification of Markdown cells"""
+    assert notebook.cells[1].cell_type == 'markdown'
+    assert notebook.cells[1].source == "Here is Markdown cell number 2 '''"
+
+    script2 = jupytext.writes(notebook, 'py')
+    compare(script2, script)
+
+
+def test_read_explicit_markdown_cell_with_triple_quote_307(
+        script="""# {{{ {"special": "metadata", "cell_type": "markdown"}
+# some text '''
+# }}}
+
+print('hello world')
+
+# {{{ {"special": "metadata", "cell_type": "markdown"}
+# more text '''
+# }}}
+"""):
+    notebook = jupytext.reads(script, 'py')
+    assert len(notebook.cells) == 3
+    assert notebook.cells[0].cell_type == 'markdown'
+    assert notebook.cells[0].source == "some text '''"
+    assert notebook.cells[1].cell_type == 'code'
+    assert notebook.cells[1].source == "print('hello world')"
+    assert notebook.cells[2].cell_type == 'markdown'
+    assert notebook.cells[2].source == "more text '''"
+
+    script2 = jupytext.writes(notebook, 'py')
+    compare(script2, script)
+
+
 def test_round_trip_markdown_cell_with_magic():
     notebook = new_notebook(cells=[new_markdown_cell('IPython has magic commands like\n%quickref')],
                             metadata={'jupytext': {'main_language': 'python'}})