From 883428dd4e74dbe741a1dbfcf5b366ab0a804d28 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sun, 30 Aug 2020 11:11:43 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=91=8C=20Implement=20MyST=20using=20markd?= =?UTF-8?q?own-it-py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../continuous-integration-conda.yml | 2 +- CHANGELOG.md | 1 + jupytext/myst.py | 122 +++++++++++------- requirements.txt | 1 + setup.py | 11 +- 5 files changed, 87 insertions(+), 50 deletions(-) diff --git a/.github/workflows/continuous-integration-conda.yml b/.github/workflows/continuous-integration-conda.yml index d3303dac3..14e33af8d 100644 --- a/.github/workflows/continuous-integration-conda.yml +++ b/.github/workflows/continuous-integration-conda.yml @@ -65,7 +65,7 @@ jobs: # install sphinx_gallery and matplotlib if available conda install sphinx-gallery --freeze-installed # myst-parser - conda install 'myst-parser>=0.8' 'myst-parser<0.9' --freeze-installed + conda install 'markdown-it-py>=0.5' 'markdown-it-py<0.6' --freeze-installed exit 0 - name: Conda list shell: pwsh diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f91f5112..8a5ddc515 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ - The `# %%` cell marker has the same indentation as the first line in the cell (#562) - The `md:myst` and `md:pandoc` are always included in the Jupytext formats, and an informative runtime error will occur if the required dependencies, resp. `myst-parser` and `pandoc`, are not installed. 
(#556) +- Jupytext now depends on `markdown-it-py` and always features the MyST-Markdown format (Python 3.6 and above, #591) **Fixed** - Configured coverage targets in `codecov.yml` diff --git a/jupytext/myst.py b/jupytext/myst.py index 72d565f7a..baa28038b 100644 --- a/jupytext/myst.py +++ b/jupytext/myst.py @@ -4,19 +4,19 @@ """ import json import warnings +import re +from textwrap import dedent import nbformat as nbf import yaml -from .reraise import reraise try: - import myst_parser - from myst_parser.main import default_parser - from myst_parser.parse_directives import DirectiveParsingError, parse_directive_text -except ImportError as err: - myst_parser = None - DirectiveParsingError = Exception - default_parser = parse_directive_text = reraise(err) + from markdown_it import MarkdownIt + from markdown_it.extensions.front_matter import front_matter_plugin + from markdown_it.extensions.myst_blocks import myst_block_plugin + from markdown_it.extensions.myst_role import myst_role_plugin +except ImportError: + MarkdownIt = None MYST_FORMAT_NAME = "myst" CODE_DIRECTIVE = "{code-cell}" @@ -24,14 +24,8 @@ def is_myst_available(): - """Whether the myst-parser package is available.""" - if myst_parser is None: - return False - major, minor = myst_parser.__version__.split(".")[:2] - if int(major) < 1 and int(minor) < 8: - warnings.warn("The installed myst-parser version is less than the required 0.8") - return False - return True + """Whether the markdown-it-py package is available.""" + return MarkdownIt is not None def raise_if_myst_is_not_available(): @@ -43,10 +37,8 @@ def raise_if_myst_is_not_available(): def myst_version(): - """The major version of myst parser.""" - if is_myst_available(): - return ".".join(myst_parser.__version__.split(".")[:2]) - return "N/A" + """The version of myst.""" + return 0.12 def myst_extensions(no_md=False): @@ -56,6 +48,20 @@ def myst_extensions(no_md=False): return [".md", ".myst", ".mystnb", ".mnb"] +def get_parser(): + """Return the 
markdown-it parser to use.""" + parser = ( + MarkdownIt("commonmark") + .enable("table") + .use(front_matter_plugin) + .use(myst_block_plugin) + .use(myst_role_plugin) + # we only need to parse block level components (for efficiency) + .disable("inline", True) + ) + return parser + + def matches_mystnb( text, ext=None, @@ -79,9 +85,7 @@ def matches_mystnb( return False try: - # parse markdown file up to the block level (i.e. don't worry about inline text) - parser = default_parser("html", disable_syntax=["inline"]) - tokens = parser.parse(text + "\n") + tokens = get_parser().parse(text + "\n") except (TypeError, ValueError) as err: warnings.warn("myst-parse failed unexpectedly: {}".format(err)) return False @@ -164,13 +168,6 @@ def from_nbnode(value): return value -class MockDirective: - option_spec = {"options": True} - required_arguments = 0 - optional_arguments = 1 - has_content = True - - class MystMetadataParsingError(Exception): """Error when parsing metadata from myst formatted text""" @@ -184,23 +181,56 @@ def strip_blank_lines(text): def read_fenced_cell(token, cell_index, cell_type): - """Return cell options and body""" - try: - _, options, body_lines = parse_directive_text( - directive_class=MockDirective, - argument_str="", - content=token.content, - validate_options=False, - ) - except DirectiveParsingError as err: - raise MystMetadataParsingError( - "{0} cell {1} at line {2} could not be read: {3}".format( - cell_type, cell_index, token.map[0] + 1, err - ) - ) + """Parse (and validate) the full directive text.""" + content = token.content + error_msg = "{0} cell {1} at line {2} could not be read: ".format( + cell_type, cell_index, token.map[0] + 1 + ) + + body_lines, options = parse_directive_options(content, error_msg) + + # remove first line of body if blank + # this is to allow space between the options and the content + if body_lines and not body_lines[0].strip(): + body_lines = body_lines[1:] + return options, body_lines +def 
parse_directive_options(content, error_msg): + """Parse (and validate) the directive option section.""" + options = {} + if content.startswith("---"): + content = "\n".join(content.splitlines()[1:]) + match = re.search(r"^-{3,}", content, re.MULTILINE) + if match: + yaml_block = content[: match.start()] + content = content[match.end() + 1 :] + else: + yaml_block = content + content = "" + yaml_block = dedent(yaml_block) + try: + options = yaml.safe_load(yaml_block) or {} + except (yaml.parser.ParserError, yaml.scanner.ScannerError) as error: + raise MystMetadataParsingError(error_msg + "Invalid YAML; " + str(error)) + elif content.lstrip().startswith(":"): + content_lines = content.splitlines() # type: list + yaml_lines = [] + while content_lines: + if not content_lines[0].lstrip().startswith(":"): + break + yaml_lines.append(content_lines.pop(0).lstrip()[1:]) + yaml_block = "\n".join(yaml_lines) + content = "\n".join(content_lines) + try: + options = yaml.safe_load(yaml_block) or {} + except (yaml.parser.ParserError, yaml.scanner.ScannerError) as error: + raise MystMetadataParsingError(error_msg + "Invalid YAML; " + str(error)) + + return content.splitlines(), options + + def read_cell_metadata(token, cell_index): """Return cell metadata""" metadata = {} @@ -244,9 +274,7 @@ def myst_to_notebook( """ raise_if_myst_is_not_available() - # parse markdown file up to the block level (i.e. 
don't worry about inline text) - parser = default_parser("html", disable_syntax=["inline"]) - tokens = parser.parse(text + "\n") + tokens = get_parser().parse(text + "\n") lines = text.splitlines() md_start_line = 0 diff --git a/requirements.txt b/requirements.txt index 06c641ab2..a520266dc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +#markdown-it-py~=0.5.2 #Python>=3.6 nbformat>=4.0.0 pyyaml toml diff --git a/setup.py b/setup.py index 29289a1e2..60f0ec541 100644 --- a/setup.py +++ b/setup.py @@ -53,9 +53,16 @@ ], entry_points={"console_scripts": ["jupytext = jupytext.cli:jupytext"]}, tests_require=["pytest"], - install_requires=["nbformat>=4.0.0", "pyyaml", "toml", 'mock;python_version<"3"'], + install_requires=[ + "markdown-it-py~=0.5.2; python_version >= '3.6'", + "nbformat>=4.0.0", + "pyyaml", + "toml", + 'mock; python_version<"3"', + ], extras_require={ - "myst": ["myst-parser~=0.8.0; python_version >= '3.6'"], + # left for back-compatibility + "myst": [], "toml": ["toml"], }, license="MIT",