diff --git a/docs/using/howto.md b/docs/using/howto.md index 12062568..0b9445dc 100644 --- a/docs/using/howto.md +++ b/docs/using/howto.md @@ -28,6 +28,34 @@ To include rST, we must first "wrap" the directive in the [eval-rst directive](s .. include:: snippets/include-rst.rst ``` +(howto/include-readme)= +## Include a file from outside the docs folder (like README.md) + +You can include a file, including one from outside the project using e.g.: + +````md +```{include} ../README.md +``` +```` + +**However**, including a file will not usually resolve local links correctly, like `![](my-image.png)`, since it treats the text as if it originated from the "including file". + +As of myst-parser version 0.12.7, a new, experimental feature has been added to resolve such links. You can now use: + +````md +```{include} ../README.md +:relative-images: +``` +```` + +and the include will attempt to re-write local image links, to reference them from the correct location! + +:::{important} +The current functionality only works for Markdown style images (i.e. not image directives or HTML images). + +If you encounter any issues with this feature, please don't hesitate to report it. 
+::: + (howto/autodoc)= ## Use `sphinx.ext.autodoc` in Markdown files diff --git a/myst_parser/docutils_renderer.py b/myst_parser/docutils_renderer.py index a362ff62..3b4024dc 100644 --- a/myst_parser/docutils_renderer.py +++ b/myst_parser/docutils_renderer.py @@ -3,10 +3,9 @@ from contextlib import contextmanager import inspect import json -from os.path import splitext +import os import re from typing import List -from urllib.parse import urlparse import yaml @@ -35,6 +34,7 @@ ) from .parse_directives import parse_directive_text, DirectiveParsingError from .parse_html import HTMLImgParser +from .utils import is_external_url def make_document(source_path="notset") -> nodes.document: @@ -437,40 +437,22 @@ def render_link_open(self, token): ref_node = nodes.reference() self.add_line_and_source_path(ref_node, token) - # Check destination is supported for cross-linking and remove extension - # TODO escape urls? - destination = token.attrGet("href") - title = token.attrGet("title") - _, ext = splitext(destination) - # TODO check for other supported extensions, such as those specified in - # the Sphinx conf.py file but how to access this information? - # TODO this should probably only remove the extension for local paths, - # i.e. not uri's starting with http or other external prefix. - - # if ext.replace('.', '') in self.supported: - # destination = destination.replace(ext, '') + destination = token.attrGet("href") # escape urls? ref_node["refuri"] = destination + title = token.attrGet("title") if title: ref_node["title"] = title next_node = ref_node - url_check = urlparse(destination) - # If there's not a url scheme (e.g. 'https' for 'https:...' 
links), - # or there is a scheme but it's not in the list of known_url_schemes, - # then assume it's a cross-reference - known_url_schemes = self.config.get("myst_url_schemes", None) - if known_url_schemes: - scheme_known = url_check.scheme in known_url_schemes - else: - scheme_known = bool(url_check.scheme) - - if not url_check.fragment and not scheme_known: - self.handle_cross_reference(token, destination) - else: + if is_external_url( + destination, self.config.get("myst_url_schemes", None), True + ): self.current_node.append(next_node) with self.current_node_context(ref_node): self.render_children(token) + else: + self.handle_cross_reference(token, destination) def handle_cross_reference(self, token, destination): if not self.config.get("ignore_missing_refs", False): @@ -500,9 +482,17 @@ def render_html_block(self, token): def render_image(self, token): img_node = nodes.image() self.add_line_and_source_path(img_node, token) - img_node["uri"] = token.attrGet("src") - # TODO ideally we would render proper markup here, - # this probably requires an upstream change in sphinx + destination = token.attrGet("src") + + if self.config.get("relative_source", None) is not None and not is_external_url( + destination, None, True + ): + img_node["uri"] = os.path.join( + self.config.get("relative_source"), destination + ) + else: + img_node["uri"] = destination + img_node["alt"] = self.renderInlineAsText(token.children) title = token.attrGet("title") if title: @@ -842,6 +832,11 @@ def render_directive(self, token: Token): self.current_node += [error] + messages return + if issubclass(directive_class, Include): + # this is a Markdown only option, + # to allow for altering relative image reference links + directive_class.option_spec["relative-images"] = directives.flag + try: arguments, options, body_lines = parse_directive_text( directive_class, arguments, content diff --git a/myst_parser/mocking.py b/myst_parser/mocking.py index 775f9cdc..cc75588e 100644 --- 
a/myst_parser/mocking.py +++ b/myst_parser/mocking.py @@ -1,3 +1,7 @@ +"""This module provides classes to Mock the core components of the docutils.RSTParser, +the key difference being that nested parsing treats the text as Markdown not rST. +""" +import os from pathlib import Path import re import sys @@ -417,10 +421,15 @@ def run(self): self.renderer.document["source"] = str(path) self.renderer.reporter.source = str(path) self.renderer.reporter.get_source_and_line = lambda l: (str(path), l) + if "relative-images" in self.options: + self.renderer.config["relative_source"] = os.path.relpath( + path.parent, source_dir + ) self.renderer.nested_render_text(file_content, startline + 1) finally: self.renderer.document["source"] = source self.renderer.reporter.source = rsource + self.renderer.config.pop("relative_source", None) if line_func is not None: self.renderer.reporter.get_source_and_line = line_func else: diff --git a/myst_parser/utils.py b/myst_parser/utils.py index ae4a996d..208eb91c 100644 --- a/myst_parser/utils.py +++ b/myst_parser/utils.py @@ -1,5 +1,6 @@ import html -from urllib.parse import quote +from typing import Optional, List +from urllib.parse import quote, urlparse def escape_url(raw): @@ -7,3 +8,27 @@ def escape_url(raw): Escape urls to prevent code injection craziness. (Hopefully.) """ return html.escape(quote(html.unescape(raw), safe="/#:()*?=%@+,&")) + + +def is_external_url( + reference: str, known_url_schemes: Optional[List[str]], match_fragment: bool +) -> bool: + """Return if a reference should be recognised as an external URL. + + URLs are of the format: scheme://netloc/path;parameters?query#fragment + + This checks if there is a url scheme (e.g. 'https') and, if so, + if the scheme is in the list of known_url_schemes (if supplied). + + :param known_url_schemes: e.g. 
["http", "https", "mailto"] + If None, match all schemes + :param match_fragment: If True and a fragment found, then True will be returned, + irrespective of a scheme match + + """ + url_check = urlparse(reference) + if known_url_schemes is not None: + scheme_known = url_check.scheme in known_url_schemes + else: + scheme_known = bool(url_check.scheme) + return scheme_known or (match_fragment and url_check.fragment) diff --git a/tests/test_sphinx/conftest.py b/tests/test_sphinx/conftest.py index 04418d41..9744f41f 100644 --- a/tests/test_sphinx/conftest.py +++ b/tests/test_sphinx/conftest.py @@ -64,6 +64,7 @@ def read( extract_body=False, remove_scripts=False, regress_html=False, + replace=None, ): outpath = path(os.path.join(str(app.srcdir), "_build", buildername, filename)) @@ -82,9 +83,10 @@ def read( soup = BeautifulSoup(content, "html.parser") doc_div = soup.findAll("div", {"class": "documentwrapper"})[0] - file_regression.check( - doc_div.prettify(), extension=".html", encoding="utf8" - ) + text = doc_div.prettify() + for find, rep in (replace or {}).items(): + text = text.replace(find, rep) + file_regression.check(text, extension=".html", encoding="utf8") return content @@ -93,7 +95,7 @@ def read( @pytest.fixture def get_sphinx_app_doctree(file_regression): - def read(app, docname="index", resolve=False, regress=False): + def read(app, docname="index", resolve=False, regress=False, replace=None): if resolve: doctree = app.env.get_and_resolve_doctree(docname, app.builder) extension = ".resolved.xml" @@ -106,7 +108,10 @@ def read(app, docname="index", resolve=False, regress=False): node["source"] = pathlib.Path(node["source"]).name if regress: - file_regression.check(doctree.pformat(), extension=extension) + text = doctree.pformat() # type: str + for find, rep in (replace or {}).items(): + text = text.replace(find, rep) + file_regression.check(text, extension=extension) return doctree diff --git a/tests/test_sphinx/sourcedirs/includes/include1.inc.md 
b/tests/test_sphinx/sourcedirs/includes/include1.inc.md index 35b43ec5..c35d7dd3 100644 --- a/tests/test_sphinx/sourcedirs/includes/include1.inc.md +++ b/tests/test_sphinx/sourcedirs/includes/include1.inc.md @@ -7,4 +7,5 @@ orphan: true Some text with *syntax* ```{include} subfolder/include2.inc.md +:relative-images: ``` diff --git a/tests/test_sphinx/sourcedirs/includes/subfolder/include2.inc.md b/tests/test_sphinx/sourcedirs/includes/subfolder/include2.inc.md index 1ccbbb48..dc0c21bd 100644 --- a/tests/test_sphinx/sourcedirs/includes/subfolder/include2.inc.md +++ b/tests/test_sphinx/sourcedirs/includes/subfolder/include2.inc.md @@ -13,3 +13,7 @@ This absolute path will refer to the project root (where the `conf.py` is): ```{figure} /subfolder/example2.jpg Caption ``` + +![alt](example2.jpg) + +![alt](https://example.com) diff --git a/tests/test_sphinx/test_sphinx_builds.py b/tests/test_sphinx/test_sphinx_builds.py index 58c262ed..df566984 100644 --- a/tests/test_sphinx/test_sphinx_builds.py +++ b/tests/test_sphinx/test_sphinx_builds.py @@ -104,9 +104,28 @@ def test_includes( assert warnings == "" try: - get_sphinx_app_doctree(app, docname="index", regress=True) + get_sphinx_app_doctree( + app, + docname="index", + regress=True, + # fix for Windows CI + replace={ + r"subfolder\example2.jpg": "subfolder/example2.jpg", + r"subfolder\\example2.jpg": "subfolder/example2.jpg", + r"subfolder\\\\example2.jpg": "subfolder/example2.jpg", + }, + ) finally: - get_sphinx_app_output(app, filename="index.html", regress_html=True) + get_sphinx_app_output( + app, + filename="index.html", + regress_html=True, + replace={ + r"'subfolder\\example2'": "'subfolder/example2'", + r'uri="subfolder\\example2"': 'uri="subfolder/example2"', + "_images/example21.jpg": "_images/example2.jpg", + }, + ) @pytest.mark.sphinx( diff --git a/tests/test_sphinx/test_sphinx_builds/test_includes.html b/tests/test_sphinx/test_sphinx_builds/test_includes.html index 511243a4..723ccc40 100644 --- 
a/tests/test_sphinx/test_sphinx_builds/test_includes.html +++ b/tests/test_sphinx/test_sphinx_builds/test_includes.html @@ -71,6 +71,12 @@
+ +
++ +