Skip to content

Commit

Permalink
✨ NEW: Add `relative-images` option to includes (#237)
Browse files Browse the repository at this point in the history
This allows for local image references to be located correctly, when including from a file in a sub/super folder.

This commit also consolidates some code into `is_external_url`
  • Loading branch information
chrisjsewell authored Aug 31, 2020
1 parent 08c5dea commit f057831
Show file tree
Hide file tree
Showing 10 changed files with 134 additions and 38 deletions.
28 changes: 28 additions & 0 deletions docs/using/howto.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,34 @@ To include rST, we must first "wrap" the directive in the [eval-rst directive](s
.. include:: snippets/include-rst.rst
```

(howto/include-readme)=
## Include a file from outside the docs folder (like README.md)

You can include a file, including one from outside the project, using e.g.:

````md
```{include} ../README.md
```
````

**However**, including a file will not usually resolve local links correctly, like `![](my-image.png)`, since it treats the text as if it originated from the "including file".

As of myst-parser version 0.12.7, a new, experimental feature has been added to resolve such links. You can now use:

````md
```{include} ../README.md
:relative-images:
```
````

and the include will attempt to re-write local image links, to reference them from the correct location!

:::{important}
The current functionality only works for Markdown style images (i.e. not image directives or HTML images).

If you encounter any issues with this feature, please don't hesitate to report them.
:::

(howto/autodoc)=
## Use `sphinx.ext.autodoc` in Markdown files

Expand Down
55 changes: 25 additions & 30 deletions myst_parser/docutils_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
from contextlib import contextmanager
import inspect
import json
from os.path import splitext
import os
import re
from typing import List
from urllib.parse import urlparse

import yaml

Expand Down Expand Up @@ -35,6 +34,7 @@
)
from .parse_directives import parse_directive_text, DirectiveParsingError
from .parse_html import HTMLImgParser
from .utils import is_external_url


def make_document(source_path="notset") -> nodes.document:
Expand Down Expand Up @@ -437,40 +437,22 @@ def render_link_open(self, token):

ref_node = nodes.reference()
self.add_line_and_source_path(ref_node, token)
# Check destination is supported for cross-linking and remove extension
# TODO escape urls?
destination = token.attrGet("href")
title = token.attrGet("title")
_, ext = splitext(destination)
# TODO check for other supported extensions, such as those specified in
# the Sphinx conf.py file but how to access this information?
# TODO this should probably only remove the extension for local paths,
# i.e. not uri's starting with http or other external prefix.

# if ext.replace('.', '') in self.supported:
# destination = destination.replace(ext, '')
destination = token.attrGet("href") # escape urls?
ref_node["refuri"] = destination

title = token.attrGet("title")
if title:
ref_node["title"] = title
next_node = ref_node

url_check = urlparse(destination)
# If there's not a url scheme (e.g. 'https' for 'https:...' links),
# or there is a scheme but it's not in the list of known_url_schemes,
# then assume it's a cross-reference
known_url_schemes = self.config.get("myst_url_schemes", None)
if known_url_schemes:
scheme_known = url_check.scheme in known_url_schemes
else:
scheme_known = bool(url_check.scheme)

if not url_check.fragment and not scheme_known:
self.handle_cross_reference(token, destination)
else:
if is_external_url(
destination, self.config.get("myst_url_schemes", None), True
):
self.current_node.append(next_node)
with self.current_node_context(ref_node):
self.render_children(token)
else:
self.handle_cross_reference(token, destination)

def handle_cross_reference(self, token, destination):
if not self.config.get("ignore_missing_refs", False):
Expand Down Expand Up @@ -500,9 +482,17 @@ def render_html_block(self, token):
def render_image(self, token):
img_node = nodes.image()
self.add_line_and_source_path(img_node, token)
img_node["uri"] = token.attrGet("src")
# TODO ideally we would render proper markup here,
# this probably requires an upstream change in sphinx
destination = token.attrGet("src")

if self.config.get("relative_source", None) is not None and not is_external_url(
destination, None, True
):
img_node["uri"] = os.path.join(
self.config.get("relative_source"), destination
)
else:
img_node["uri"] = destination

img_node["alt"] = self.renderInlineAsText(token.children)
title = token.attrGet("title")
if title:
Expand Down Expand Up @@ -842,6 +832,11 @@ def render_directive(self, token: Token):
self.current_node += [error] + messages
return

if issubclass(directive_class, Include):
# this is a Markdown only option,
# to allow for altering relative image reference links
directive_class.option_spec["relative-images"] = directives.flag

try:
arguments, options, body_lines = parse_directive_text(
directive_class, arguments, content
Expand Down
9 changes: 9 additions & 0 deletions myst_parser/mocking.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""This module provides classes to Mock the core components of the docutils.RSTParser,
the key difference being that nested parsing treats the text as Markdown not rST.
"""
import os
from pathlib import Path
import re
import sys
Expand Down Expand Up @@ -417,10 +421,15 @@ def run(self):
self.renderer.document["source"] = str(path)
self.renderer.reporter.source = str(path)
self.renderer.reporter.get_source_and_line = lambda l: (str(path), l)
if "relative-images" in self.options:
self.renderer.config["relative_source"] = os.path.relpath(
path.parent, source_dir
)
self.renderer.nested_render_text(file_content, startline + 1)
finally:
self.renderer.document["source"] = source
self.renderer.reporter.source = rsource
self.renderer.config.pop("relative_source", None)
if line_func is not None:
self.renderer.reporter.get_source_and_line = line_func
else:
Expand Down
27 changes: 26 additions & 1 deletion myst_parser/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,34 @@
import html
from urllib.parse import quote
from typing import Optional, List
from urllib.parse import quote, urlparse


def escape_url(raw):
    """Escape a URL so that it is safe to embed in HTML output.

    Any HTML entities in ``raw`` are first decoded, the result is
    percent-quoted (leaving the common URL punctuation untouched),
    and finally the quoted text is HTML-escaped again.
    """
    decoded = html.unescape(raw)
    percent_quoted = quote(decoded, safe="/#:()*?=%@+,&")
    return html.escape(percent_quoted)


def is_external_url(
    reference: str, known_url_schemes: Optional[List[str]], match_fragment: bool
) -> bool:
    """Return if a reference should be recognised as an external URL.

    URLs are of the format: scheme://netloc/path;parameters?query#fragment

    This checks if there is a url scheme (e.g. 'https') and, if so,
    if the scheme is in the list of known_url_schemes (if supplied).

    :param reference: the link destination to inspect
    :param known_url_schemes: e.g. ["http", "https", "mailto"]
        If None, match all schemes
    :param match_fragment: If True and a fragment found, then True will be returned,
        irrespective of a scheme match
    :return: always a real ``bool`` (never the fragment string itself)
    """
    url_check = urlparse(reference)
    if known_url_schemes is not None:
        scheme_known = url_check.scheme in known_url_schemes
    else:
        scheme_known = bool(url_check.scheme)
    # ``url_check.fragment`` is a string; coerce so the annotated ``bool``
    # return type holds even when only the fragment rule applies.
    return bool(scheme_known or (match_fragment and url_check.fragment))
15 changes: 10 additions & 5 deletions tests/test_sphinx/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def read(
extract_body=False,
remove_scripts=False,
regress_html=False,
replace=None,
):

outpath = path(os.path.join(str(app.srcdir), "_build", buildername, filename))
Expand All @@ -82,9 +83,10 @@ def read(

soup = BeautifulSoup(content, "html.parser")
doc_div = soup.findAll("div", {"class": "documentwrapper"})[0]
file_regression.check(
doc_div.prettify(), extension=".html", encoding="utf8"
)
text = doc_div.prettify()
for find, rep in (replace or {}).items():
text = text.replace(find, rep)
file_regression.check(text, extension=".html", encoding="utf8")

return content

Expand All @@ -93,7 +95,7 @@ def read(

@pytest.fixture
def get_sphinx_app_doctree(file_regression):
def read(app, docname="index", resolve=False, regress=False):
def read(app, docname="index", resolve=False, regress=False, replace=None):
if resolve:
doctree = app.env.get_and_resolve_doctree(docname, app.builder)
extension = ".resolved.xml"
Expand All @@ -106,7 +108,10 @@ def read(app, docname="index", resolve=False, regress=False):
node["source"] = pathlib.Path(node["source"]).name

if regress:
file_regression.check(doctree.pformat(), extension=extension)
text = doctree.pformat() # type: str
for find, rep in (replace or {}).items():
text = text.replace(find, rep)
file_regression.check(text, extension=extension)

return doctree

Expand Down
1 change: 1 addition & 0 deletions tests/test_sphinx/sourcedirs/includes/include1.inc.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ orphan: true
Some text with *syntax*

```{include} subfolder/include2.inc.md
:relative-images:
```
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ This absolute path will refer to the project root (where the `conf.py` is):
```{figure} /subfolder/example2.jpg
Caption
```

![alt](example2.jpg)

![alt](https://example.com)
23 changes: 21 additions & 2 deletions tests/test_sphinx/test_sphinx_builds.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,28 @@ def test_includes(
assert warnings == ""

try:
get_sphinx_app_doctree(app, docname="index", regress=True)
get_sphinx_app_doctree(
app,
docname="index",
regress=True,
# fix for Windows CI
replace={
r"subfolder\example2.jpg": "subfolder/example2.jpg",
r"subfolder\\example2.jpg": "subfolder/example2.jpg",
r"subfolder\\\\example2.jpg": "subfolder/example2.jpg",
},
)
finally:
get_sphinx_app_output(app, filename="index.html", regress_html=True)
get_sphinx_app_output(
app,
filename="index.html",
regress_html=True,
replace={
r"'subfolder\\example2'": "'subfolder/example2'",
r'uri="subfolder\\example2"': 'uri="subfolder/example2"',
"_images/example21.jpg": "_images/example2.jpg",
},
)


@pytest.mark.sphinx(
Expand Down
6 changes: 6 additions & 0 deletions tests/test_sphinx/test_sphinx_builds/test_includes.html
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@ <h2>
</a>
</p>
</div>
<p>
<img alt="alt" src="_images/example2.jpg"/>
</p>
<p>
<img alt="alt" src="https://example.com"/>
</p>
<p>
<a class="reference internal" href="#inc-header">
<span class="std std-ref">
Expand Down
4 changes: 4 additions & 0 deletions tests/test_sphinx/test_sphinx_builds/test_includes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
<image candidates="{'*': 'subfolder/example2.jpg'}" uri="subfolder/example2.jpg">
<caption>
Caption
<paragraph>
<image alt="alt" candidates="{'*': 'subfolder/example2.jpg'}" uri="subfolder/example2.jpg">
<paragraph>
<image alt="alt" candidates="{'?': 'https://example.com'}" uri="https://example.com">
<paragraph>
<pending_xref refdoc="index" refdomain="std" refexplicit="False" reftarget="inc_header" reftype="ref" refwarn="True">
<inline classes="xref std std-ref">
Expand Down

0 comments on commit f057831

Please sign in to comment.