Skip to content

Commit

Permalink
fixup! feat: Add option to scan and register HTML anchors
Browse files Browse the repository at this point in the history
  • Loading branch information
pawamoy committed Feb 18, 2024
1 parent a246b8f commit 2460e8a
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 28 deletions.
35 changes: 21 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,29 @@ Note that this plugin's behavior is undefined when trying to link to a heading t

### Markdown anchors

The autorefs plugin offers a feature called "Markdown anchors". Such anchors can be added anywhere in a document, and linked to from any other place. The syntax is `[](){#id-of-the-anchor}`. First you must enable the feature:
The autorefs plugin offers a feature called "Markdown anchors". Such anchors can be added anywhere in a document, and linked to from any other place.

The syntax is:

```md
[](){#id-of-the-anchor}
```

If you look closely, it starts with the usual syntax for a link, `[]()`, except both the text value and URL of the link are empty. Then we see `{#id-of-the-anchor}`, which is the syntax supported by the [`attr_list`](https://python-markdown.github.io/extensions/attr_list/) extension. It sets an HTML id on the anchor element. The autorefs plugin simply gives meaning to such anchors with ids. Note that raw HTML anchors like `<a id="foo"></a>` are not supported.

The `attr_list` extension must be enabled for the Markdown anchors feature to work:

```yaml
# mkdocs.yml
plugins:
- search
- autorefs
scan_anchors: true
markdown_extensions:
- attr_list
```

Then, add an anchor to a document:
Now, you can add anchors to documents:

```md
Somewhere in a document.
Expand All @@ -76,9 +85,11 @@ Somewhere in a document.
Paragraph about foobar.
```

Now you can link to this anchor with the usual syntax:
...making it possible to link to this anchor with our automatic links:

```md
In any document.
Check out the [paragraph about foobar][foobar-paragraph].
```

Expand All @@ -97,27 +108,25 @@ Linking to the `foobar` anchor will bring you directly to the heading, not the a
## How to contribute to the project?
```

Such aliases are especially useful when the same headings appear in several different pages. Without aliases, linking to the heading was undefined behavior (it could lead to any one of the headings, undeterministically). With unique aliases above headings, you can make sure to link to the right heading.
Such aliases are especially useful when the same headings appear in several different pages. Without aliases, linking to the heading is undefined behavior (it could lead to any one of the headings). With unique aliases above headings, you can make sure to link to the right heading.

For example, consider the following setup. You have one document per operating system describing how to install a project with the OS package manager or from sources:

```
```tree
docs/
install/
arch.md
debian.md
gentoo.md
install/
arch.md
debian.md
gentoo.md
```

Each page has:

```md
## Install with package manager
...
## Install from sources
...
```

Expand All @@ -126,12 +135,10 @@ You don't want to change headings and make them redundant, like `## Arch: Instal
```md
[](){#arch-install-pkg}
## Install with package manager
...
[](){#arch-install-src}
## Install from sources
...
```

Expand Down
9 changes: 8 additions & 1 deletion src/mkdocs_autorefs/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from typing import TYPE_CHECKING, Any, Callable, Sequence
from urllib.parse import urlsplit

from markdown.extensions.attr_list import AttrListExtension
from mkdocs.config.base import Config
from mkdocs.config.config_options import Type
from mkdocs.config.defaults import MkDocsConfig
Expand Down Expand Up @@ -145,7 +146,13 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
The modified config.
"""
log.debug("Adding AutorefsExtension to the list")
scan_anchors = self.scan_anchors or self.config.scan_anchors
for ext in config.markdown_extensions:
if ext == "attr_list" or isinstance(ext, AttrListExtension):
log.debug("Enabling Markdown anchors feature")
scan_anchors = True
break
else:
scan_anchors = False
config["markdown_extensions"].append(AutorefsExtension(plugin=self if scan_anchors else None))
return config

Expand Down
18 changes: 6 additions & 12 deletions src/mkdocs_autorefs/references.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from __future__ import annotations

import re
import unicodedata
from html import escape, unescape
from itertools import zip_longest
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Match, Tuple
Expand Down Expand Up @@ -216,40 +215,35 @@ def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None:
"""
super().__init__(md)
self.plugin = plugin
self._slug = md.treeprocessors["toc"].slugify

def run(self, root: Element) -> None: # noqa: D102
if self.plugin.current_page is not None:
self._scan_anchors(root)

@staticmethod
def _slug(value: str, separator: str = "-") -> str:
value = unicodedata.normalize("NFKD", str(value)).encode("ascii", "ignore").decode("ascii")
value = re.sub(r"[^\w\s-]", "", value.lower())
return re.sub(r"[-_\s]+", separator, value).strip("-_")

def _scan_anchors(self, parent: Element) -> list[str]:
ids = []
# We iterate on pairs of elements, to check if the next element is a heading (alias feature).
for el, next_el in zip_longest(parent, parent[1:], fillvalue=Element("/")):
if el.tag == "a":
# We found an anchor. Record its id if it has one.
if hid := el.get("id"):
if anchor_id := el.get("id"):
if el.tail and el.tail.strip():
# If the anchor has a non-whitespace-only tail, it's not an alias:
# register it immediately.
self.plugin.register_anchor(self.plugin.current_page, hid) # type: ignore[arg-type]
self.plugin.register_anchor(self.plugin.current_page, anchor_id) # type: ignore[arg-type]
else:
# Else record its id and continue.
ids.append(hid)
ids.append(anchor_id)
elif el.tag == "p":
if ids := self._scan_anchors(el):
# Markdown anchors are always rendered as `a` tags within a `p` tag.
# Headings therefore appear after the `p` tag. Here the current element
# is a `p` tag and it contains at least one anchor with an id.
# We can check if the next element is a heading, and use its id as href.
href = (next_el.get("id") or self._slug(next_el.text or "")) if next_el.tag in self._htags else ""
for hid in ids:
self.plugin.register_anchor(self.plugin.current_page, hid, href) # type: ignore[arg-type]
for anchor_id in ids:
self.plugin.register_anchor(self.plugin.current_page, anchor_id, href) # type: ignore[arg-type]
ids.clear()
else:
# Recurse into sub-elements.
Expand Down
2 changes: 1 addition & 1 deletion tests/test_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def test_external_references() -> None:
def test_register_markdown_anchors() -> None:
"""Check that Markdown anchors are registered when enabled."""
plugin = AutorefsPlugin()
md = markdown.Markdown(extensions=["attr_list", AutorefsExtension(plugin)])
md = markdown.Markdown(extensions=["attr_list", "toc", AutorefsExtension(plugin)])
plugin.current_page = ""
md.convert(
dedent(
Expand Down

0 comments on commit 2460e8a

Please sign in to comment.