diff --git a/docs/changelog.md b/docs/changelog.md index 72d56865..6f40a6bf 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Fix type annotations for `convertFile` - it accepts only bytes-based buffers. Also remove legacy checks from Python 2 (#1400) +* Improve and expand type annotations in the code base (#1401). ## [3.5.1] -- 2023-10-31 diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index d8084680..d2020b9b 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -171,14 +171,14 @@ def __init__(self, *args): super().__init__(*args) self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length) - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return block.startswith(' '*self.tab_length) and \ not self.parser.state.isstate('detabbed') and \ (parent.tag in self.ITEM_TYPES or (len(parent) and parent[-1] is not None and (parent[-1].tag in self.LIST_TYPES))) - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: block = blocks.pop(0) level, sibling = self.get_level(parent, block) block = self.looseDetab(block, level) @@ -251,10 +251,10 @@ def get_level(self, parent: etree.Element, block: str) -> tuple[int, etree.Eleme class CodeBlockProcessor(BlockProcessor): """ Process code blocks. """ - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return block.startswith(' '*self.tab_length) - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: sibling = self.lastChild(parent) block = blocks.pop(0) theRest = '' @@ -286,10 +286,10 @@ class BlockQuoteProcessor(BlockProcessor): RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)') - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return bool(self.RE.search(block)) and not util.nearing_recursion_limit() - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: block = blocks.pop(0) m = self.RE.search(block) if m: @@ -353,10 +353,10 @@ def __init__(self, parser: BlockParser): self.INDENT_RE = re.compile(r'^[ ]{%d,%d}((\d+\.)|[*+-])[ ]+.*' % (self.tab_length, self.tab_length * 2 - 1)) - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return bool(self.RE.match(block)) - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: # Check for multiple items in one block. items = self.get_items(blocks.pop(0)) sibling = self.lastChild(parent) @@ -460,10 +460,10 @@ class HashHeaderProcessor(BlockProcessor): # Detect a header at start of any line in block RE = re.compile(r'(?:^|\n)(?P#{1,6})(?P
(?:\\.|[^\\])*?)#*(?:\n|$)') - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return bool(self.RE.search(block)) - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: block = blocks.pop(0) m = self.RE.search(block) if m: @@ -491,10 +491,10 @@ class SetextHeaderProcessor(BlockProcessor): # Detect Setext-style header. Must be first 2 lines of block. RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE) - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return bool(self.RE.match(block)) - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: lines = blocks.pop(0).split('\n') # Determine level. `=` is 1 and `-` is 2. if lines[1].startswith('='): @@ -517,7 +517,7 @@ class HRProcessor(BlockProcessor): # Detect hr on any line of a block. SEARCH_RE = re.compile(RE, re.MULTILINE) - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: m = self.SEARCH_RE.search(block) if m: # Save match object on class instance so we can use it later. @@ -525,7 +525,7 @@ def test(self, parent, block): return True return False - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: block = blocks.pop(0) match = self.match # Check for lines in block before `hr`. @@ -545,10 +545,10 @@ def run(self, parent, blocks): class EmptyBlockProcessor(BlockProcessor): """ Process blocks that are empty or start with an empty line. """ - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return not block or block.startswith('\n') - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: block = blocks.pop(0) filler = '\n\n' if block: @@ -575,10 +575,10 @@ class ReferenceProcessor(BlockProcessor): r'^[ ]{0,3}\[([^\[\]]*)\]:[ ]*\n?[ ]*([^\s]+)[ ]*(?:\n[ ]*)?((["\'])(.*)\4[ ]*|\((.*)\)[ ]*)?$', re.MULTILINE ) - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return True - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> bool: block = blocks.pop(0) m = self.RE.search(block) if m: @@ -601,10 +601,10 @@ def run(self, parent, blocks): class ParagraphProcessor(BlockProcessor): """ Process Paragraph blocks. """ - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return True - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: block = blocks.pop(0) if block.strip(): # Not a blank block. Add to parent, otherwise throw it away. diff --git a/markdown/core.py b/markdown/core.py index 09a3924f..6c7a21be 100644 --- a/markdown/core.py +++ b/markdown/core.py @@ -159,7 +159,7 @@ def build_parser(self) -> Markdown: def registerExtensions( self, extensions: Sequence[Extension | str], - configs: Mapping[str, Mapping[str, Any]] + configs: Mapping[str, dict[str, Any]] ) -> Markdown: """ Load a list of extensions into an instance of the `Markdown` class. @@ -491,8 +491,8 @@ def markdownFromFile(**kwargs: Any): [`convert`][markdown.Markdown.convert]. Keyword arguments: - input (str | TextIO): A file name or readable object. - output (str | TextIO): A file name or writable object. + input (str | BinaryIO): A file name or readable object. + output (str | BinaryIO): A file name or writable object. encoding (str): Encoding of input and output. **kwargs: Any arguments accepted by the `Markdown` class. diff --git a/markdown/extensions/__init__.py b/markdown/extensions/__init__.py index 070c4cce..a5ec07b2 100644 --- a/markdown/extensions/__init__.py +++ b/markdown/extensions/__init__.py @@ -27,7 +27,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Mapping, Sequence +from typing import TYPE_CHECKING, Any, Iterable, Mapping from ..util import parseBoolValue if TYPE_CHECKING: # pragma: no cover @@ -112,7 +112,7 @@ def setConfig(self, key: str, value: Any) -> None: value = parseBoolValue(value, preserve_none=True) self.config[key][0] = value - def setConfigs(self, items: Mapping[str, Any] | Sequence[tuple[str, Any]]): + def setConfigs(self, items: Mapping[str, Any] | Iterable[tuple[str, Any]]) -> None: """ Loop through a collection of configuration options, passing each to [`setConfig`][markdown.extensions.Extension.setConfig]. diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py index c060f475..738368af 100644 --- a/markdown/extensions/abbr.py +++ b/markdown/extensions/abbr.py @@ -43,10 +43,10 @@ class AbbrPreprocessor(BlockProcessor): RE = re.compile(r'^[*]\[(?P[^\]]*)\][ ]?:[ ]*\n?[ ]*(?P.*)$', re.MULTILINE) - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return True - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> bool: """ Find and remove all Abbreviation references from the text. Each reference is set as a new `AbbrPattern` in the markdown instance. @@ -71,7 +71,7 @@ def run(self, parent, blocks): blocks.insert(0, block) return False - def _generate_pattern(self, text): + def _generate_pattern(self, text: str) -> str: """ Given a string, returns an regex pattern to match that string. @@ -90,11 +90,11 @@ def _generate_pattern(self, text): class AbbrInlineProcessor(InlineProcessor): """ Abbreviation inline pattern. """ - def __init__(self, pattern, title): + def __init__(self, pattern: str, title: str): super().__init__(pattern) self.title = title - def handleMatch(self, m, data): + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: abbr = etree.Element('abbr') abbr.text = AtomicString(m.group('abbr')) abbr.set('title', self.title) diff --git a/markdown/extensions/admonition.py b/markdown/extensions/admonition.py index f05d0896..d0e97002 100644 --- a/markdown/extensions/admonition.py +++ b/markdown/extensions/admonition.py @@ -30,6 +30,10 @@ from ..blockprocessors import BlockProcessor import xml.etree.ElementTree as etree import re +from typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from markdown import blockparser class AdmonitionExtension(Extension): @@ -49,15 +53,15 @@ class AdmonitionProcessor(BlockProcessor): RE = re.compile(r'(?:^|\n)!!! ?([\w\-]+(?: +[\w\-]+)*)(?: +"(.*?)")? *(?:\n|$)') RE_SPACES = re.compile(' +') - def __init__(self, parser): + def __init__(self, parser: blockparser.BlockParser): """Initialization.""" super().__init__(parser) - self.current_sibling = None + self.current_sibling: etree.Element | None = None self.content_indention = 0 - def parse_content(self, parent, block): + def parse_content(self, parent: etree.Element, block: str) -> tuple[etree.Element | None, str, str]: """Get sibling admonition. Retrieve the appropriate sibling element. This can get tricky when @@ -115,14 +119,14 @@ def parse_content(self, parent, block): return sibling, block, the_rest - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: if self.RE.search(block): return True else: return self.parse_content(parent, block)[0] is not None - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: block = blocks.pop(0) m = self.RE.search(block) @@ -160,7 +164,7 @@ def run(self, parent, blocks): # list for future processing. blocks.insert(0, theRest) - def get_class_and_title(self, match): + def get_class_and_title(self, match: re.Match[str]) -> tuple[str, str | None]: klass, title = match.group(1).lower(), match.group(2) klass = self.RE_SPACES.sub(' ', klass) if title is None: diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py index 0c317d1b..7ce3f992 100644 --- a/markdown/extensions/attr_list.py +++ b/markdown/extensions/attr_list.py @@ -86,7 +86,7 @@ class AttrListTreeprocessor(Treeprocessor): r'\uf900-\ufdcf\ufdf0-\ufffd' r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+') - def run(self, doc: Element): + def run(self, doc: Element) -> None: for elem in doc.iter(): if self.md.is_block_level(elem.tag): # Block level: check for `attrs` on last line of text diff --git a/markdown/extensions/codehilite.py b/markdown/extensions/codehilite.py index f8d25b0f..0114908f 100644 --- a/markdown/extensions/codehilite.py +++ b/markdown/extensions/codehilite.py @@ -24,6 +24,10 @@ from . import Extension from ..treeprocessors import Treeprocessor from ..util import parseBoolValue +from typing import TYPE_CHECKING, Callable, Any + +if TYPE_CHECKING: # pragma: no cover + import xml.etree.ElementTree as etree try: # pragma: no cover from pygments import highlight @@ -110,11 +114,11 @@ class CodeHilite: def __init__(self, src: str, **options): self.src = src - self.lang = options.pop('lang', None) - self.guess_lang = options.pop('guess_lang', True) - self.use_pygments = options.pop('use_pygments', True) - self.lang_prefix = options.pop('lang_prefix', 'language-') - self.pygments_formatter = options.pop('pygments_formatter', 'html') + self.lang: str | None = options.pop('lang', None) + self.guess_lang: bool = options.pop('guess_lang', True) + self.use_pygments: bool = options.pop('use_pygments', True) + self.lang_prefix: str = options.pop('lang_prefix', 'language-') + self.pygments_formatter: str | Callable = options.pop('pygments_formatter', 'html') if 'linenos' not in options: options['linenos'] = options.pop('linenums', None) @@ -128,7 +132,7 @@ def __init__(self, src: str, **options): self.options = options - def hilite(self, shebang=True) -> str: + def hilite(self, shebang: bool = True) -> str: """ Pass code to the [Pygments](https://pygments.org/) highlighter with optional line numbers. The output should then be styled with CSS to @@ -187,7 +191,7 @@ def hilite(self, shebang=True) -> str: txt ) - def _parseHeader(self): + def _parseHeader(self) -> None: """ Determines language of a code block from shebang line and whether the said line should be removed or left in place. If the shebang line @@ -249,7 +253,9 @@ def _parseHeader(self): class HiliteTreeprocessor(Treeprocessor): """ Highlight source code in code blocks. """ - def code_unescape(self, text): + config: dict[str, Any] + + def code_unescape(self, text: str) -> str: """Unescape code.""" text = text.replace("<", "<") text = text.replace(">", ">") @@ -258,7 +264,7 @@ def code_unescape(self, text): text = text.replace("&", "&") return text - def run(self, root): + def run(self, root: etree.Element) -> None: """ Find code blocks and store in `htmlStash`. """ blocks = root.iter('pre') for block in blocks: diff --git a/markdown/extensions/def_list.py b/markdown/extensions/def_list.py index 54273b60..5324bf19 100644 --- a/markdown/extensions/def_list.py +++ b/markdown/extensions/def_list.py @@ -33,10 +33,10 @@ class DefListProcessor(BlockProcessor): RE = re.compile(r'(^|\n)[ ]{0,3}:[ ]{1,3}(.*?)(\n|$)') NO_INDENT_RE = re.compile(r'^[ ]{0,3}[^ :]') - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return bool(self.RE.search(block)) - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> bool | None: raw_block = blocks.pop(0) m = self.RE.search(raw_block) @@ -99,7 +99,7 @@ class DefListIndentProcessor(ListIndentProcessor): LIST_TYPES = ['dl', 'ol', 'ul'] """ Include `dl` is list types. """ - def create_item(self, parent, block): + def create_item(self, parent: etree.Element, block: str) -> None: """ Create a new `dd` or `li` (depending on parent) and parse the block with it as the parent. """ dd = etree.SubElement(parent, 'dd') diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py index 241bb6d4..da1a9be1 100644 --- a/markdown/extensions/fenced_code.py +++ b/markdown/extensions/fenced_code.py @@ -29,6 +29,10 @@ from ..util import parseBoolValue from ..serializers import _escape_attrib_html import re +from typing import TYPE_CHECKING, Any, Iterable + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown class FencedCodeExtension(Extension): @@ -62,11 +66,11 @@ class FencedBlockPreprocessor(Preprocessor): re.MULTILINE | re.DOTALL | re.VERBOSE ) - def __init__(self, md, config): + def __init__(self, md: Markdown, config: dict[str, Any]): super().__init__(md) self.config = config self.checked_for_deps = False - self.codehilite_conf = {} + self.codehilite_conf: dict[str, Any] = {} self.use_attr_list = False # List of options to convert to boolean values self.bool_options = [ @@ -76,7 +80,7 @@ def __init__(self, md, config): 'use_pygments' ] - def run(self, lines): + def run(self, lines: list[str]) -> list[str]: """ Match and store Fenced Code Blocks in the `HtmlStash`. """ # Check for dependent extensions @@ -151,7 +155,7 @@ def run(self, lines): break return text.split("\n") - def handle_attrs(self, attrs): + def handle_attrs(self, attrs: Iterable[tuple[str, str]]) -> tuple[str, list[str], dict[str, Any]]: """ Return tuple: `(id, [list, of, classes], {configs})` """ id = '' classes = [] @@ -169,7 +173,7 @@ def handle_attrs(self, attrs): configs[k] = v return id, classes, configs - def _escape(self, txt): + def _escape(self, txt: str) -> str: """ basic html escaping """ txt = txt.replace('&', '&') txt = txt.replace('<', '<') diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py index 2424dbc8..30c08113 100644 --- a/markdown/extensions/footnotes.py +++ b/markdown/extensions/footnotes.py @@ -68,8 +68,8 @@ def __init__(self, **kwargs): # In multiple invocations, emit links that don't get tangled. self.unique_prefix = 0 - self.found_refs = {} - self.used_refs = set() + self.found_refs: dict[str, int] = {} + self.used_refs: set[str] = set() self.reset() @@ -105,7 +105,7 @@ def reset(self) -> None: self.found_refs = {} self.used_refs = set() - def unique_ref(self, reference, found: bool = False): + def unique_ref(self, reference: str, found: bool = False) -> str: """ Get a unique reference if there are duplicates. """ if not found: return reference @@ -126,7 +126,9 @@ def unique_ref(self, reference, found: bool = False): self.found_refs[original_ref] = 1 return reference - def findFootnotesPlaceholder(self, root): + def findFootnotesPlaceholder( + self, root: etree.Element + ) -> tuple[etree.Element, etree.Element, bool] | None: """ Return ElementTree Element that contains Footnote placeholder. """ def finder(element): for child in element: @@ -144,29 +146,29 @@ def finder(element): res = finder(root) return res - def setFootnote(self, id, text) -> None: + def setFootnote(self, id: str, text: str) -> None: """ Store a footnote for later retrieval. """ self.footnotes[id] = text - def get_separator(self): + def get_separator(self) -> str: """ Get the footnote separator. """ return self.getConfig("SEPARATOR") - def makeFootnoteId(self, id): + def makeFootnoteId(self, id: str) -> str: """ Return footnote link id. """ if self.getConfig("UNIQUE_IDS"): return 'fn%s%d-%s' % (self.get_separator(), self.unique_prefix, id) else: return 'fn{}{}'.format(self.get_separator(), id) - def makeFootnoteRefId(self, id, found: bool = False): + def makeFootnoteRefId(self, id: str, found: bool = False) -> str: """ Return footnote back-link id. """ if self.getConfig("UNIQUE_IDS"): return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found) else: return self.unique_ref('fnref{}{}'.format(self.get_separator(), id), found) - def makeFootnotesDiv(self, root): + def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None: """ Return `div` of footnotes as `etree` Element. """ if not list(self.footnotes.keys()): @@ -216,14 +218,14 @@ class FootnoteBlockProcessor(BlockProcessor): RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE) - def __init__(self, footnotes): + def __init__(self, footnotes: FootnoteExtension): super().__init__(footnotes.parser) self.footnotes = footnotes - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return True - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> bool: """ Find, set, and remove footnote definitions. """ block = blocks.pop(0) m = self.RE.search(block) @@ -259,7 +261,7 @@ def run(self, parent, blocks): blocks.insert(0, block) return False - def detectTabbed(self, blocks) -> list[str]: + def detectTabbed(self, blocks: list[str]) -> list[str]: """ Find indented text and remove indent before further processing. Returns: @@ -288,7 +290,7 @@ def detectTabbed(self, blocks) -> list[str]: break return fn_blocks - def detab(self, block): + def detab(self, block: str) -> str: """ Remove one level of indent from a block. Preserve lazily indented blocks by only removing indent from indented lines. @@ -303,11 +305,11 @@ def detab(self, block): class FootnoteInlineProcessor(InlineProcessor): """ `InlineProcessor` for footnote markers in a document's body text. """ - def __init__(self, pattern, footnotes): + def __init__(self, pattern: str, footnotes: FootnoteExtension): super().__init__(pattern) self.footnotes = footnotes - def handleMatch(self, m, data): + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]: id = m.group(1) if id in self.footnotes.footnotes.keys(): sup = etree.Element("sup") @@ -326,10 +328,10 @@ def handleMatch(self, m, data): class FootnotePostTreeprocessor(Treeprocessor): """ Amend footnote div with duplicates. """ - def __init__(self, footnotes): + def __init__(self, footnotes: FootnoteExtension): self.footnotes = footnotes - def add_duplicates(self, li, duplicates) -> None: + def add_duplicates(self, li: etree.Element, duplicates: int) -> None: """ Adjust current `li` and add the duplicates: `fnref2`, `fnref3`, etc. """ for link in li.iter('a'): # Find the link that needs to be duplicated. @@ -349,13 +351,13 @@ def add_duplicates(self, li, duplicates) -> None: el.append(link) break - def get_num_duplicates(self, li): + def get_num_duplicates(self, li: etree.Element) -> int: """ Get the number of duplicate refs of the footnote. """ fn, rest = li.attrib.get('id', '').split(self.footnotes.get_separator(), 1) link_id = '{}ref{}{}'.format(fn, self.footnotes.get_separator(), rest) return self.footnotes.found_refs.get(link_id, 0) - def handle_duplicates(self, parent) -> None: + def handle_duplicates(self, parent: etree.Element) -> None: """ Find duplicate footnotes and format and add the duplicates. """ for li in list(parent): # Check number of duplicates footnotes and insert @@ -364,7 +366,7 @@ def handle_duplicates(self, parent) -> None: if count > 1: self.add_duplicates(li, count) - def run(self, root): + def run(self, root: etree.Element) -> None: """ Crawl the footnote div and add missing duplicate footnotes. """ self.offset = 0 for div in root.iter('div'): @@ -379,10 +381,10 @@ def run(self, root): class FootnoteTreeprocessor(Treeprocessor): """ Build and append footnote div to end of document. """ - def __init__(self, footnotes): + def __init__(self, footnotes: FootnoteExtension): self.footnotes = footnotes - def run(self, root): + def run(self, root: etree.Element) -> None: footnotesDiv = self.footnotes.makeFootnotesDiv(root) if footnotesDiv is not None: result = self.footnotes.findFootnotesPlaceholder(root) @@ -401,10 +403,10 @@ def run(self, root): class FootnotePostprocessor(Postprocessor): """ Replace placeholders with html entities. """ - def __init__(self, footnotes): + def __init__(self, footnotes: FootnoteExtension): self.footnotes = footnotes - def run(self, text): + def run(self, text: str) -> str: text = text.replace( FN_BACKLINK_TEXT, self.footnotes.getConfig("BACKLINK_TEXT") ) diff --git a/markdown/extensions/legacy_attrs.py b/markdown/extensions/legacy_attrs.py index 56ad2e89..6641e6ea 100644 --- a/markdown/extensions/legacy_attrs.py +++ b/markdown/extensions/legacy_attrs.py @@ -33,13 +33,17 @@ import re from markdown.treeprocessors import Treeprocessor, isString from markdown.extensions import Extension +from typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + import xml.etree.ElementTree as etree ATTR_RE = re.compile(r'\{@([^\}]*)=([^\}]*)}') # {@id=123} class LegacyAttrs(Treeprocessor): - def run(self, doc): + def run(self, doc: etree.Element) -> None: """Find and set values of attributes ({@key=value}). """ for el in doc.iter(): alt = el.get('alt', None) @@ -50,9 +54,9 @@ def run(self, doc): if el.tail and isString(el.tail): el.tail = self.handleAttributes(el, el.tail) - def handleAttributes(self, el, txt): + def handleAttributes(self, el: etree.Element, txt: str) -> str: """ Set attributes and return text without definitions. """ - def attributeCallback(match): + def attributeCallback(match: re.Match[str]): el.set(match.group(1), match.group(2).replace('\n', ' ')) return ATTR_RE.sub(attributeCallback, txt) diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index 982d6039..64b84a5f 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -28,6 +28,10 @@ from .. import util from ..htmlparser import HTMLExtractor, blank_line_re import xml.etree.ElementTree as etree +from typing import TYPE_CHECKING, Literal, Mapping + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown class HTMLExtractorExtra(HTMLExtractor): @@ -36,7 +40,7 @@ class HTMLExtractorExtra(HTMLExtractor): Markdown. """ - def __init__(self, md, *args, **kwargs): + def __init__(self, md: Markdown, *args, **kwargs): # All block-level tags. self.block_level_tags = set(md.block_level_elements.copy()) # Block-level tags in which the content only gets span level parsing @@ -54,9 +58,9 @@ def __init__(self, md, *args, **kwargs): def reset(self): """Reset this instance. Loses all unprocessed data.""" - self.mdstack = [] # When markdown=1, stack contains a list of tags + self.mdstack: list[str] = [] # When markdown=1, stack contains a list of tags self.treebuilder = etree.TreeBuilder() - self.mdstate = [] # one of 'block', 'span', 'off', or None + self.mdstate: list[Literal['block', 'span', 'off', None]] = [] super().reset() def close(self): @@ -67,13 +71,13 @@ def close(self): # Close the outermost parent. `handle_endtag` will close all unclosed children. self.handle_endtag(self.mdstack[0]) - def get_element(self): + def get_element(self) -> etree.Element: """ Return element from `treebuilder` and reset `treebuilder` for later use. """ element = self.treebuilder.close() self.treebuilder = etree.TreeBuilder() return element - def get_state(self, tag, attrs): + def get_state(self, tag, attrs: Mapping[str, str]) -> Literal['block', 'span', 'off', None]: """ Return state from tag and `markdown` attribute. One of 'block', 'span', or 'off'. """ md_attr = attrs.get('markdown', '0') if md_attr == 'markdown': @@ -215,7 +219,7 @@ def handle_empty_tag(self, data, is_block): else: self.handle_data(self.md.htmlStash.store(data)) - def parse_pi(self, i): + def parse_pi(self, i: int) -> int: if self.at_line_start() or self.intail or self.mdstack: # The same override exists in `HTMLExtractor` without the check # for `mdstack`. Therefore, use parent of `HTMLExtractor` instead. @@ -225,7 +229,7 @@ def parse_pi(self, i): self.handle_data('<?') return i + 2 - def parse_html_declaration(self, i): + def parse_html_declaration(self, i: int) -> int: if self.at_line_start() or self.intail or self.mdstack: # The same override exists in `HTMLExtractor` without the check # for `mdstack`. Therefore, use parent of `HTMLExtractor` instead. @@ -239,7 +243,7 @@ def parse_html_declaration(self, i): class HtmlBlockPreprocessor(Preprocessor): """Remove html blocks from the text and store them for later retrieval.""" - def run(self, lines): + def run(self, lines: list[str]) -> list[str]: source = '\n'.join(lines) parser = HTMLExtractorExtra(self.md) parser.feed(source) @@ -250,11 +254,11 @@ def run(self, lines): class MarkdownInHtmlProcessor(BlockProcessor): """Process Markdown Inside HTML Blocks which have been stored in the `HtmlStash`.""" - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: # Always return True. `run` will return `False` it not a valid match. return True - def parse_element_content(self, element): + def parse_element_content(self, element: etree.Element) -> None: """ Recursively parse the text content of an `etree` Element as Markdown. @@ -324,7 +328,7 @@ def parse_element_content(self, element): if child.tail: child.tail = util.AtomicString(child.tail) - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> bool: m = util.HTML_PLACEHOLDER_RE.match(blocks[0]) if m: index = int(m.group(1)) @@ -344,7 +348,7 @@ def run(self, parent, blocks): class MarkdownInHTMLPostprocessor(RawHtmlPostprocessor): - def stash_to_string(self, text): + def stash_to_string(self, text: str | etree.Element) -> str: """ Override default to handle any `etree` elements still in the stash. """ if isinstance(text, etree.Element): return self.md.serializer(text) diff --git a/markdown/extensions/meta.py b/markdown/extensions/meta.py index 82179273..cb703399 100644 --- a/markdown/extensions/meta.py +++ b/markdown/extensions/meta.py @@ -25,6 +25,7 @@ from ..preprocessors import Preprocessor import re import logging +from typing import Any log = logging.getLogger('MARKDOWN') @@ -51,9 +52,9 @@ def reset(self) -> None: class MetaPreprocessor(Preprocessor): """ Get Meta-Data. """ - def run(self, lines): + def run(self, lines: list[str]) -> list[str]: """ Parse Meta-Data and store in Markdown.Meta. """ - meta = {} + meta: dict[str, Any] = {} key = None if lines and BEGIN_RE.match(lines[0]): lines.pop(0) diff --git a/markdown/extensions/sane_lists.py b/markdown/extensions/sane_lists.py index 305bd992..be421f94 100644 --- a/markdown/extensions/sane_lists.py +++ b/markdown/extensions/sane_lists.py @@ -24,6 +24,10 @@ from . import Extension from ..blockprocessors import OListProcessor, UListProcessor import re +from typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from .. import blockparser class SaneOListProcessor(OListProcessor): @@ -34,7 +38,7 @@ class SaneOListProcessor(OListProcessor): LAZY_OL = False """ Disable lazy list behavior. """ - def __init__(self, parser): + def __init__(self, parser: blockparser.BlockParser): super().__init__(parser) self.CHILD_RE = re.compile(r'^[ ]{0,%d}((\d+\.))[ ]+(.*)' % (self.tab_length - 1)) @@ -46,7 +50,7 @@ class SaneUListProcessor(UListProcessor): SIBLING_TAGS = ['ul'] """ Exclude `ol` from list of siblings. """ - def __init__(self, parser): + def __init__(self, parser: blockparser.BlockParser): super().__init__(parser) self.CHILD_RE = re.compile(r'^[ ]{0,%d}(([*+-]))[ ]+(.*)' % (self.tab_length - 1)) diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py index 3274bf86..0ce7772a 100644 --- a/markdown/extensions/smarty.py +++ b/markdown/extensions/smarty.py @@ -90,7 +90,13 @@ from ..inlinepatterns import HtmlInlineProcessor, HTML_RE from ..treeprocessors import InlineProcessor from ..util import Registry +from typing import TYPE_CHECKING, Sequence +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown + from .. import inlinepatterns + import re + import xml.etree.ElementTree as etree # Constants for quote education. punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" @@ -155,13 +161,13 @@ class SubstituteTextPattern(HtmlInlineProcessor): - def __init__(self, pattern, replace, md): + def __init__(self, pattern: str, replace: Sequence[int | str | etree.Element], md: Markdown): """ Replaces matches with some text. """ HtmlInlineProcessor.__init__(self, pattern) self.replace = replace self.md = md - def handleMatch(self, m, data): + def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: result = '' for part in self.replace: if isinstance(part, int): @@ -183,17 +189,23 @@ def __init__(self, **kwargs): } """ Default configuration options. """ super().__init__(**kwargs) - self.substitutions = dict(substitutions) + self.substitutions: dict[str, str] = dict(substitutions) self.substitutions.update(self.getConfig('substitutions', default={})) - def _addPatterns(self, md, patterns, serie, priority): + def _addPatterns( + self, + md: Markdown, + patterns: Sequence[tuple[str, Sequence[int | str | etree.Element]]], + serie: str, + priority: int, + ): for ind, pattern in enumerate(patterns): pattern += (md,) pattern = SubstituteTextPattern(*pattern) name = 'smarty-%s-%d' % (serie, ind) self.inlinePatterns.register(pattern, name, priority-ind) - def educateDashes(self, md) -> None: + def educateDashes(self, md: Markdown) -> None: emDashesPattern = SubstituteTextPattern( r'(?<!-)---(?!-)', (self.substitutions['mdash'],), md ) @@ -203,13 +215,13 @@ def educateDashes(self, md) -> None: self.inlinePatterns.register(emDashesPattern, 'smarty-em-dashes', 50) self.inlinePatterns.register(enDashesPattern, 'smarty-en-dashes', 45) - def educateEllipses(self, md) -> None: + def educateEllipses(self, md: Markdown) -> None: ellipsesPattern = SubstituteTextPattern( r'(?<!\.)\.{3}(?!\.)', (self.substitutions['ellipsis'],), md ) self.inlinePatterns.register(ellipsesPattern, 'smarty-ellipses', 10) - def educateAngledQuotes(self, md) -> None: + def educateAngledQuotes(self, md: Markdown) -> None: leftAngledQuotePattern = SubstituteTextPattern( r'\<\<', (self.substitutions['left-angle-quote'],), md ) @@ -219,7 +231,7 @@ def educateAngledQuotes(self, md) -> None: self.inlinePatterns.register(leftAngledQuotePattern, 'smarty-left-angle-quotes', 40) self.inlinePatterns.register(rightAngledQuotePattern, 'smarty-right-angle-quotes', 35) - def educateQuotes(self, md) -> None: + def educateQuotes(self, md: Markdown) -> None: lsquo = self.substitutions['left-single-quote'] rsquo = self.substitutions['right-single-quote'] ldquo = self.substitutions['left-double-quote'] @@ -243,7 +255,7 @@ def educateQuotes(self, md) -> None: def extendMarkdown(self, md): configs = self.getConfigs() - self.inlinePatterns: Registry[HtmlInlineProcessor] = Registry() + self.inlinePatterns: Registry[inlinepatterns.InlineProcessor] = Registry() if configs['smart_ellipses']: self.educateEllipses(md) if configs['smart_quotes']: diff --git a/markdown/extensions/tables.py b/markdown/extensions/tables.py index a9e5f13d..6e2fa174 100644 --- a/markdown/extensions/tables.py +++ b/markdown/extensions/tables.py @@ -25,6 +25,11 @@ from ..blockprocessors import BlockProcessor import xml.etree.ElementTree as etree import re +from typing import TYPE_CHECKING, Any, Sequence + +if TYPE_CHECKING: # pragma: no cover + from .. import blockparser + PIPE_NONE = 0 PIPE_LEFT = 1 PIPE_RIGHT = 2 @@ -36,14 +41,14 @@ class TableProcessor(BlockProcessor): RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))') RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$') - def __init__(self, parser, config): - self.border = False - self.separator = '' + def __init__(self, parser: blockparser.BlockParser, config: dict[str, Any]): + self.border: bool | int = False + self.separator: Sequence[str] = '' self.config = config super().__init__(parser) - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: """ Ensure first two rows (column header and separator row) are valid table rows. @@ -79,14 +84,14 @@ def test(self, parent, block): return is_table - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: """ Parse a table block and build table. """ block = blocks.pop(0).split('\n') header = block[0].strip(' ') rows = [] if len(block) < 3 else block[2:] # Get alignment of columns - align = [] + align: list[str | None] = [] for c in self.separator: c = c.strip(' ') if c.startswith(':') and c.endswith(':'): @@ -110,7 +115,7 @@ def run(self, parent, blocks): for row in rows: self._build_row(row.strip(' '), tbody, align) - def _build_empty_row(self, parent, align): + def _build_empty_row(self, parent: etree.Element, align: Sequence[str | None]) -> None: """Build an empty row.""" tr = etree.SubElement(parent, 'tr') count = len(align) @@ -118,7 +123,7 @@ def _build_empty_row(self, parent, align): etree.SubElement(tr, 'td') count -= 1 - def _build_row(self, row, parent, align): + def _build_row(self, row: str, parent: etree.Element, align: Sequence[str | None]) -> None: """ Given a row of text, build table cells. """ tr = etree.SubElement(parent, 'tr') tag = 'td' @@ -139,7 +144,7 @@ def _build_row(self, row, parent, align): else: c.set('style', f'text-align: {a};') - def _split_row(self, row): + def _split_row(self, row: str) -> list[str]: """ split a row of text into list of cells. """ if self.border: if row.startswith('|'): @@ -147,7 +152,7 @@ def _split_row(self, row): row = self.RE_END_BORDER.sub('', row) return self._split(row) - def _split(self, row): + def _split(self, row: str) -> list[str]: """ split a row of text with some code into a list of cells. """ elements = [] pipes = [] diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index 64c20c80..a17d7241 100644 --- a/markdown/extensions/toc.py +++ b/markdown/extensions/toc.py @@ -27,9 +27,13 @@ import html import unicodedata import xml.etree.ElementTree as etree +from typing import TYPE_CHECKING, Any, Iterator, MutableSet +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown -def slugify(value, separator, unicode=False): + +def slugify(value: str, separator: str, unicode: bool = False) -> str: """ Slugify a string, to make it URL friendly. """ if not unicode: # Replace Extended Latin characters with ASCII, i.e. `žlutý` => `zluty` @@ -39,7 +43,7 @@ def slugify(value, separator, unicode=False): return re.sub(r'[{}\s]+'.format(separator), separator, value) -def slugify_unicode(value, separator): +def slugify_unicode(value: str, separator: str) -> str: """ Slugify a string, to make it URL friendly while preserving Unicode characters. """ return slugify(value, separator, unicode=True) @@ -47,7 +51,7 @@ def slugify_unicode(value, separator): IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$') -def unique(id, ids): +def unique(id: str, ids: MutableSet[str]) -> str: """ Ensure id is unique in set of ids. Append '_1', '_2'... if not """ while id in ids or not id: m = IDCOUNT_RE.match(id) @@ -59,7 +63,7 @@ def unique(id, ids): return id -def get_name(el): +def get_name(el: etree.Element) -> str: """Get title name.""" text = [] @@ -71,9 +75,9 @@ def get_name(el): return ''.join(text).strip() -def stashedHTML2text(text, md, strip_entities: bool = True): +def stashedHTML2text(text: str, md: Markdown, strip_entities: bool = True) -> str: """ Extract raw HTML from stash, reduce to plain text and swap with placeholder. """ - def _html_sub(m): + def _html_sub(m: re.Match[str]) -> str: """ Substitute raw html with plain text. """ try: raw = md.htmlStash.rawHtmlBlocks[int(m.group(1))] @@ -88,7 +92,7 @@ def _html_sub(m): return HTML_PLACEHOLDER_RE.sub(_html_sub, text) -def unescape(text): +def unescape(text: str) -> str: """ Unescape escaped text. """ c = UnescapeTreeprocessor() return c.unescape(text) @@ -162,24 +166,24 @@ def nest_toc_tokens(toc_list): class TocTreeprocessor(Treeprocessor): """ Step through document and build TOC. """ - def __init__(self, md, config): + def __init__(self, md: Markdown, config: dict[str, Any]): super().__init__(md) - self.marker = config["marker"] - self.title = config["title"] + self.marker: str = config["marker"] + self.title: str = config["title"] self.base_level = int(config["baselevel"]) - 1 self.slugify = config["slugify"] self.sep = config["separator"] self.toc_class = config["toc_class"] - self.title_class = config["title_class"] - self.use_anchors = parseBoolValue(config["anchorlink"]) - self.anchorlink_class = config["anchorlink_class"] + self.title_class: str = config["title_class"] + self.use_anchors: bool = parseBoolValue(config["anchorlink"]) + self.anchorlink_class: str = config["anchorlink_class"] self.use_permalinks = parseBoolValue(config["permalink"], False) if self.use_permalinks is None: self.use_permalinks = config["permalink"] - self.permalink_class = config["permalink_class"] - self.permalink_title = config["permalink_title"] - self.permalink_leading = parseBoolValue(config["permalink_leading"], False) + self.permalink_class: str = config["permalink_class"] + self.permalink_title: str = config["permalink_title"] + self.permalink_leading: bool | None = parseBoolValue(config["permalink_leading"], False) self.header_rgx = re.compile("[Hh][123456]") if isinstance(config["toc_depth"], str) and '-' in config["toc_depth"]: self.toc_top, self.toc_bottom = [int(x) for x in config["toc_depth"].split('-')] @@ -187,7 +191,7 @@ def __init__(self, md, config): self.toc_top = 1 self.toc_bottom = int(config["toc_depth"]) - def iterparent(self, node): + def iterparent(self, node: etree.Element) -> Iterator[tuple[etree.Element, etree.Element]]: """ Iterator wrapper to get allowed parent and child all at once. """ # We do not allow the marker inside a header as that @@ -198,7 +202,7 @@ def iterparent(self, node): yield node, child yield from self.iterparent(child) - def replace_marker(self, root, elem) -> None: + def replace_marker(self, root: etree.Element, elem: etree.Element) -> None: """ Replace marker with elem. """ for (p, c) in self.iterparent(root): text = ''.join(c.itertext()).strip() @@ -219,14 +223,14 @@ def replace_marker(self, root, elem) -> None: p[i] = elem break - def set_level(self, elem) -> None: + def set_level(self, elem: etree.Element) -> None: """ Adjust header level according to base level. """ level = int(elem.tag[-1]) + self.base_level if level > 6: level = 6 elem.tag = 'h%d' % level - def add_anchor(self, c, elem_id) -> None: + def add_anchor(self, c: etree.Element, elem_id: str) -> None: anchor = etree.Element("a") anchor.text = c.text anchor.attrib["href"] = "#" + elem_id @@ -238,7 +242,7 @@ def add_anchor(self, c, elem_id) -> None: c.remove(c[0]) c.append(anchor) - def add_permalink(self, c, elem_id) -> None: + def add_permalink(self, c: etree.Element, elem_id: str) -> None: permalink = etree.Element("a") permalink.text = ("%spara;" % AMP_SUBSTITUTE if self.use_permalinks is True @@ -254,7 +258,7 @@ def add_permalink(self, c, elem_id) -> None: else: c.append(permalink) - def build_toc_div(self, toc_list): + def build_toc_div(self, toc_list: list) -> etree.Element: """ Return a string div given a toc list. """ div = etree.Element("div") div.attrib["class"] = self.toc_class @@ -266,7 +270,7 @@ def build_toc_div(self, toc_list): header.attrib["class"] = self.title_class header.text = self.title - def build_etree_ul(toc_list, parent): + def build_etree_ul(toc_list: list, parent: etree.Element) -> etree.Element: ul = etree.SubElement(parent, "ul") for item in toc_list: # List item link, to be inserted into the toc div @@ -285,7 +289,7 @@ def build_etree_ul(toc_list, parent): return div - def run(self, doc): + def run(self, doc: etree.Element) -> None: # Get a list of id attributes used_ids = set() for el in doc.iter(): diff --git a/markdown/extensions/wikilinks.py b/markdown/extensions/wikilinks.py index 9d5acfa3..3f3cbe2d 100644 --- a/markdown/extensions/wikilinks.py +++ b/markdown/extensions/wikilinks.py @@ -25,9 +25,10 @@ from ..inlinepatterns import InlineProcessor import xml.etree.ElementTree as etree import re +from typing import Any -def build_url(label, base, end): +def build_url(label: str, base: str, end: str) -> str: """ Build a URL from the label, a base, and an end. """ clean_label = re.sub(r'([ ]+_)|(_[ ]+)|([ ]+)', '_', label) return '{}{}{}'.format(base, clean_label, end) @@ -59,11 +60,11 @@ def extendMarkdown(self, md): class WikiLinksInlineProcessor(InlineProcessor): """ Build link from `wikilink`. """ - def __init__(self, pattern, config): + def __init__(self, pattern: str, config: dict[str, Any]): super().__init__(pattern) self.config = config - def handleMatch(self, m, data): + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | str, int, int]: if m.group(1).strip(): base_url, end_url, html_class = self._getMeta() label = m.group(1).strip() @@ -77,7 +78,7 @@ def handleMatch(self, m, data): a = '' return a, m.start(0), m.end(0) - def _getMeta(self): + def _getMeta(self) -> tuple[str, str, str]: """ Return meta data or `config` data. """ base_url = self.config['base_url'] end_url = self.config['end_url'] diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index 29e23009..5155ef69 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -28,6 +28,10 @@ import re import importlib.util import sys +from typing import TYPE_CHECKING, Sequence + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown # Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it. @@ -76,7 +80,7 @@ class HTMLExtractor(htmlparser.HTMLParser): is stored in `cleandoc` as a list of strings. """ - def __init__(self, md, *args, **kwargs): + def __init__(self, md: Markdown, *args, **kwargs): if 'convert_charrefs' not in kwargs: kwargs['convert_charrefs'] = False @@ -93,9 +97,9 @@ def reset(self): """Reset this instance. Loses all unprocessed data.""" self.inraw = False self.intail = False - self.stack = [] # When `inraw==True`, stack contains a list of tags - self._cache = [] - self.cleandoc = [] + self.stack: list[str] = [] # When `inraw==True`, stack contains a list of tags + self._cache: list[str] = [] + self.cleandoc: list[str] = [] self.lineno_start_cache = [0] super().reset() @@ -156,7 +160,7 @@ def get_endtag_text(self, tag: str) -> str: # Failed to extract from raw data. Assume well formed and lowercase. return '</{}>'.format(tag) - def handle_starttag(self, tag: str, attrs: list[tuple[str, str]]): + def handle_starttag(self, tag: str, attrs: Sequence[tuple[str, str]]): # Handle tags that should always be empty and do not specify a closing tag if tag in self.empty_tags: self.handle_startendtag(tag, attrs) @@ -235,7 +239,7 @@ def handle_empty_tag(self, data: str, is_block: bool): else: self.cleandoc.append(data) - def handle_startendtag(self, tag: str, attrs: list[tuple[str, str]]): + def handle_startendtag(self, tag: str, attrs): self.handle_empty_tag(self.get_starttag_text(), is_block=self.md.is_block_level(tag)) def handle_charref(self, name: str): diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index 296ab834..ef6f0fbc 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -217,6 +217,9 @@ class initialization, the `^(.*)` and `(.*)!` are added automatically and the re would cause the content to be a descendant of one of the listed tag names. """ + compiled_re: re.Pattern[str] + md: Markdown | None + def __init__(self, pattern: str, md: Markdown | None = None): """ Create an instant of an inline pattern. @@ -429,7 +432,7 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, class BacktickInlineProcessor(InlineProcessor): """ Return a `<code>` element containing the escaped matching text. """ - def __init__(self, pattern): + def __init__(self, pattern: str): InlineProcessor.__init__(self, pattern) self.ESCAPED_BSLASH = '{}{}{}'.format(util.STX, ord('\\'), util.ETX) self.tag = 'code' @@ -502,14 +505,14 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: place_holder = self.md.htmlStash.store(rawhtml) return place_holder, m.start(0), m.end(0) - def unescape(self, text): + def unescape(self, text: str) -> str: """ Return unescaped text given text with an inline placeholder. """ try: stash = self.md.treeprocessors['inline'].stashed_nodes except KeyError: # pragma: no cover return text - def get_stash(m): + def get_stash(m: re.Match[str]) -> str: id = m.group(1) value = stash.get(id) if value is not None: @@ -520,14 +523,14 @@ def get_stash(m): return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text) - def backslash_unescape(self, text): + def backslash_unescape(self, text: str) -> str: """ Return text with backslash escapes undone (backslashes are restored). """ try: RE = self.md.treeprocessors['unescape'].RE except KeyError: # pragma: no cover return text - def _unescape(m): + def _unescape(m: re.Match[str]) -> str: return chr(int(m.group(1))) return RE.sub(_unescape, text) @@ -545,14 +548,14 @@ class AsteriskProcessor(InlineProcessor): ] """ The various strong and emphasis patterns handled by this processor. """ - def build_single(self, m, tag, idx): + def build_single(self, m: re.Match[str], tag: str, idx: int) -> etree.Element: """Return single tag.""" el1 = etree.Element(tag) text = m.group(2) self.parse_sub_patterns(text, el1, None, idx) return el1 - def build_double(self, m, tags, idx): + def build_double(self, m: re.Match[str], tags: str, idx: int) -> etree.Element: """Return double tag.""" tag1, tag2 = tags.split(",") @@ -566,7 +569,7 @@ def build_double(self, m, tags, idx): self.parse_sub_patterns(text, el1, el2, idx) return el1 - def build_double2(self, m, tags, idx): + def build_double2(self, m: re.Match[str], tags: str, idx: int) -> etree.Element: """Return double tags (variant 2): `<strong>text <em>text</em></strong>`.""" tag1, tag2 = tags.split(",") @@ -579,22 +582,19 @@ def build_double2(self, m, tags, idx): self.parse_sub_patterns(text, el2, None, idx) return el1 - def parse_sub_patterns(self, data, parent, last, idx) -> None: + def parse_sub_patterns( + self, data: str, parent: etree.Element, last: etree.Element | None, idx: int + ) -> None: """ Parses sub patterns. - `data` (`str`): - text to evaluate. - - `parent` (`etree.Element`): - Parent to attach text and sub elements to. + `data`: text to evaluate. - `last` (`etree.Element`): - Last appended child to parent. Can also be None if parent has no children. + `parent`: Parent to attach text and sub elements to. - `idx` (`int`): - Current pattern index that was used to evaluate the parent. + `last`: Last appended child to parent. Can also be None if parent has no children. + `idx`: Current pattern index that was used to evaluate the parent. """ offset = 0 @@ -643,7 +643,7 @@ def parse_sub_patterns(self, data, parent, last, idx) -> None: else: parent.text = text - def build_element(self, m, builder, tags, index): + def build_element(self, m: re.Match[str], builder: str, tags: str, index: int) -> etree.Element: """Element builder.""" if builder == 'double2': @@ -709,11 +709,11 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None return el, m.start(0), index - def getLink(self, data, index): + def getLink(self, data: str, index: int) -> tuple[str, str | None, int, bool]: """Parse data between `()` of `[Text]()` allowing recursive `()`. """ href = '' - title = None + title: str | None = None handled = False m = self.RE_LINK.match(data, pos=index) @@ -733,7 +733,7 @@ def getLink(self, data, index): last_bracket = -1 # Primary (first found) quote tracking. - quote = None + quote: str | None = None start_quote = -1 exit_quote = -1 ignore_matches = False @@ -825,7 +825,7 @@ def getLink(self, data, index): return href, title, index, handled - def getText(self, data, index): + def getText(self, data: str, index: int) -> tuple[str, int, bool]: """Parse the content between `[]` of the start of an image or link resolving nested square brackets. @@ -897,7 +897,7 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None return self.makeTag(href, title, text), m.start(0), end - def evalId(self, data, index, text): + def evalId(self, data: str, index: int, text: str) -> tuple[str | None, int, bool]: """ Evaluate the id portion of `[ref][id]`. @@ -927,7 +927,7 @@ def makeTag(self, href: str, title: str, text: str) -> etree.Element: class ShortReferenceInlineProcessor(ReferenceInlineProcessor): """Short form of reference: `[google]`. """ - def evalId(self, data, index, text): + def evalId(self, data: str, index: int, text: str) -> tuple[str, int, bool]: """Evaluate the id of `[ref]`. """ return text.lower(), index, True @@ -947,7 +947,7 @@ def makeTag(self, href: str, title: str, text: str) -> etree.Element: class ShortImageReferenceInlineProcessor(ImageReferenceInlineProcessor): """ Short form of image reference: `![ref]`. """ - def evalId(self, data, index, text): + def evalId(self, data: str, index: int, text: str) -> tuple[str, int, bool]: """Evaluate the id of `[ref]`. """ return text.lower(), index, True @@ -974,7 +974,7 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, if email.startswith("mailto:"): email = email[len("mailto:"):] - def codepoint2name(code): + def codepoint2name(code: int) -> str: """Return entity definition by code, or the code if not defined.""" entity = entities.codepoint2name.get(code) if entity: diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index 3da5ee1a..7f5ede90 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -71,7 +71,7 @@ class RawHtmlPostprocessor(Postprocessor): BLOCK_LEVEL_REGEX = re.compile(r'^\<\/?([^ >]+)') - def run(self, text: str): + def run(self, text: str) -> str: """ Iterate over html stash and restore html. """ replacements = OrderedDict() for i in range(self.md.htmlStash.html_counter): @@ -81,7 +81,7 @@ def run(self, text: str): self.md.htmlStash.get_placeholder(i))] = html replacements[self.md.htmlStash.get_placeholder(i)] = html - def substitute_match(m): + def substitute_match(m: re.Match[str]) -> str: key = m.group(0) if key not in replacements: @@ -122,7 +122,7 @@ def stash_to_string(self, text: str) -> str: class AndSubstitutePostprocessor(Postprocessor): """ Restore valid entities """ - def run(self, text): + def run(self, text: str) -> str: text = text.replace(util.AMP_SUBSTITUTE, "&") return text @@ -136,8 +136,8 @@ class UnescapePostprocessor(Postprocessor): RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX)) - def unescape(self, m): + def unescape(self, m: re.Match[str]) -> str: return chr(int(m.group(1))) - def run(self, text): + def run(self, text: str) -> str: return self.RE.sub(self.unescape, text) diff --git a/markdown/serializers.py b/markdown/serializers.py index 5a8818e2..573b2648 100644 --- a/markdown/serializers.py +++ b/markdown/serializers.py @@ -48,19 +48,20 @@ from xml.etree.ElementTree import ProcessingInstruction from xml.etree.ElementTree import Comment, ElementTree, Element, QName, HTML_EMPTY import re +from typing import Callable, Literal, NoReturn __all__ = ['to_html_string', 'to_xhtml_string'] RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|\#x[0-9a-f]+|[0-9a-z]+);)', re.I) -def _raise_serialization_error(text): # pragma: no cover +def _raise_serialization_error(text: str) -> NoReturn: # pragma: no cover raise TypeError( "cannot serialize {!r} (type {})".format(text, type(text).__name__) ) -def _escape_cdata(text): +def _escape_cdata(text) -> str: # escape character data try: # it's worth avoiding do-nothing calls for strings that are @@ -78,7 +79,7 @@ def _escape_cdata(text): _raise_serialization_error(text) -def _escape_attrib(text): +def _escape_attrib(text: str) -> str: # escape attribute value try: if "&" in text: @@ -97,7 +98,7 @@ def _escape_attrib(text): _raise_serialization_error(text) -def _escape_attrib_html(text): +def _escape_attrib_html(text: str) -> str: # escape attribute value try: if "&" in text: @@ -114,7 +115,7 @@ def _escape_attrib_html(text): _raise_serialization_error(text) -def _serialize_html(write, elem, format): +def _serialize_html(write: Callable[[str], None], elem: Element, format: Literal["html", "xhtml"]) -> None: tag = elem.tag text = elem.text if tag is Comment: @@ -171,9 +172,9 @@ def _serialize_html(write, elem, format): write(_escape_cdata(elem.tail)) -def _write_html(root, format="html"): +def _write_html(root: Element, format: Literal["html", "xhtml"] = "html") -> str: assert root is not None - data = [] + data: list[str] = [] write = data.append _serialize_html(write, root, format) return "".join(data) diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index 59a3eb3c..dc857204 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -45,7 +45,7 @@ def build_treeprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Treeproce return treeprocessors -def isString(s: Any) -> bool: +def isString(s: object) -> bool: """ Return `True` if object is a string but not an [`AtomicString`][markdown.util.AtomicString]. """ if not isinstance(s, util.AtomicString): return isinstance(s, str) @@ -77,7 +77,7 @@ class InlineProcessor(Treeprocessor): A `Treeprocessor` that traverses a tree, applying inline patterns. """ - def __init__(self, md): + def __init__(self, md: Markdown): self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX self.__placeholder_suffix = util.ETX self.__placeholder_length = 4 + len(self.__placeholder_prefix) \ @@ -85,9 +85,9 @@ def __init__(self, md): self.__placeholder_re = util.INLINE_PLACEHOLDER_RE self.md = md self.inlinePatterns = md.inlinePatterns - self.ancestors = [] + self.ancestors: list[str] = [] - def __makePlaceholder(self, type) -> tuple[str, str]: + def __makePlaceholder(self, type: str) -> tuple[str, str]: """ Generate a placeholder """ id = "%04d" % len(self.stashed_nodes) hash = util.INLINE_PLACEHOLDER % id @@ -111,7 +111,7 @@ def __findPlaceholder(self, data: str, index: int) -> tuple[str | None, int]: else: return None, index + 1 - def __stashNode(self, node, type) -> str: + def __stashNode(self, node: etree.Element | str, type: str) -> str: """ Add node to stash. """ placeholder, id = self.__makePlaceholder(type) self.stashed_nodes[id] = node @@ -140,7 +140,7 @@ def __handleInline(self, data: str, patternIndex: int = 0) -> str: patternIndex += 1 return data - def __processElementText(self, node: etree.Element, subnode: etree.Element, isText: bool = True): + def __processElementText(self, node: etree.Element, subnode: etree.Element, isText: bool = True) -> None: """ Process placeholders in `Element.text` or `Element.tail` of Elements popped from `self.stashed_nodes`. @@ -171,10 +171,10 @@ def __processElementText(self, node: etree.Element, subnode: etree.Element, isTe def __processPlaceholders( self, - data: str, + data: str | None, parent: etree.Element, isText: bool = True - ) -> list[tuple[etree.Element, Any]]: + ) -> list[tuple[etree.Element, list[str]]]: """ Process string with placeholders and generate `ElementTree` tree. @@ -187,7 +187,7 @@ def __processPlaceholders( List with `ElementTree` elements with applied inline patterns. """ - def linkText(text): + def linkText(text: str | None) -> None: if text: if result: if result[-1][0].tail: @@ -330,7 +330,7 @@ def __applyPattern( match.group(1), placeholder, match.groups()[-1]), True, 0 - def __build_ancestors(self, parent, parents): + def __build_ancestors(self, parent: etree.Element | None, parents: list[str]) -> None: """Build the ancestor list.""" ancestors = [] while parent is not None: @@ -358,7 +358,7 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree. An element tree object with applied inline patterns. """ - self.stashed_nodes: dict[str, etree.Element] = {} + self.stashed_nodes: dict[str, etree.Element | str] = {} # Ensure a valid parent list, but copy passed in lists # to ensure we don't have the user accidentally change it on us. @@ -415,7 +415,7 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree. class PrettifyTreeprocessor(Treeprocessor): """ Add line breaks to the html document. """ - def _prettifyETree(self, elem): + def _prettifyETree(self, elem: etree.Element) -> None: """ Recursively add line breaks to `ElementTree` children. """ i = "\n" @@ -456,13 +456,13 @@ class UnescapeTreeprocessor(Treeprocessor): RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX)) - def _unescape(self, m): + def _unescape(self, m: re.Match[str]) -> str: return chr(int(m.group(1))) def unescape(self, text: str) -> str: return self.RE.sub(self._unescape, text) - def run(self, root): + def run(self, root: etree.Element) -> None: """ Loop over all elements and unescape all text. """ for elem in root.iter(): # Unescape text content diff --git a/markdown/util.py b/markdown/util.py index 827befd8..b4642023 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -29,10 +29,11 @@ import warnings from functools import wraps, lru_cache from itertools import count -from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, overload +from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload if TYPE_CHECKING: # pragma: no cover from markdown import Markdown + import xml.etree.ElementTree as etree _T = TypeVar('_T') @@ -164,7 +165,7 @@ def code_escape(text: str) -> str: return text -def _get_stack_depth(size=2): +def _get_stack_depth(size: int = 2) -> int: """Get current stack depth, performantly. """ frame = sys._getframe(size) @@ -203,6 +204,14 @@ def __init__(self, md: Markdown | None = None): self.md = md +if TYPE_CHECKING: # pragma: no cover + class TagData(TypedDict): + tag: str + attrs: dict[str, str] + left_index: int + right_index: int + + class HtmlStash: """ This class is used for stashing HTML objects that we extract @@ -212,11 +221,11 @@ class HtmlStash: def __init__(self): """ Create an `HtmlStash`. """ self.html_counter = 0 # for counting inline html segments - self.rawHtmlBlocks = [] + self.rawHtmlBlocks: list[str | etree.Element] = [] self.tag_counter = 0 - self.tag_data = [] # list of dictionaries in the order tags appear + self.tag_data: list[TagData] = [] # list of dictionaries in the order tags appear - def store(self, html: str) -> str: + def store(self, html: str | etree.Element) -> str: """ Saves an HTML segment for later reinsertion. Returns a placeholder string that needs to be inserted into the @@ -242,7 +251,7 @@ def reset(self) -> None: def get_placeholder(self, key: int) -> str: return HTML_PLACEHOLDER % key - def store_tag(self, tag: str, attrs: list, left_index: int, right_index: int) -> str: + def store_tag(self, tag: str, attrs: dict[str, str], left_index: int, right_index: int) -> str: """Store tag data and return a placeholder.""" self.tag_data.append({'tag': tag, 'attrs': attrs, 'left_index': left_index, @@ -302,7 +311,7 @@ class Registry(Generic[_T]): def __init__(self): self._data: dict[str, _T] = {} - self._priority = [] + self._priority: list[_PriorityItem] = [] self._is_sorted = False def __contains__(self, item: str | _T) -> bool: @@ -388,7 +397,7 @@ def deregister(self, name: str, strict: bool = True) -> None: if strict: raise - def _sort(self): + def _sort(self) -> None: """ Sort the registry by priority from highest to lowest.