mitmproxy · mhils · Apr 24, 2023 · Apr 24, 2023 · Apr 24, 2023 · Apr 24, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,13 @@
 
 <!-- ✨ You do not need to add a pull request reference or an author, this will be added automatically by CI. ✨ -->
 
+ - Fix rendering of dynamically modified docstrings.
+   ([#537](https://github.com/mitmproxy/pdoc/pull/537), @mhils)
+ - Updated bundled markdown2 version to fix a bug with empty code blocks.
+   ([#537](https://github.com/mitmproxy/pdoc/pull/537), @mhils)
+ - `pdoc.doc_ast.AstInfo` now has separate `func_docstrings` and `var_docstrings` attributes
+   instead of a single combined `docstrings` attribute.
+   ([#537](https://github.com/mitmproxy/pdoc/pull/537), @mhils)
 
 ## 2023-03-31: pdoc 13.1.0
 

diff --git a/pdoc/doc.py b/pdoc/doc.py
@@ -223,6 +223,11 @@ def _member_objects(self) -> dict[str, Any]:
     def _var_docstrings(self) -> dict[str, str]:
         """A mapping from some member variable names to their docstrings."""
 
+    @cached_property
+    @abstractmethod
+    def _func_docstrings(self) -> dict[str, str]:
+        """A mapping from some member function names to their raw (not processed by any @decorators) docstrings."""
+
     @cached_property
     @abstractmethod
     def _var_annotations(self) -> dict[str, Any]:
@@ -312,6 +317,8 @@ def members(self) -> dict[str, Doc]:
                 )
             if self._var_docstrings.get(name):
                 doc.docstring = self._var_docstrings[name]
+            if self._func_docstrings.get(name) and not doc.docstring:
+                doc.docstring = self._func_docstrings[name]
             members[doc.name] = doc
 
         if isinstance(self, Module):
@@ -409,7 +416,11 @@ def is_package(self) -> bool:
 
     @cached_property
     def _var_docstrings(self) -> dict[str, str]:
-        return doc_ast.walk_tree(self.obj).docstrings
+        return doc_ast.walk_tree(self.obj).var_docstrings
+
+    @cached_property
+    def _func_docstrings(self) -> dict[str, str]:
+        return doc_ast.walk_tree(self.obj).func_docstrings
 
     @cached_property
     def _var_annotations(self) -> dict[str, Any]:
@@ -478,7 +489,11 @@ def include(name: str) -> bool:
 
     @cached_property
     def _documented_members(self) -> set[str]:
-        return self._var_docstrings.keys() | self._var_annotations.keys()
+        return (
+            self._var_docstrings.keys()
+            | self._func_docstrings.keys()
+            | self._var_annotations.keys()
+        )
 
     @cached_property
     def _member_objects(self) -> dict[str, Any]:
@@ -526,6 +541,8 @@ def _member_objects(self) -> dict[str, Any]:
                     members[name] = obj
             for name in self._var_docstrings:
                 members.setdefault(name, empty)
+            for name in self._func_docstrings:
+                members.setdefault(name, empty)
 
             members, notfound = doc_ast.sort_by_source(self.obj, {}, members)
             members.update(notfound)
@@ -587,7 +604,15 @@ def docstring(self) -> str:
     def _var_docstrings(self) -> dict[str, str]:
         docstrings: dict[str, str] = {}
         for cls in self._mro:
-            for name, docstr in doc_ast.walk_tree(cls).docstrings.items():
+            for name, docstr in doc_ast.walk_tree(cls).var_docstrings.items():
+                docstrings.setdefault(name, docstr)
+        return docstrings
+
+    @cached_property
+    def _func_docstrings(self) -> dict[str, str]:
+        docstrings: dict[str, str] = {}
+        for cls in self._mro:
+            for name, docstr in doc_ast.walk_tree(cls).func_docstrings.items():
                 docstrings.setdefault(name, docstr)
         return docstrings
 
@@ -642,7 +667,11 @@ def _declarations(self) -> dict[str, tuple[str, str]]:
         decls: dict[str, tuple[str, str]] = {}
         for cls in self._mro:
             treeinfo = doc_ast.walk_tree(cls)
-            for name in treeinfo.docstrings.keys() | treeinfo.annotations.keys():
+            for name in (
+                treeinfo.var_docstrings.keys()
+                | treeinfo.func_docstrings.keys()
+                | treeinfo.annotations.keys()
+            ):
                 decls.setdefault(name, (cls.__module__, f"{cls.__qualname__}.{name}"))
             for name in cls.__dict__:
                 decls.setdefault(name, (cls.__module__, f"{cls.__qualname__}.{name}"))

diff --git a/pdoc/doc_ast.py b/pdoc/doc_ast.py
@@ -85,8 +85,10 @@ def unparse(tree: ast.AST):
 class AstInfo:
     """The information extracted from walking the syntax tree."""
 
-    docstrings: dict[str, str]
+    var_docstrings: dict[str, str]
     """A qualname -> docstring mapping."""
+    func_docstrings: dict[str, str]
+    """A qualname -> docstring mapping for functions."""
     annotations: dict[str, str]
     """A qualname -> annotation mapping.
 
@@ -104,7 +106,8 @@ def walk_tree(obj: types.ModuleType | type) -> AstInfo:
 def _walk_tree(
     tree: ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef,
 ) -> AstInfo:
-    docstrings = {}
+    var_docstrings = {}
+    func_docstrings = {}
     annotations = {}
     for a, b in _pairwise_longest(_nodes(tree)):
         if isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple:
@@ -122,7 +125,7 @@ def _walk_tree(
         elif isinstance(a, ast.FunctionDef) and a.body:
             first = a.body[0]
             if isinstance(first, ast.Expr) and isinstance(first.value, ast.Str):
-                docstrings[a.name] = inspect.cleandoc(first.value.s).strip()
+                func_docstrings[a.name] = inspect.cleandoc(first.value.s).strip()
             continue
         else:
             continue
@@ -131,14 +134,15 @@ def _walk_tree(
             and isinstance(b.value, ast.Constant)
             and isinstance(b.value.value, str)
         ):
-            docstrings[name] = inspect.cleandoc(b.value.value).strip()
+            var_docstrings[name] = inspect.cleandoc(b.value.value).strip()
         elif isinstance(b, ast.Expr) and isinstance(
             b.value, ast.Str
         ):  # pragma: no cover
             # Python <= 3.7
-            docstrings[name] = inspect.cleandoc(b.value.s).strip()
+            var_docstrings[name] = inspect.cleandoc(b.value.s).strip()
     return AstInfo(
-        docstrings,
+        var_docstrings,
+        func_docstrings,
         annotations,
     )
 

diff --git a/pdoc/markdown2/__init__.py b/pdoc/markdown2/__init__.py
@@ -1,7 +1,7 @@
 # fmt: off
 # flake8: noqa
 # type: ignore
-# Taken from here: https://github.com/trentm/python-markdown2/blob/f456341fde46e0a492d0bc0e2ee39957d4fb770d/lib/markdown2.py
+# Taken from here: https://github.com/trentm/python-markdown2/blob/bce3f18ed86a19b418c8114a712bb6fee790c4c2/lib/markdown2.py
 
 #!/usr/bin/env python
 # Copyright (c) 2012 Trent Mick.
@@ -61,7 +61,7 @@
   highlighting when using fenced-code-blocks and highlightjs.
 * html-classes: Takes a dict mapping html tag names (lowercase) to a
   string to use for a "class" tag attribute. Currently only supports "img",
-  "table", "pre", "code", "ul" and "ol" tags. Add an issue if you require
+  "table", "thead", "pre", "code", "ul" and "ol" tags. Add an issue if you require
   this for other tags.
 * link-patterns: Auto-link given regex patterns in text (e.g. bug number
   references, revision number references).
@@ -104,18 +104,18 @@
 #   not yet sure if there implications with this. Compare 'pydoc sre'
 #   and 'perldoc perlre'.
 
-__version_info__ = (2, 4, 8)
+__version_info__ = (2, 4, 9)
 __version__ = '.'.join(map(str, __version_info__))
 __author__ = "Trent Mick"
 
-import sys
-import re
-import logging
-from hashlib import sha256
-import optparse
-from random import random, randint
+import argparse
 import codecs
+import logging
+import re
+import sys
 from collections import defaultdict
+from hashlib import sha256
+from random import randint, random
 
 # ---- globals
 
@@ -1144,7 +1144,7 @@ def _table_sub(self, match):
                 align_from_col_idx[col_idx] = ' style="text-align:right;"'
 
         # thead
-        hlines = ['<table%s>' % self._html_class_str_from_tag('table'), '<thead>', '<tr>']
+        hlines = ['<table%s>' % self._html_class_str_from_tag('table'), '<thead%s>' % self._html_class_str_from_tag('thead'), '<tr>']
         cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", head)))]
         for col_idx, col in enumerate(cols):
             hlines.append('  <th%s>%s</th>' % (
@@ -1220,7 +1220,7 @@ def format_cell(text):
         add_hline('<table%s>' % self._html_class_str_from_tag('table'))
         # Check if first cell of first row is a header cell. If so, assume the whole row is a header row.
         if rows and rows[0] and re.match(r"^\s*~", rows[0][0]):
-            add_hline('<thead>', 1)
+            add_hline('<thead%s>' % self._html_class_str_from_tag('thead'), 1)
             add_hline('<tr>', 2)
             for cell in rows[0]:
                 add_hline("<th>{}</th>".format(format_cell(cell)), 3)
@@ -2246,7 +2246,7 @@ def _do_strike(self, text):
     def _do_underline(self, text):
         text = self._underline_re.sub(r"<u>\1</u>", text)
         return text
-    
+
     _tg_spoiler_re = re.compile(r"\|\|\s?(.+?)\s?\|\|", re.S)
     def _do_tg_spoiler(self, text):
         text = self._tg_spoiler_re.sub(r"<tg-spoiler>\1</tg-spoiler>", text)
@@ -2538,6 +2538,9 @@ def _do_link_patterns(self, text):
         for regex, repl in self.link_patterns:
             replacements = []
             for match in regex.finditer(text):
+                if any(self._match_overlaps_substr(text, match, h) for h in link_from_hash):
+                    continue
+
                 if hasattr(repl, "__call__"):
                     href = repl(match)
                 else:
@@ -2614,12 +2617,17 @@ def _uniform_outdent(self, text, min_outdent=None, max_outdent=None):
             re.findall(r'^[ \t]*', line)[0] if line else None
             for line in text.splitlines()
         ]
+        whitespace_not_empty = [i for i in whitespace if i is not None]
+
+        # if no whitespace detected (ie: no lines in code block, issue #505)
+        if not whitespace_not_empty:
+            return '', text
 
         # get minimum common whitespace
-        outdent = min(i for i in whitespace if i is not None)
+        outdent = min(whitespace_not_empty)
         # adjust min common ws to be within bounds
         if min_outdent is not None:
-            outdent = min([i for i in whitespace if i is not None and i >= min_outdent] or [min_outdent])
+            outdent = min([i for i in whitespace_not_empty if i >= min_outdent] or [min_outdent])
         if max_outdent is not None:
             outdent = min(outdent, max_outdent)
 
@@ -2642,6 +2650,19 @@ def _uniform_indent(self, text, indent, include_empty_lines=False):
             for line in text.splitlines(True)
         )
 
+    @staticmethod
+    def _match_overlaps_substr(text, match, substr):
+        '''
+        Checks if a regex match overlaps with (or fully contains) any
+        occurrence of a substring in the given text.
+        '''
+        m_start, m_end = match.span()
+        for instance in re.finditer(re.escape(substr), text):
+            start, end = instance.span()
+            if m_start <= end and start <= m_end:  # closed-interval overlap
+                return True
+        return False
+
 
 class MarkdownWithExtras(Markdown):
     """A markdowner class that enables most extras:
@@ -2961,8 +2982,8 @@ def _html_escape_url(attr, safe_mode=False):
 
 # ---- mainline
 
-class _NoReflowFormatter(optparse.IndentedHelpFormatter):
-    """An optparse formatter that does NOT reflow the description."""
+class _NoReflowFormatter(argparse.RawDescriptionHelpFormatter):
+    """An argparse formatter that does NOT reflow the description."""
     def format_description(self, description):
         return description or ""
 
@@ -2978,38 +2999,45 @@ def main(argv=None):
     if not logging.root.handlers:
         logging.basicConfig()
 
-    usage = "usage: %prog [PATHS...]"
-    version = "%prog "+__version__
-    parser = optparse.OptionParser(prog="markdown2", usage=usage,
-        version=version, description=cmdln_desc,
-        formatter=_NoReflowFormatter())
-    parser.add_option("-v", "--verbose", dest="log_level",
+    parser = argparse.ArgumentParser(
+        prog="markdown2", description=cmdln_desc, usage='%(prog)s [PATHS...]',
+        formatter_class=_NoReflowFormatter
+    )
+    parser.add_argument('--version', action='version',
+                        version='%(prog)s {version}'.format(version=__version__))
+    parser.add_argument('paths', nargs='*',
+                        help=(
+                            'optional list of files to convert. '
+                            'If none are given, stdin will be used'
+                        ))
+    parser.add_argument("-v", "--verbose", dest="log_level",
                       action="store_const", const=logging.DEBUG,
                       help="more verbose output")
-    parser.add_option("--encoding",
+    parser.add_argument("--encoding",
                       help="specify encoding of text content")
-    parser.add_option("--html4tags", action="store_true", default=False,
+    parser.add_argument("--html4tags", action="store_true", default=False,
                       help="use HTML 4 style for empty element tags")
-    parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode",
+    parser.add_argument("-s", "--safe", metavar="MODE", dest="safe_mode",
                       help="sanitize literal HTML: 'escape' escapes "
                            "HTML meta chars, 'replace' replaces with an "
                            "[HTML_REMOVED] note")
-    parser.add_option("-x", "--extras", action="append",
+    parser.add_argument("-x", "--extras", action="append",
                       help="Turn on specific extra features (not part of "
                            "the core Markdown spec). See above.")
-    parser.add_option("--use-file-vars",
+    parser.add_argument("--use-file-vars",
                       help="Look for and use Emacs-style 'markdown-extras' "
                            "file var to turn on extras. See "
                            "<https://github.com/trentm/python-markdown2/wiki/Extras>")
-    parser.add_option("--link-patterns-file",
+    parser.add_argument("--link-patterns-file",
                       help="path to a link pattern file")
-    parser.add_option("--self-test", action="store_true",
+    parser.add_argument("--self-test", action="store_true",
                       help="run internal self-tests (some doctests)")
-    parser.add_option("--compare", action="store_true",
+    parser.add_argument("--compare", action="store_true",
                       help="run against Markdown.pl as well (for testing)")
     parser.set_defaults(log_level=logging.INFO, compare=False,
                         encoding="utf-8", safe_mode=None, use_file_vars=False)
-    opts, paths = parser.parse_args()
+    opts = parser.parse_args()
+    paths = opts.paths
     log.setLevel(opts.log_level)
 
     if opts.self_test:
@@ -3051,7 +3079,7 @@ def main(argv=None):
     else:
         link_patterns = None
 
-    from os.path import join, dirname, abspath, exists
+    from os.path import abspath, dirname, exists, join
     markdown_pl = join(dirname(dirname(abspath(__file__))), "test",
                        "Markdown.pl")
     if not paths:
@@ -3064,7 +3092,7 @@ def main(argv=None):
             text = fp.read()
             fp.close()
         if opts.compare:
-            from subprocess import Popen, PIPE
+            from subprocess import PIPE, Popen
             print("==== Markdown.pl ====")
             p = Popen('perl %s' % markdown_pl, shell=True, stdin=PIPE, stdout=PIPE, close_fds=True)
             p.stdin.write(text.encode('utf-8'))