diff --git a/README.rst b/README.rst index df321fc..ea74995 100644 --- a/README.rst +++ b/README.rst @@ -146,8 +146,8 @@ file in order to extract the docstrings, or in processing the contents. Code Description (and notes) ------ ----------------------------------------------------------------------- RST900 Failed to load file -RST901 Failed to parse file -RST902 Failed to parse __all__ entry (e.g. single entry as string not tuple) +RST901 Failed to parse file (*No longer used*) +RST902 Failed to parse __all__ entry (*No longer used*) RST903 Failed to lint docstring ====== ======================================================================= @@ -236,6 +236,9 @@ Version History ======= ========== =========================================================== Version Released Changes ------- ---------- ----------------------------------------------------------- +v0.2.0 *Pending* - Use AST from flake8 not re-parsing with pydocstyle. + - Drops ``RST901`` (internal problem with parser). + - Drops ``RST902`` (checking any ``__all__`` entry). v0.1.2 2021-04-16 - Dropped unused logging module import. - Extended test coverage. v0.1.1 2021-04-15 - Explicit ``pygments`` dependency for any code blocks. diff --git a/flake8_rst_docstrings.py b/flake8_rst_docstrings.py index 07b0a12..638fa03 100644 --- a/flake8_rst_docstrings.py +++ b/flake8_rst_docstrings.py @@ -4,25 +4,18 @@ source code. """ -import sys - -from tokenize import open as tokenize_open - -from io import StringIO -from io import TextIOWrapper - -from pydocstyle.parser import Parser +import ast import restructuredtext_lint as rst_lint -__version__ = "0.1.2" +__version__ = "0.2.0" rst_prefix = "RST" rst_fail_load = 900 -rst_fail_parse = 901 -rst_fail_all = 902 +# rst_fail_parse = 901 +# rst_fail_all = 902 rst_fail_lint = 903 # Level 1 - info @@ -105,79 +98,63 @@ def code_mapping(level, msg, extra_directives, extra_roles, default=99): return default -#################################### -# Start of code copied from PEP257 # -#################################### - -# This is the reference implementation of the alogrithm -# in PEP257 for removing the indentation of a docstring, -# which has been placed in the public domain. -# -# This includes the minor change from sys.maxint to -# sys.maxsize for Python 3 compatibility. -# -# https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation - - -def trim(docstring): - """PEP257 docstring indentation trim function.""" - if not docstring: - return "" - # Convert tabs to spaces (following the normal Python rules) - # and split into a list of lines: - lines = docstring.expandtabs().splitlines() - # Determine minimum indentation (first line doesn't count): - indent = sys.maxsize - for line in lines[1:]: - stripped = line.lstrip() - if stripped: - indent = min(indent, len(line) - len(stripped)) - # Remove indentation (first line is special): - trimmed = [lines[0].strip()] - if indent < sys.maxsize: - for line in lines[1:]: - trimmed.append(line[indent:].rstrip()) - # Strip off trailing and leading blank lines: - while trimmed and not trimmed[-1]: - trimmed.pop() - while trimmed and not trimmed[0]: - trimmed.pop(0) - # Return a single string: - return "\n".join(trimmed) - - -################################## -# End of code copied from PEP257 # -################################## - - -def dequote_docstring(text): - """Remove the quotes delimiting a docstring.""" - # TODO: Process escaped characters unless raw mode? - text = text.strip() - if len(text) > 6 and text[:3] == text[-3:] == '"""': - # Standard case, """...""" - return text[3:-3] - if len(text) > 7 and text[:4] in ('u"""', 'r"""') and text[-3:] == '"""': - # Unicode, u"""...""", or raw r"""...""" - return text[4:-3] - # Other flake8 tools will report atypical quotes: - if len(text) > 6 and text[:3] == text[-3:] == "'''": - return text[3:-3] - if len(text) > 7 and text[:4] in ("u'''", "r'''") and text[-3:] == "'''": - return text[4:-3] - if len(text) > 2 and text[0] == text[-1] == '"': - return text[1:-1] - if len(text) > 3 and text[:2] in ('u"', 'r"') and text[-1] == '"': - return text[2:-1] - if len(text) > 2 and text[0] == text[-1] == "'": - return text[1:-1] - if len(text) > 3 and text[:2] in ("u'", "r'") and text[-1] == "'": - return text[2:-1] - raise ValueError("Bad quotes!") - - -parse = Parser() # from pydocstyle +class RstDocStringVisitor(ast.NodeVisitor): + """Ast visitor for RST docstring validation.""" + + errors = [] + + def rst_validate(self, node): + """Validate the docstring of this node as RST.""" + self.generic_visit(node) # Ensure visit any children + docstring = ast.get_docstring(node, clean=True) + if not docstring: + # People can use flake8-docstrings to report missing docstrings + return + + start = node.body[0].lineno - len( + ast.get_docstring(node, clean=False).split("\n") + ) + # with open("/dev/stderr", "w") as handle: + # handle.write(f"DEBUG: Checking {node} from line {start}\n") + + try: + rst_errors = list(rst_lint.lint(docstring)) + except Exception as err: + # e.g. UnicodeDecodeError + msg = "%s%03i %s" % ( + rst_prefix, + rst_fail_lint, + "Failed to lint docstring: %s %s\n%s" + % (node.name, err, repr(docstring)), + ) + self.errors.append((node.body[0].lineno, msg)) + return + + for rst_error in rst_errors: + # We don't know the column number + self.errors.append( + ( + rst_error.line + start, + rst_error.level, + rst_error.message, + ) + ) + + def visit_Module(self, node: ast.Module): + """Visit a module in the AST.""" + self.rst_validate(node) + + def visit_ClassDef(self, node: ast.ClassDef): + """Visit a class definition in the AST.""" + self.rst_validate(node) + + def visit_FunctionDef(self, node: ast.FunctionDef): + """Visit a function definition in the AST.""" + self.rst_validate(node) + + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef): + """Visit an async-function definition in the AST.""" + self.rst_validate(node) class reStructuredTextChecker(object): @@ -186,18 +163,10 @@ class reStructuredTextChecker(object): name = "rst-docstrings" version = __version__ - STDIN_NAMES = {"stdin", "-", "(none)", None} - def __init__(self, tree, filename="(none)"): """Initialise.""" self.tree = tree self.filename = filename - try: - self.load_source() - self.err = None - except Exception as err: - self.source = None - self.err = err @classmethod def add_options(cls, parser): @@ -227,60 +196,23 @@ def parse_options(cls, options): def run(self): """Use docutils to check docstrings are valid RST.""" - # Is there any reason not to call load_source here? - if self.err is not None: - assert self.source is None + # with open("/dev/stderr", "w") as handle: + # handle.write(f"DEBUG: Checking tree of {self.filename}\n") + if self.tree is None: msg = "%s%03i %s" % ( rst_prefix, rst_fail_load, "Failed to load file: %s" % self.err, ) yield 0, 0, msg, type(self) - module = [] - try: - module = parse(StringIO(self.source), self.filename) - except SyntaxError as err: - msg = "%s%03i %s" % ( - rst_prefix, - rst_fail_parse, - "Failed to parse file: %s" % err, - ) - yield 0, 0, msg, type(self) - module = [] - if module.dunder_all_error: - msg = "%s%03i %s" % ( - rst_prefix, - rst_fail_all, - "Failed to parse __all__ entry.", - ) - yield 0, 0, msg, type(self) - # module = [] - for definition in module: - if not definition.docstring: - # People can use flake8-docstrings to report missing - # docstrings - continue - try: - # Note we use the PEP257 trim algorithm to remove the - # leading whitespace from each line - this avoids false - # positive severe error "Unexpected section title." - unindented = trim(dequote_docstring(definition.docstring)) - # Off load RST validation to reStructuredText-lint - # which calls docutils internally. - # TODO: Should we pass the Python filename as filepath? - rst_errors = list(rst_lint.lint(unindented)) - except Exception as err: - # e.g. UnicodeDecodeError - msg = "%s%03i %s" % ( - rst_prefix, - rst_fail_lint, - "Failed to lint docstring: %s - %s" % (definition.name, err), - ) - yield definition.start, 0, msg, type(self) - continue - for rst_error in rst_errors: + else: + visitor = RstDocStringVisitor() + visitor.visit(self.tree) + # with open("/dev/stderr", "w") as handle: + # handle.write(f"DEBUG: From {self.filename} found {visitor.errors}\n") + for line, level, msg in visitor.errors: # TODO - make this a configuration option? - if rst_error.level <= 1: + if level <= 1: continue # Levels: # @@ -291,30 +223,14 @@ def run(self): # 4 - severe --> RST4## codes # # Map the string to a unique code: - msg = rst_error.message.split("\n", 1)[0] - code = code_mapping( - rst_error.level, msg, self.extra_directives, self.extra_roles - ) + msg = msg.split("\n", 1)[0] + code = code_mapping(level, msg, self.extra_directives, self.extra_roles) if not code: # We ignored it, e.g. a known Sphinx role continue assert 0 < code < 100, code - code += 100 * rst_error.level + code += 100 * level msg = "%s%03i %s" % (rst_prefix, code, msg) - # This will return the line number by combining the - # start of the docstring with the offet within it. # We don't know the column number, leaving as zero. - yield definition.start + rst_error.line, 0, msg, type(self) - - def load_source(self): - """Load the source for the specified file.""" - if self.filename in self.STDIN_NAMES: - self.filename = "stdin" - if sys.version_info[0] < 3: - self.source = sys.stdin.read() - else: - self.source = TextIOWrapper(sys.stdin.buffer, errors="ignore").read() - else: - with tokenize_open(self.filename) as fd: - self.source = fd.read() + yield line, 0, msg, type(self) diff --git a/setup.py b/setup.py index 3d409bd..875c428 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,6 @@ def get_version(fname="flake8_rst_docstrings.py"): install_requires=[ "flake8 >= 3.0.0", "restructuredtext_lint", - "pydocstyle >= 3.0.0", "pygments", ], entry_points={ diff --git a/tests/RST902/bad_all.py b/tests/RST902/bad_all.py deleted file mode 100644 index f7d7f23..0000000 --- a/tests/RST902/bad_all.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Silly test case for a bad __all__ entry. - -This example catches a technical issue with the Python code, not -an RST error directly - but since API documentation usually looks -at the ``__all__`` value this is still relevant. - -The ``__all__`` value should be a tuple, even when it has a single -entry - not a string. This file is considered to be in error, and -should fail:: - - $ flake8 --select RST RST902/bad_all.py - RST902/bad_all.py:0:1: RST902 Failed to parse __all__ entry. - -Note that currently it does not report the actual line number. -""" - - -def hello(name="Dave"): - """Print hello message.""" - print("Hello %s" % name) - - -__all__ = "hello" # Should be a single element tuple