Skip to content

Commit

Permalink
Use the AST from flake8 instead of pydocstyle parser
Browse files Browse the repository at this point in the history
This will be v0.2.0
  • Loading branch information
peterjc committed Apr 23, 2021
1 parent e9baaac commit 2d2e284
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 185 deletions.
7 changes: 5 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ file in order to extract the docstrings, or in processing the contents.
Code Description (and notes)
------ -----------------------------------------------------------------------
RST900 Failed to load file
RST901 Failed to parse file
RST902 Failed to parse __all__ entry (e.g. single entry as string not tuple)
RST901 Failed to parse file (*No longer used*)
RST902 Failed to parse __all__ entry (*No longer used*)
RST903 Failed to lint docstring
====== =======================================================================

Expand Down Expand Up @@ -236,6 +236,9 @@ Version History
======= ========== ===========================================================
Version Released Changes
------- ---------- -----------------------------------------------------------
v0.2.0 *Pending* - Use AST from flake8 not re-parsing with pydocstyle.
- Drops ``RST901`` (internal problem with parser).
- Drops ``RST902`` (checking any ``__all__`` entry).
v0.1.2 2021-04-16 - Dropped unused logging module import.
- Extended test coverage.
v0.1.1 2021-04-15 - Explicit ``pygments`` dependency for any code blocks.
Expand Down
234 changes: 75 additions & 159 deletions flake8_rst_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,18 @@
source code.
"""

import sys

from tokenize import open as tokenize_open

from io import StringIO
from io import TextIOWrapper

from pydocstyle.parser import Parser
import ast

import restructuredtext_lint as rst_lint


__version__ = "0.1.2"
__version__ = "0.2.0"


rst_prefix = "RST"
rst_fail_load = 900
rst_fail_parse = 901
rst_fail_all = 902
# rst_fail_parse = 901
# rst_fail_all = 902
rst_fail_lint = 903

# Level 1 - info
Expand Down Expand Up @@ -105,79 +98,63 @@ def code_mapping(level, msg, extra_directives, extra_roles, default=99):
return default


####################################
# Start of code copied from PEP257 #
####################################

# This is the reference implementation of the algorithm
# in PEP257 for removing the indentation of a docstring,
# which has been placed in the public domain.
#
# This includes the minor change from sys.maxint to
# sys.maxsize for Python 3 compatibility.
#
# https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation


def trim(docstring):
    """Strip the common indentation from a docstring, as described in PEP257.

    Tabs are expanded first. The first line is stripped on both sides; the
    remaining lines lose the smallest indentation found on any non-blank
    continuation line, plus trailing whitespace. Blank lines at the start
    and end are discarded. A false value (``None`` or ``""``) gives ``""``.
    """
    if not docstring:
        return ""
    rows = docstring.expandtabs().splitlines()
    continuation = rows[1:]
    # Indentation of every continuation line that has visible content.
    margins = [
        len(row) - len(row.lstrip()) for row in continuation if row.lstrip()
    ]
    cleaned = [rows[0].strip()]
    # With no non-blank continuation line there is no margin to remove and
    # the continuation lines are dropped entirely.
    if margins and min(margins) < sys.maxsize:
        margin = min(margins)
        cleaned.extend(row[margin:].rstrip() for row in continuation)
    # Narrow the window to exclude blank lines at the end, then the start.
    first, last = 0, len(cleaned)
    while last > first and not cleaned[last - 1]:
        last -= 1
    while first < last and not cleaned[first]:
        first += 1
    return "\n".join(cleaned[first:last])


##################################
# End of code copied from PEP257 #
##################################


def dequote_docstring(text):
    """Remove the quotes delimiting a docstring.

    Surrounding whitespace is stripped, then an optional single-letter
    ``u``/``r`` prefix (either case) is dropped, and finally the matching
    pair of delimiters is removed. Triple quotes are tried before single
    quotes, and double quotes before apostrophes (other flake8 tools will
    report atypical quotes).

    Empty literals such as ``\"\"\"\"\"\"`` or ``""`` give an empty string.

    :param text: Source text of the string literal, including its quotes.
    :returns: The text between the delimiters, with escapes left as-is.
    :raises ValueError: If no matching pair of delimiters is found.
    """
    # TODO: Process escaped characters unless raw mode?
    text = text.strip()
    # Drop a unicode/raw prefix so a single delimiter loop covers all cases.
    literal = text[1:] if text[:1] in ("u", "U", "r", "R") else text
    for quote in ('"""', "'''", '"', "'"):
        width = len(quote)
        # ">=" (not ">") so that an empty literal is matched by its own
        # delimiter rather than falling through to a shorter one.
        if (
            len(literal) >= 2 * width
            and literal.startswith(quote)
            and literal.endswith(quote)
        ):
            return literal[width:-width]
    raise ValueError("Bad quotes!")


parse = Parser() # from pydocstyle
class RstDocStringVisitor(ast.NodeVisitor):
    """AST visitor which validates every docstring as reStructuredText.

    Docstrings of modules, classes, functions and async functions are
    passed to ``rst_lint.lint``. Each finding is appended to ``self.errors``
    as a ``(line, level, message)`` tuple, the shape unpacked by the
    checker's ``run`` method.
    """

    # Level recorded when linting itself raised. 4 is the highest ("severe")
    # level listed by the checker; the value only has to survive the
    # checker's "level <= 1" filter.
    # NOTE(review): the checker re-maps every record through code_mapping,
    # so the RST903 prefix already in the message is not the final code.
    _LINT_FAILURE_LEVEL = 4

    def __init__(self):
        """Create a visitor with its own, initially empty, error list."""
        super().__init__()
        # Per-instance list: a class-level list would be shared by every
        # visitor, so findings from one file would be reported for the next.
        self.errors = []

    def rst_validate(self, node):
        """Validate the docstring of this node as RST.

        Children are visited first so nested definitions are checked too.
        Nodes without a docstring are ignored (people can use
        flake8-docstrings to report missing docstrings).
        """
        self.generic_visit(node)  # Ensure visit any children
        docstring = ast.get_docstring(node, clean=True)
        if not docstring:
            return

        doc_expr = node.body[0]
        # Python 3.8+ reports the first line in ``lineno`` and the last in
        # ``end_lineno``; older versions have no ``end_lineno`` and the
        # original code relied on ``lineno`` being the last line there.
        last_line = getattr(doc_expr, "end_lineno", None) or doc_expr.lineno
        raw_docstring = ast.get_docstring(node, clean=False)
        # Line just before the docstring, so "start + 1" is its first line.
        # NOTE(review): clean=True may drop leading blank lines, which would
        # shift reported lines for docstrings starting with a newline.
        start = last_line - len(raw_docstring.split("\n"))

        try:
            rst_errors = list(rst_lint.lint(docstring))
        except Exception as err:
            # e.g. UnicodeDecodeError
            # ast.Module has no ``name`` attribute, so fall back to a label.
            name = getattr(node, "name", "<module>")
            msg = "%s%03i %s" % (
                rst_prefix,
                rst_fail_lint,
                "Failed to lint docstring: %s %s\n%s"
                % (name, err, repr(docstring)),
            )
            # Same three-element shape as the normal findings below.
            self.errors.append(
                (doc_expr.lineno, self._LINT_FAILURE_LEVEL, msg)
            )
            return

        # We don't know the column number
        self.errors.extend(
            (rst_error.line + start, rst_error.level, rst_error.message)
            for rst_error in rst_errors
        )

    def visit_Module(self, node: ast.Module):
        """Visit a module in the AST."""
        self.rst_validate(node)

    def visit_ClassDef(self, node: ast.ClassDef):
        """Visit a class definition in the AST."""
        self.rst_validate(node)

    def visit_FunctionDef(self, node: ast.FunctionDef):
        """Visit a function definition in the AST."""
        self.rst_validate(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
        """Visit an async-function definition in the AST."""
        self.rst_validate(node)


class reStructuredTextChecker(object):
Expand All @@ -186,18 +163,10 @@ class reStructuredTextChecker(object):
name = "rst-docstrings"
version = __version__

STDIN_NAMES = {"stdin", "-", "(none)", None}

def __init__(self, tree, filename="(none)"):
"""Initialise."""
self.tree = tree
self.filename = filename
try:
self.load_source()
self.err = None
except Exception as err:
self.source = None
self.err = err

@classmethod
def add_options(cls, parser):
Expand Down Expand Up @@ -227,60 +196,23 @@ def parse_options(cls, options):

def run(self):
"""Use docutils to check docstrings are valid RST."""
# Is there any reason not to call load_source here?
if self.err is not None:
assert self.source is None
# with open("/dev/stderr", "w") as handle:
# handle.write(f"DEBUG: Checking tree of {self.filename}\n")
if self.tree is None:
msg = "%s%03i %s" % (
rst_prefix,
rst_fail_load,
"Failed to load file: %s" % self.err,
)
yield 0, 0, msg, type(self)
module = []
try:
module = parse(StringIO(self.source), self.filename)
except SyntaxError as err:
msg = "%s%03i %s" % (
rst_prefix,
rst_fail_parse,
"Failed to parse file: %s" % err,
)
yield 0, 0, msg, type(self)
module = []
if module.dunder_all_error:
msg = "%s%03i %s" % (
rst_prefix,
rst_fail_all,
"Failed to parse __all__ entry.",
)
yield 0, 0, msg, type(self)
# module = []
for definition in module:
if not definition.docstring:
# People can use flake8-docstrings to report missing
# docstrings
continue
try:
# Note we use the PEP257 trim algorithm to remove the
# leading whitespace from each line - this avoids false
# positive severe error "Unexpected section title."
unindented = trim(dequote_docstring(definition.docstring))
# Off load RST validation to reStructuredText-lint
# which calls docutils internally.
# TODO: Should we pass the Python filename as filepath?
rst_errors = list(rst_lint.lint(unindented))
except Exception as err:
# e.g. UnicodeDecodeError
msg = "%s%03i %s" % (
rst_prefix,
rst_fail_lint,
"Failed to lint docstring: %s - %s" % (definition.name, err),
)
yield definition.start, 0, msg, type(self)
continue
for rst_error in rst_errors:
else:
visitor = RstDocStringVisitor()
visitor.visit(self.tree)
# with open("/dev/stderr", "w") as handle:
# handle.write(f"DEBUG: From {self.filename} found {visitor.errors}\n")
for line, level, msg in visitor.errors:
# TODO - make this a configuration option?
if rst_error.level <= 1:
if level <= 1:
continue
# Levels:
#
Expand All @@ -291,30 +223,14 @@ def run(self):
# 4 - severe --> RST4## codes
#
# Map the string to a unique code:
msg = rst_error.message.split("\n", 1)[0]
code = code_mapping(
rst_error.level, msg, self.extra_directives, self.extra_roles
)
msg = msg.split("\n", 1)[0]
code = code_mapping(level, msg, self.extra_directives, self.extra_roles)
if not code:
# We ignored it, e.g. a known Sphinx role
continue
assert 0 < code < 100, code
code += 100 * rst_error.level
code += 100 * level
msg = "%s%03i %s" % (rst_prefix, code, msg)

# This will return the line number by combining the
# start of the docstring with the offset within it.
# We don't know the column number, leaving as zero.
yield definition.start + rst_error.line, 0, msg, type(self)

def load_source(self):
"""Load the source for the specified file."""
if self.filename in self.STDIN_NAMES:
self.filename = "stdin"
if sys.version_info[0] < 3:
self.source = sys.stdin.read()
else:
self.source = TextIOWrapper(sys.stdin.buffer, errors="ignore").read()
else:
with tokenize_open(self.filename) as fd:
self.source = fd.read()
yield line, 0, msg, type(self)
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def get_version(fname="flake8_rst_docstrings.py"):
install_requires=[
"flake8 >= 3.0.0",
"restructuredtext_lint",
"pydocstyle >= 3.0.0",
"pygments",
],
entry_points={
Expand Down
23 changes: 0 additions & 23 deletions tests/RST902/bad_all.py

This file was deleted.

0 comments on commit 2d2e284

Please sign in to comment.