From eb2cb0e7b2470dc850a2e78dda92863de88b425e Mon Sep 17 00:00:00 2001
From: Peter Cock
Date: Thu, 15 Apr 2021 12:33:12 +0100
Subject: [PATCH] Remove Python 2 specific workarounds

---
 flake8_rst_docstrings.py | 138 ++-------------------------------------
 1 file changed, 6 insertions(+), 132 deletions(-)

diff --git a/flake8_rst_docstrings.py b/flake8_rst_docstrings.py
index d8d51c1..a9b8264 100644
--- a/flake8_rst_docstrings.py
+++ b/flake8_rst_docstrings.py
@@ -5,136 +5,14 @@
 """
 import logging
-import re
 import sys
-import tokenize as tk
+from tokenize import open as tokenize_open
 
-from pydocstyle.parser import Parser
+from io import StringIO
+from io import TextIOWrapper
 
-try:
-    from StringIO import StringIO
-except ImportError:  # Python 3.0 and later
-    from io import StringIO
-    from io import TextIOWrapper
-
-#####################################
-# Start of backported tokenize code #
-#####################################
-
-# If possible (python >= 3.2) use tokenize.open to open files, so PEP 263
-# encoding markers are interpreted.
-try:
-    tokenize_open = tk.open
-except AttributeError:
-    # Fall back on a backport of the encoding aware tokenize open function,
-    # which requires we back port tokenize.detect_encoding to implement.
-    from codecs import lookup, BOM_UTF8
-    from io import open as io_open
-
-    cookie_re = re.compile(r"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)")
-    blank_re = re.compile(br"^[ \t\f]*(?:[#\r\n]|$)")
-
-    # I don't think 'blank regular expression' is well named, think
-    # it looks for blank line after any Python # comment removed.
-    # Key test case of interest is hashbang lines!
-    assert blank_re.match(b"\n")
-    assert blank_re.match(b"# Comment\n")
-    assert blank_re.match(b"#!/usr/bin/python\n")
-    assert blank_re.match(b"#!/usr/bin/env python\n")
-    assert not blank_re.match(b'"""Welcome\n')
-    assert not blank_re.match(b'"""Welcome"""\n')
-
-    def _get_normal_name(orig_enc):
-        """Imitates get_normal_name in tokenizer.c (PRIVATE)."""
-        # sys.stderr.write("DEBUG: _get_normal_name(%r)\n" % orig_enc)
-        # Only care about the first 12 characters.
-        enc = orig_enc[:12].lower().replace("_", "-")
-        if enc == "utf-8" or enc.startswith("utf-8-"):
-            return "utf-8"
-        if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or enc.startswith(
-            ("latin-1-", "iso-8859-1-", "iso-latin-1-")
-        ):
-            return "iso-8859-1"
-        return orig_enc
-
-    def _find_cookie(line, filename, bom_found):
-        """Find encoding string in a line of Python (PRIVATE)."""
-        # sys.stderr.write("DEBUG: _find_cookie(%r, %r, %r)\n"
-        #                  % (line, filename, bom_found))
-        match = cookie_re.match(line)
-        if not match:
-            return None
-        encoding = _get_normal_name(match.group(1))
-        try:
-            lookup(encoding)
-        except LookupError:
-            # This behaviour mimics the Python interpreter
-            raise SyntaxError(
-                "unknown encoding for {!r}: {}".format(filename, encoding)
-            )
-
-        if bom_found:
-            if encoding != "utf-8":
-                # This behaviour mimics the Python interpreter
-                raise SyntaxError("encoding problem for {!r}: utf-8".format(filename))
-            encoding += "-sig"
-        return encoding
-
-    def tokenize_open(filename):
-        """Simulate opening a Python file read only with the correct encoding.
-
-        While this was based on the Python 3 standard library function
-        tokenize.open in order to backport it to Python 2.7, this proved
-        painful.
-
-        Note that because this text will later be fed into ``exec(...)`` we
-        would hit SyntaxError encoding declaration in Unicode string, so the
-        handle returned has the encoding line masked out!
-
-        Note we don't just remove the line as that would throw off the line
-        numbers, it is replaced with a Python comment.
-        """
-        # sys.stderr.write("DEBUG: tokenize_open(%r)\n" % filename)
-        # Will check the first & second lines for an encoding
-        # AND REMOVE IT FROM THE TEXT RETURNED
-        with io_open(filename, "rb") as handle:
-            lines = list(handle)
-
-        # Find the encoding
-        first = lines[0] if lines else b""
-        second = lines[1] if len(lines) > 1 else b""
-        default = "utf-8"
-        bom_found = False
-        if first.startswith(BOM_UTF8):
-            bom_found = True
-            first = first[3:]
-            default = "utf-8-sig"
-        encoding = _find_cookie(first, filename, bom_found)
-        if encoding:
-            lines[0] = "# original encoding removed\n"
-        if not encoding and blank_re.match(first):
-            # sys.stderr.write("DEBUG: Trying second line %r\n"
-            #                  % second)
-            encoding = _find_cookie(second, filename, bom_found)
-            if encoding:
-                lines[1] = "# original encoding removed\n"
-        if not encoding:
-            encoding = default
-
-        # sys.stderr.write("DEBUG: tokenize_open using encoding=%r\n"
-        #                  % encoding)
-
-        # Apply the encoding, using StringIO as we removed the
-        # original encoding to help legacy code using exec.
-        # for b in lines:
-        #     sys.stderr.write(b"DEBUG: " + b)
-        return StringIO("".join(b.decode(encoding) for b in lines))
-
-
-###################################
-# End of backported tokenize code #
-###################################
+from pydocstyle.parser import Parser
 
 import restructuredtext_lint as rst_lint
@@ -441,9 +319,5 @@ def load_source(self):
             else:
                 self.source = TextIOWrapper(sys.stdin.buffer, errors="ignore").read()
         else:
-            # Could be a Python 2.7 StringIO with no context manager, sigh.
-            # with tokenize_open(self.filename) as fd:
-            #     self.source = fd.read()
-            handle = tokenize_open(self.filename)
-            self.source = handle.read()
-            handle.close()
+            with tokenize_open(self.filename) as fd:
+                self.source = fd.read()
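Note: the replacement code relies on Python 3's tokenize.open, which opens a source file read-only using the encoding declared by a PEP 263 coding cookie or a UTF-8 BOM (defaulting to UTF-8) -- the behaviour the removed backport reproduced for Python 2.7. A minimal sketch of that usage follows; the file name and coding cookie are illustrative only and not taken from the patch.

    from tokenize import open as tokenize_open

    # Write a small Latin-1 encoded file with a PEP 263 coding cookie
    # (illustrative path, not part of the patch).
    with open("example_latin1.py", "wb") as handle:
        handle.write(b"# -*- coding: latin-1 -*-\n")
        handle.write("name = 'caf\xe9'\n".encode("latin-1"))

    # tokenize.open detects the cookie and returns a text handle that
    # decodes the file with the declared encoding, so the caller needs
    # no manual encoding detection.
    with tokenize_open("example_latin1.py") as fd:
        source = fd.read()

    print(source)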