From eb2cb0e7b2470dc850a2e78dda92863de88b425e Mon Sep 17 00:00:00 2001
From: Peter Cock
Date: Thu, 15 Apr 2021 12:33:12 +0100
Subject: [PATCH] Remove Python 2 specific workarounds
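
Now that Python 3 is the minimum supported version, the conditional
StringIO/TextIOWrapper imports and the backported encoding-aware
tokenize_open (only needed because tokenize.open was added in Python
3.2 and was missing on Python 2.7) can be dropped in favour of the
standard library function, which interprets PEP 263 encoding cookies
and a UTF-8 BOM for us.

The file loading path reduces to the stock context-manager form,
roughly as below (an illustrative sketch only; "example.py" is a
placeholder filename):

    from tokenize import open as tokenize_open

    with tokenize_open("example.py") as handle:
        source = handle.read()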
---
flake8_rst_docstrings.py | 138 ++-------------------------------------
1 file changed, 6 insertions(+), 132 deletions(-)
diff --git a/flake8_rst_docstrings.py b/flake8_rst_docstrings.py
index d8d51c1..a9b8264 100644
--- a/flake8_rst_docstrings.py
+++ b/flake8_rst_docstrings.py
@@ -5,136 +5,14 @@
"""
import logging
-import re
import sys
-import tokenize as tk
+from tokenize import open as tokenize_open
-from pydocstyle.parser import Parser
+from io import StringIO
+from io import TextIOWrapper
-try:
-    from StringIO import StringIO
-except ImportError:  # Python 3.0 and later
-    from io import StringIO
-    from io import TextIOWrapper
-
-#####################################
-# Start of backported tokenize code #
-#####################################
-
-# If possible (python >= 3.2) use tokenize.open to open files, so PEP 263
-# encoding markers are interpreted.
-try:
-    tokenize_open = tk.open
-except AttributeError:
-    # Fall back on a backport of the encoding aware tokenize open function,
-    # which requires we back port tokenize.detect_encoding to implement.
-    from codecs import lookup, BOM_UTF8
-    from io import open as io_open
-
-    cookie_re = re.compile(r"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)")
-    blank_re = re.compile(br"^[ \t\f]*(?:[#\r\n]|$)")
-
-    # The name 'blank regular expression' is perhaps misleading; this
-    # matches a line which is blank once any Python comment is removed.
-    # The key test cases of interest are hashbang lines!
-    assert blank_re.match(b"\n")
-    assert blank_re.match(b"# Comment\n")
-    assert blank_re.match(b"#!/usr/bin/python\n")
-    assert blank_re.match(b"#!/usr/bin/env python\n")
-    assert not blank_re.match(b'"""Welcome\n')
-    assert not blank_re.match(b'"""Welcome"""\n')
-
-    def _get_normal_name(orig_enc):
-        """Imitates get_normal_name in tokenizer.c (PRIVATE)."""
-        # sys.stderr.write("DEBUG: _get_normal_name(%r)\n" % orig_enc)
-        # Only care about the first 12 characters.
-        enc = orig_enc[:12].lower().replace("_", "-")
-        if enc == "utf-8" or enc.startswith("utf-8-"):
-            return "utf-8"
-        if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or enc.startswith(
-            ("latin-1-", "iso-8859-1-", "iso-latin-1-")
-        ):
-            return "iso-8859-1"
-        return orig_enc
-
-    def _find_cookie(line, filename, bom_found):
-        """Find encoding string in a line of Python (PRIVATE)."""
-        # sys.stderr.write("DEBUG: _find_cookie(%r, %r, %r)\n"
-        # % (line, filename, bom_found))
-        match = cookie_re.match(line)
-        if not match:
-            return None
-        encoding = _get_normal_name(match.group(1))
-        try:
-            lookup(encoding)
-        except LookupError:
-            # This behaviour mimics the Python interpreter
-            raise SyntaxError(
-                "unknown encoding for {!r}: {}".format(filename, encoding)
-            )
-
-        if bom_found:
-            if encoding != "utf-8":
-                # This behaviour mimics the Python interpreter
-                raise SyntaxError("encoding problem for {!r}: utf-8".format(filename))
-            encoding += "-sig"
-        return encoding
-
-    def tokenize_open(filename):
-        """Simulate opening a Python file read only with the correct encoding.
-
-        While this was based on the Python 3 standard library function
-        tokenize.open in order to backport it to Python 2.7, this proved
-        painful.
-
-        Note that because this text will later be fed into ``exec(...)``, we
-        would otherwise hit "SyntaxError: encoding declaration in Unicode
-        string", so the handle returned has the encoding line masked out!
-
-        Note we don't simply remove the line, as that would throw off the
-        line numbers; instead it is replaced with a Python comment.
- """
- # sys.stderr.write("DEBUG: tokenize_open(%r)\n" % filename)
- # Will check the first & second lines for an encoding
- # AND REMOVE IT FROM THE TEXT RETURNED
- with io_open(filename, "rb") as handle:
- lines = list(handle)
-
- # Find the encoding
- first = lines[0] if lines else b""
- second = lines[1] if len(lines) > 1 else b""
- default = "utf-8"
- bom_found = False
- if first.startswith(BOM_UTF8):
- bom_found = True
- first = first[3:]
- default = "utf-8-sig"
- encoding = _find_cookie(first, filename, bom_found)
- if encoding:
- lines[0] = "# original encoding removed\n"
- if not encoding and blank_re.match(first):
- # sys.stderr.write("DEBUG: Trying second line %r\n"
- # % second)
- encoding = _find_cookie(second, filename, bom_found)
- if encoding:
- lines[1] = "# original encoding removed\n"
- if not encoding:
- encoding = default
-
- # sys.stderr.write("DEBUG: tokenize_open using encoding=%r\n"
- # % encoding)
-
- # Apply the encoding, using StringIO as we removed the
- # original encoding to help legacy code using exec.
- # for b in lines:
- # sys.stderr.write(b"DEBUG: " + b)
- return StringIO("".join(b.decode(encoding) for b in lines))
-
-
-###################################
-# End of backported tokenize code #
-###################################
+from pydocstyle.parser import Parser
import restructuredtext_lint as rst_lint
@@ -441,9 +319,5 @@ def load_source(self):
            else:
                self.source = TextIOWrapper(sys.stdin.buffer, errors="ignore").read()
        else:
-            # Could be a Python 2.7 StringIO with no context manager, sigh.
-            # with tokenize_open(self.filename) as fd:
-            # self.source = fd.read()
-            handle = tokenize_open(self.filename)
-            self.source = handle.read()
-            handle.close()
+            with tokenize_open(self.filename) as fd:
+                self.source = fd.read()