From e110db54b49439602ec9854ef29618b457bda420 Mon Sep 17 00:00:00 2001
From: Benoit Pierre
Date: Wed, 25 Oct 2017 17:55:26 +0200
Subject: [PATCH] improve encoding handling for `setup.cfg`

Support the same mechanism as for Python sources for declaring
the encoding to be used when reading `setup.cfg` (see PEP 263),
and return the results of reading it as Unicode.

Fix #1062 and #1136.
---
 setuptools/__init__.py            | 34 ++++++++++++++++
 setuptools/dist.py                |  2 +-
 setuptools/py36compat.py          | 37 ++++++++++++------
 setuptools/tests/test_config.py   | 65 ++++++++++++++++++++++++++++++-
 setuptools/tests/test_egg_info.py | 21 ++++++++++
 5 files changed, 144 insertions(+), 15 deletions(-)

diff --git a/setuptools/__init__.py b/setuptools/__init__.py
index 04f76740828..77b4a374403 100644
--- a/setuptools/__init__.py
+++ b/setuptools/__init__.py
@@ -4,9 +4,12 @@
 import functools
 import distutils.core
 import distutils.filelist
+import re
+from distutils.errors import DistutilsOptionError
 from distutils.util import convert_path
 from fnmatch import fnmatchcase
 
+from setuptools.extern.six import string_types
 from setuptools.extern.six.moves import filter, map
 
 import setuptools.version
@@ -127,6 +130,37 @@ def __init__(self, dist, **kw):
         _Command.__init__(self, dist)
         vars(self).update(kw)
 
+    def _ensure_stringlike(self, option, what, default=None):
+        val = getattr(self, option)
+        if val is None:
+            setattr(self, option, default)
+            return default
+        elif not isinstance(val, string_types):
+            raise DistutilsOptionError("'%s' must be a %s (got `%s`)"
+                                       % (option, what, val))
+        return val
+
+    def ensure_string_list(self, option):
+        r"""Ensure that 'option' is a list of strings. If 'option' is
+        currently a string, we split it either on /,\s*/ or /\s+/, so
+        "foo bar baz", "foo,bar,baz", and "foo, bar baz" all become
+        ["foo", "bar", "baz"].
+        """
+        val = getattr(self, option)
+        if val is None:
+            return
+        elif isinstance(val, string_types):
+            setattr(self, option, re.split(r',\s*|\s+', val))
+        else:
+            if isinstance(val, list):
+                ok = all(isinstance(v, string_types) for v in val)
+            else:
+                ok = False
+            if not ok:
+                raise DistutilsOptionError(
+                    "'%s' must be a list of strings (got %r)"
+                    % (option, val))
+
     def reinitialize_command(self, command, reinit_subcommands=0, **kw):
         cmd = _Command.reinitialize_command(self, command, reinit_subcommands)
         vars(cmd).update(kw)
diff --git a/setuptools/dist.py b/setuptools/dist.py
index a2ca879516b..b10bd6f7e43 100644
--- a/setuptools/dist.py
+++ b/setuptools/dist.py
@@ -432,7 +432,7 @@ def parse_config_files(self, filenames=None):
         and loads configuration.
 
         """
-        _Distribution.parse_config_files(self, filenames=filenames)
+        Distribution_parse_config_files.parse_config_files(self, filenames=filenames)
 
         parse_configuration(self, self.command_options)
         self._finalize_requires()
diff --git a/setuptools/py36compat.py b/setuptools/py36compat.py
index f5279696457..3d3c34ec508 100644
--- a/setuptools/py36compat.py
+++ b/setuptools/py36compat.py
@@ -1,7 +1,21 @@
+import io
+import re
 import sys
 from distutils.errors import DistutilsOptionError
 from distutils.util import strtobool
 from distutils.debug import DEBUG
+from setuptools.extern import six
+
+
+CODING_RE = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')
+
+def detect_encoding(fp):
+    first_line = fp.readline()
+    fp.seek(0)
+    m = CODING_RE.match(first_line)
+    if m is None:
+        return None
+    return m.group(1).decode('ascii')
 
 
 class Distribution_parse_config_files:
@@ -13,10 +27,10 @@ class Distribution_parse_config_files:
     as implemented in distutils.
     """
     def parse_config_files(self, filenames=None):
-        from configparser import ConfigParser
+        from setuptools.extern.six.moves.configparser import ConfigParser
 
         # Ignore install directory options if we have a venv
-        if sys.prefix != sys.base_prefix:
+        if six.PY3 and sys.prefix != sys.base_prefix:
             ignore_options = [
                 'install-base', 'install-platbase', 'install-lib',
                 'install-platlib', 'install-purelib', 'install-headers',
@@ -33,11 +47,16 @@ def parse_config_files(self, filenames=None):
         if DEBUG:
             self.announce("Distribution.parse_config_files():")
 
-        parser = ConfigParser(interpolation=None)
+        parser = ConfigParser()
         for filename in filenames:
-            if DEBUG:
-                self.announce(" reading %s" % filename)
-            parser.read(filename)
+            with io.open(filename, 'rb') as fp:
+                encoding = detect_encoding(fp)
+                if DEBUG:
+                    self.announce(" reading %s [%s]" % (
+                        filename, encoding or 'locale')
+                    )
+                reader = io.TextIOWrapper(fp, encoding=encoding)
+                (parser.read_file if six.PY3 else parser.readfp)(reader)
             for section in parser.sections():
                 options = parser.options(section)
                 opt_dict = self.get_option_dict(section)
@@ -69,12 +88,6 @@ def parse_config_files(self, filenames=None):
                     raise DistutilsOptionError(msg)
 
 
-if sys.version_info < (3,):
-    # Python 2 behavior is sufficient
-    class Distribution_parse_config_files:
-        pass
-
-
 if False:
     # When updated behavior is available upstream,
     # disable override here.
diff --git a/setuptools/tests/test_config.py b/setuptools/tests/test_config.py
index 2494a0bc200..89fde257c26 100644
--- a/setuptools/tests/test_config.py
+++ b/setuptools/tests/test_config.py
@@ -1,9 +1,13 @@
+# -*- coding: UTF-8 -*-
+from __future__ import unicode_literals
+
 import contextlib
 import pytest
 from distutils.errors import DistutilsOptionError, DistutilsFileError
 from setuptools.dist import Distribution
 from setuptools.config import ConfigHandler, read_configuration
 from setuptools.extern.six.moves.configparser import InterpolationMissingOptionError
+from setuptools.tests import is_ascii
 
 
 class ErrConfigHandler(ConfigHandler):
@@ -17,7 +21,7 @@ def make_package_dir(name, base_dir):
     return dir_package, init_file
 
 
-def fake_env(tmpdir, setup_cfg, setup_py=None):
+def fake_env(tmpdir, setup_cfg, setup_py=None, encoding='ascii'):
 
     if setup_py is None:
         setup_py = (
@@ -27,7 +31,7 @@ def fake_env(tmpdir, setup_cfg, setup_py=None):
 
     tmpdir.join('setup.py').write(setup_py)
     config = tmpdir.join('setup.cfg')
-    config.write(setup_cfg)
+    config.write(setup_cfg.encode(encoding), mode='wb')
 
     package_dir, init_file = make_package_dir('fake_package', tmpdir)
 
@@ -317,6 +321,63 @@ def test_interpolation(self, tmpdir):
             with get_dist(tmpdir):
                 pass
 
+    skip_if_not_ascii = pytest.mark.skipif(not is_ascii, reason='Test not supported with this locale')
+
+    @skip_if_not_ascii
+    def test_non_ascii_1(self, tmpdir):
+        fake_env(
+            tmpdir,
+            '[metadata]\n'
+            'description = éàïôñ\n',
+            encoding='utf-8'
+        )
+        with pytest.raises(UnicodeDecodeError):
+            with get_dist(tmpdir):
+                pass
+
+    def test_non_ascii_2(self, tmpdir):
+        fake_env(
+            tmpdir,
+            '# -*- coding: invalid\n'
+        )
+        with pytest.raises(LookupError):
+            with get_dist(tmpdir):
+                pass
+
+    def test_non_ascii_3(self, tmpdir):
+        fake_env(
+            tmpdir,
+            '\n'
+            '# -*- coding: invalid\n'
+        )
+        with get_dist(tmpdir):
+            pass
+
+    @skip_if_not_ascii
+    def test_non_ascii_4(self, tmpdir):
+        fake_env(
+            tmpdir,
+            '# -*- coding: utf-8\n'
+            '[metadata]\n'
+            'description = éàïôñ\n',
+            encoding='utf-8'
+        )
+        with get_dist(tmpdir) as dist:
+            assert dist.metadata.description == 'éàïôñ'
+
+    @skip_if_not_ascii
+    def test_non_ascii_5(self, tmpdir):
+        fake_env(
+            tmpdir,
+            '# vim: set fileencoding=iso-8859-15 :\n'
+            '[metadata]\n'
+            'description = éàïôñ\n',
+            encoding='iso-8859-15'
+        )
+        with get_dist(tmpdir) as dist:
+            assert dist.metadata.description == 'éàïôñ'
+
+
 class TestOptions:
 
     def test_basic(self, tmpdir):
diff --git a/setuptools/tests/test_egg_info.py b/setuptools/tests/test_egg_info.py
index 1411f93c0d9..59ba991e98c 100644
--- a/setuptools/tests/test_egg_info.py
+++ b/setuptools/tests/test_egg_info.py
@@ -497,3 +497,24 @@ def __init__(self, files, base):
         # expect exactly one result
         result, = results
         return result
+
+    def test_egg_info_with_src_in_setup_cfg(self, tmpdir_cwd, env):
+        """
+        Check for issue #1136: invalid string type when
+        reading declarative `setup.cfg` under Python 2.
+        """
+        build_files({
+            'setup.py': DALS(
+                """
+                from setuptools import setup
+                setup(name="barbazquux", version="4.2")
+                """),
+            'setup.cfg': DALS(
+                """
+                [options]
+                package_dir =
+                    = src
+                """),
+            'src': { 'barbazquux.py': "" },
+        })
+        self._run_install_command(tmpdir_cwd, env)