From 34a6836e654272e8a2480cd891ab27254caec2c0 Mon Sep 17 00:00:00 2001 From: John Sirois Date: Sun, 8 Oct 2023 10:25:11 -0700 Subject: [PATCH 1/2] Fix site-packages identification. In the process of working a new feature, it was discovered that `sys.path` manipulations leaked into `PythonInterpreter` when they were performed before the `__pex__` import hook. This is fixed by always going through the interpreter disk cache path, which has properly isolated `sys.path` for quite some time, eliminating the one exception for the current interpreter. Fixing this case caused several tests to fail that exposed several eager uses of `PythonInterpreter.get()` that would execute in import of various modules. Those cases are fixed to either be lazy or use `sys.executable`. Along the way, fix up the ~TODO for migrating from `distutils.sysconfig` to `sysconfig` for determining the site-packages directories. This should preempt issues with vendor (e.g.: Debian) supplied Python 3.12+. --- pex/interpreter.py | 38 ++++++++++++++++++++-------- pex/pex.py | 25 +++++++++++------- pex/pip/version.py | 15 ++++++----- pex/resolve/downloads.py | 4 +-- pex/resolve/target_options.py | 29 ++++++++------------- pex/resolver.py | 2 +- tests/integration/test_issue_1232.py | 9 +++---- tests/test_interpreter.py | 30 ++++++++++++++++++++++ tests/test_pex.py | 18 +++++-------- 9 files changed, 105 insertions(+), 65 deletions(-) diff --git a/pex/interpreter.py b/pex/interpreter.py index a493a13cb..e21424a1e 100644 --- a/pex/interpreter.py +++ b/pex/interpreter.py @@ -114,18 +114,32 @@ def _iter_site_packages(): # include a getsitepackages function. TRACER.log("The site module does not define getsitepackages: {err}".format(err=e)) + # The distutils package was deprecated in 3.10 and removed in 3.12. The sysconfig module was + # introduced in 3.2 but is not usable for our purposes until 3.11. We need + # `get_default_scheme` to get the current interpreter's installation scheme, which was made + # public in 3.10, but not made correct for venv interpreters until 3.11. + try: + import sysconfig + + get_default_scheme = getattr(sysconfig, "get_default_scheme", None) + if get_default_scheme and sys.version_info[:2] >= (3, 11): + scheme = get_default_scheme() + yield sysconfig.get_path("purelib", scheme) + yield sysconfig.get_path("platlib", scheme) + return + except ImportError: + pass + + # The distutils.sysconfig module is deprecated in Python 3.10 but still around. It goes away + # in 3.12 with viable replacements in sysconfig starting in Python 3.11. See above where we + # use those replacements preferentially, when available. try: from distutils.sysconfig import get_python_lib yield get_python_lib(plat_specific=False) yield get_python_lib(plat_specific=True) - except ImportError as e: - # The distutils.sysconfig module is deprecated in Python 3.10 but still around. - # Eventually it will go away with replacements in sysconfig that we'll add at that time - # as another site packages source. - TRACER.log( - "The distutils.sysconfig module does not define get_python_lib: {err}".format(err=e) - ) + except ImportError: + pass @staticmethod def _iter_extras_paths(site_packages): @@ -304,6 +318,11 @@ def encode(self): def binary(self): return self._binary + @property + def is_venv(self): + # type: () -> bool + return self._prefix != self._base_prefix + @property def prefix(self): # type: () -> str @@ -979,9 +998,6 @@ def _spawn_from_binary(cls, binary): cached_interpreter = cls._PYTHON_INTERPRETER_BY_NORMALIZED_PATH.get(canonicalized_binary) if cached_interpreter is not None: return SpawnedJob.completed(cached_interpreter) - if canonicalized_binary == cls.canonicalize_path(sys.executable): - current_interpreter = cls(PythonIdentity.get()) - return SpawnedJob.completed(current_interpreter) return cls._spawn_from_binary_external(canonicalized_binary) @classmethod @@ -1149,7 +1165,7 @@ def binary(self): def is_venv(self): # type: () -> bool """Return `True` if this interpreter is homed in a virtual environment.""" - return self._identity.prefix != self._identity.base_prefix + return self._identity.is_venv @property def prefix(self): diff --git a/pex/pex.py b/pex/pex.py index e59d126a4..8a14fea11 100644 --- a/pex/pex.py +++ b/pex/pex.py @@ -1,7 +1,7 @@ # Copyright 2014 Pants project contributors (see CONTRIBUTORS.md). # Licensed under the Apache License, Version 2.0 (see LICENSE). -from __future__ import absolute_import, print_function +from __future__ import absolute_import import ast import os @@ -17,7 +17,7 @@ from pex.executor import Executor from pex.finders import get_entry_point_from_console_script, get_script_from_distributions from pex.inherit_path import InheritPath -from pex.interpreter import PythonInterpreter +from pex.interpreter import PythonIdentity, PythonInterpreter from pex.layout import Layout from pex.orderedset import OrderedSet from pex.pex_info import PexInfo @@ -51,33 +51,40 @@ class IsolatedSysPath(object): @staticmethod def _expand_paths(*paths): # type: (*str) -> Iterator[str] + seen = set() for path in paths: + if path in seen: + continue + seen.add(path) yield path if not os.path.isabs(path): yield os.path.abspath(path) - yield os.path.realpath(path) + realpath = os.path.realpath(path) + if realpath != path: + yield realpath @classmethod def for_pex( cls, - interpreter, # type: PythonInterpreter + interpreter, # type: Union[PythonInterpreter, PythonIdentity] pex, # type: str pex_pex=None, # type: Optional[str] ): # type: (...) -> IsolatedSysPath - sys_path = OrderedSet(interpreter.sys_path) + ident = interpreter.identity if isinstance(interpreter, PythonInterpreter) else interpreter + sys_path = OrderedSet(ident.sys_path) sys_path.add(pex) sys_path.add(Bootstrap.locate().path) if pex_pex: sys_path.add(pex_pex) site_packages = OrderedSet() # type: OrderedSet[str] - for site_lib in interpreter.site_packages: + for site_lib in ident.site_packages: TRACER.log("Discarding site packages path: {site_lib}".format(site_lib=site_lib)) site_packages.add(site_lib) extras_paths = OrderedSet() # type: OrderedSet[str] - for extras_path in interpreter.extras_paths: + for extras_path in ident.extras_paths: TRACER.log("Discarding site extras path: {extras_path}".format(extras_path=extras_path)) extras_paths.add(extras_path) @@ -85,7 +92,7 @@ def for_pex( sys_path=sys_path, site_packages=site_packages, extras_paths=extras_paths, - is_venv=interpreter.is_venv, + is_venv=ident.is_venv, ) def __init__( @@ -339,7 +346,7 @@ def all_distribution_paths(path): # type: (Optional[str]) -> Iterable[str] if path is None: return () - locations = set(dist.location for dist in find_distributions(path)) + locations = set(dist.location for dist in find_distributions([path])) return {path} | locations | set(os.path.realpath(path) for path in locations) for path_element in sys.path: diff --git a/pex/pip/version.py b/pex/pip/version.py index 83e07058f..e31222d1f 100644 --- a/pex/pip/version.py +++ b/pex/pip/version.py @@ -4,8 +4,8 @@ from __future__ import absolute_import import os +import sys -from pex import targets from pex.dist_metadata import Requirement from pex.enum import Enum from pex.pep_440 import Version @@ -14,7 +14,7 @@ from pex.typing import TYPE_CHECKING, cast if TYPE_CHECKING: - from typing import Iterable, Optional, Tuple + from typing import Iterable, Optional, Tuple, Union class PipVersionValue(Enum.Value): @@ -72,10 +72,13 @@ def requirements(self): return self.requirement, self.setuptools_requirement, self.wheel_requirement def requires_python_applies(self, target=None): - # type: (Optional[Target]) -> bool + # type: (Optional[Union[Version,Target]]) -> bool if not self.requires_python: return True + if isinstance(target, Version): + return str(target) in self.requires_python + return LocalInterpreter.create( interpreter=target.get_interpreter() if target else None ).requires_python_applies( @@ -102,12 +105,12 @@ def __init__(self, preferred): def __get__(self, obj, objtype=None): if not hasattr(self, "_default"): self._default = None - current_target = targets.current() + current_version = Version(".".join(map(str, sys.version_info[:3]))) preferred_versions = ( [PipVersionValue.overridden()] if PipVersionValue.overridden() else self._preferred ) for preferred_version in preferred_versions: - if preferred_version.requires_python_applies(current_target): + if preferred_version.requires_python_applies(current_version): self._default = preferred_version break if self._default is None: @@ -115,7 +118,7 @@ def __get__(self, obj, objtype=None): ( version for version in PipVersionValue._iter_values() - if not version.hidden and version.requires_python_applies(current_target) + if not version.hidden and version.requires_python_applies(current_version) ), key=lambda pv: pv.version, ) diff --git a/pex/resolve/downloads.py b/pex/resolve/downloads.py index 93afb335e..b9230b6c2 100644 --- a/pex/resolve/downloads.py +++ b/pex/resolve/downloads.py @@ -51,9 +51,9 @@ def get_downloads_dir(pex_root=None): @attr.s(frozen=True) class ArtifactDownloader(object): resolver = attr.ib() # type: Resolver - target = attr.ib(default=LocalInterpreter.create()) # type: Target + target = attr.ib(factory=LocalInterpreter.create) # type: Target package_index_configuration = attr.ib( - default=PackageIndexConfiguration.create() + factory=PackageIndexConfiguration.create ) # type: PackageIndexConfiguration max_parallel_jobs = attr.ib(default=None) # type: Optional[int] pip = attr.ib(init=False) # type: Pip diff --git a/pex/resolve/target_options.py b/pex/resolve/target_options.py index 1fd93fb5e..789b1a29b 100644 --- a/pex/resolve/target_options.py +++ b/pex/resolve/target_options.py @@ -61,16 +61,12 @@ def register( ), ) - current_interpreter = PythonInterpreter.get() - program = sys.argv[0] singe_interpreter_info_cmd = ( - "PEX_TOOLS=1 {current_interpreter} {program} interpreter --verbose --indent 4".format( - current_interpreter=current_interpreter.binary, program=program + "pex3 interpreter inspect --python {current_interpreter} --verbose --indent 4".format( + current_interpreter=sys.executable ) ) - all_interpreters_info_cmd = ( - "PEX_TOOLS=1 {program} interpreter --all --verbose --indent 4".format(program=program) - ) + all_interpreters_info_cmd = "pex3 interpreter inspect --all --verbose --indent 4" parser.add_argument( "--interpreter-constraint", @@ -86,7 +82,7 @@ def register( "the constraints. Try `{singe_interpreter_info_cmd}` to find the exact interpreter " "constraints of {current_interpreter} and `{all_interpreters_info_cmd}` to find out " "the interpreter constraints of all Python interpreters on the $PATH.".format( - current_interpreter=current_interpreter.binary, + current_interpreter=sys.executable, singe_interpreter_info_cmd=singe_interpreter_info_cmd, all_interpreters_info_cmd=all_interpreters_info_cmd, ) @@ -94,14 +90,11 @@ def register( ) if include_platforms: - _register_platform_options( - parser, current_interpreter, singe_interpreter_info_cmd, all_interpreters_info_cmd - ) + _register_platform_options(parser, singe_interpreter_info_cmd, all_interpreters_info_cmd) def _register_platform_options( parser, # type: _ActionsContainer - current_interpreter, # type: PythonInterpreter singe_interpreter_info_cmd, # type: str all_interpreters_info_cmd, # type: str ): @@ -120,13 +113,11 @@ def _register_platform_options( "string composed of fields: ---. " "These fields stem from wheel name conventions as outlined in " "https://www.python.org/dev/peps/pep-0427#file-name-convention and influenced by " - "https://www.python.org/dev/peps/pep-0425. For the current interpreter at " - "{current_interpreter} the full platform string is {current_platform}. To find out " - "more, try `{all_interpreters_info_cmd}` to print out the platform for all " - "interpreters on the $PATH or `{singe_interpreter_info_cmd}` to inspect the single " - "interpreter {current_interpreter}.".format( - current_interpreter=current_interpreter.binary, - current_platform=current_interpreter.platform, + "https://www.python.org/dev/peps/pep-0425. To find out more, try " + "`{all_interpreters_info_cmd}` to print out the platform for all interpreters on the " + "$PATH or `{singe_interpreter_info_cmd}` to inspect the single interpreter " + "{current_interpreter}.".format( + current_interpreter=sys.executable, singe_interpreter_info_cmd=singe_interpreter_info_cmd, all_interpreters_info_cmd=all_interpreters_info_cmd, ) diff --git a/pex/resolver.py b/pex/resolver.py index 6e6b5bb19..023c68bd5 100644 --- a/pex/resolver.py +++ b/pex/resolver.py @@ -1154,7 +1154,7 @@ def _download_internal( class LocalDistribution(object): path = attr.ib() # type: str fingerprint = attr.ib() # type: str - target = attr.ib(default=targets.current()) # type: Target + target = attr.ib(factory=targets.current) # type: Target @fingerprint.default def _calculate_fingerprint(self): diff --git a/tests/integration/test_issue_1232.py b/tests/integration/test_issue_1232.py index 6a0b2b48f..83d9a9d27 100644 --- a/tests/integration/test_issue_1232.py +++ b/tests/integration/test_issue_1232.py @@ -95,13 +95,10 @@ def vendored_toplevel(isolated_dir): current_pex_isolation = set(current_isolated_vendoreds.keys()) ^ set( current_pex_isolated_vendoreds.keys() ) - assert 1 == len(current_pex_isolation), ( - "Since the modified Pex PEX was built from a Pex PEX an isolation of the Pex PEX bootstrap " - "code should have occurred bringing the total isolations up to two." + assert 0 == len(current_pex_isolation), ( + "Since the current Pex PEX was built from the same Pex code as the current loose source " + "Pex, a new isolation of the Pex PEX bootstrap code should not have occurred." ) - current_pex_vendoreds = current_pex_isolated_vendoreds[current_pex_isolation.pop()] - assert "pip" not in current_pex_vendoreds, "Expected a Pex runtime isolation." - assert "wheel" not in current_pex_vendoreds, "Expected a Pex runtime isolation." # 3. Isolate modified Pex PEX at build-time. # === diff --git a/tests/test_interpreter.py b/tests/test_interpreter.py index 97a288d8b..8c3a2c160 100644 --- a/tests/test_interpreter.py +++ b/tests/test_interpreter.py @@ -508,3 +508,33 @@ def test_sys_path(python): 'Its expected the sys_path matches the runtime sys.path with the exception of the PWD ("") ' "head entry." ) + + +def test_sys_path_leak_for_current(tmpdir): + # type: (Any) -> None + + expected_sys_path = [ + entry + for entry in json.loads( + subprocess.check_output( + args=[ + sys.executable, + "-sE", + "-c", + "import json, sys; json.dump(sys.path, sys.stdout)", + ] + ).decode("utf-8") + ) + # N.B.: We expect the PythonInterpreter infrastructure to elide the implicit "" / CWD + # sys.path entry. + if entry + ] + pex_root = os.path.join(str(tmpdir), "pex_root") + + new_sys_path_entry = os.path.join(str(tmpdir), "not-interpreter-sys-path") + with ENV.patch(PEX_ROOT=pex_root), PythonInterpreter._cleared_memory_cache(): + sys.path.append(new_sys_path_entry) + try: + assert tuple(expected_sys_path) == PythonInterpreter.get().sys_path + finally: + sys.path.pop() diff --git a/tests/test_pex.py b/tests/test_pex.py index 850fc4acb..58dfa9c2b 100644 --- a/tests/test_pex.py +++ b/tests/test_pex.py @@ -17,13 +17,12 @@ from pex import resolver from pex.common import safe_mkdir, safe_open, temporary_dir from pex.compatibility import PY2, WINDOWS, to_bytes -from pex.dist_metadata import Distribution, Requirement -from pex.interpreter import PythonInterpreter +from pex.dist_metadata import Distribution +from pex.interpreter import PythonIdentity, PythonInterpreter from pex.pex import PEX, IsolatedSysPath from pex.pex_builder import PEXBuilder from pex.pex_info import PexInfo from pex.resolve.configured_resolver import ConfiguredResolver -from pex.resolve.resolver_configuration import PipConfiguration from pex.typing import TYPE_CHECKING from pex.util import named_temporary_file from testing import ( @@ -222,8 +221,7 @@ def test_site_libs(tmpdir): site_packages = os.path.join(str(tmpdir), "site-packages") os.mkdir(site_packages) mock_site_packages.return_value = {site_packages} - with PythonInterpreter._cleared_memory_cache(): - site_libs = PythonInterpreter.get().site_packages + site_libs = PythonIdentity.get().site_packages assert site_packages in site_libs @@ -245,10 +243,9 @@ def test_site_libs_symlink(tmpdir): os.symlink(site_packages, site_packages_link) mock_site_packages.return_value = [site_packages_link] - with PythonInterpreter._cleared_memory_cache(): - isolated_sys_path = IsolatedSysPath.for_pex( - interpreter=PythonInterpreter.get(), pex=os.devnull - ) + isolated_sys_path = IsolatedSysPath.for_pex( + interpreter=PythonIdentity.get(), pex=os.devnull + ) assert os.path.join(sys_path_entry, "module.py") in isolated_sys_path assert os.path.realpath(site_packages) not in isolated_sys_path assert site_packages_link not in isolated_sys_path @@ -267,8 +264,7 @@ def test_site_libs_excludes_prefix(): site_packages = os.path.join(tempdir, "site-packages") os.mkdir(site_packages) mock_site_packages.return_value = [site_packages, sys.prefix] - with PythonInterpreter._cleared_memory_cache(): - site_libs = PythonInterpreter.get().site_packages + site_libs = PythonIdentity.get().site_packages assert site_packages in site_libs assert sys.prefix not in site_libs From 9749f41cc45482c7e28ab6359050b6452177bf1f Mon Sep 17 00:00:00 2001 From: John Sirois Date: Sun, 8 Oct 2023 19:27:40 -0700 Subject: [PATCH 2/2] Fixup the vendor bootstrap. --- pex/vendor/__main__.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pex/vendor/__main__.py b/pex/vendor/__main__.py index 93a6cd53c..928664e2f 100644 --- a/pex/vendor/__main__.py +++ b/pex/vendor/__main__.py @@ -15,6 +15,7 @@ from redbaron import CommentNode, LiteralyEvaluable, NameNode, RedBaron from pex.common import safe_delete, safe_mkdir, safe_mkdtemp, safe_open, safe_rmtree +from pex.interpreter import PythonInterpreter from pex.vendor import VendorSpec, iter_vendor_specs @@ -206,11 +207,12 @@ def find_site_packages(prefix_dir): ) -def vendorize(root_dir, vendor_specs, prefix, update): +def vendorize(interpreter, root_dir, vendor_specs, prefix, update): # There is bootstrapping catch-22 here. In order for `pex.third_party` to work, all 3rdparty # importable code must lie at the top of its vendored chroot. Although # `pex.pep_376.Record.fixup_install` encodes the logic to achieve this layout, we can't run - # that without 1st approximating that layout!. We take the tack of doing the --prefix + # that without the current `pex.interpreter.PythonInterpreter` and 1st approximating that + # layout!. We take the tack of saving the current interpreter and then doing the `--prefix` # install off into a temp dir, moving the site-packages importables into the vendor chroot, # importing the code we'll need, then moving the importables back. moved = {} @@ -361,7 +363,9 @@ def vendorize(root_dir, vendor_specs, prefix, update): project_name=dist.project_name, version=dist.version, ) - record.fixup_install(exclude=("constraints.txt", "__init__.py", "__pycache__")) + record.fixup_install( + exclude=("constraints.txt", "__init__.py", "__pycache__"), interpreter=interpreter + ) if __name__ == "__main__": @@ -375,10 +379,13 @@ def vendorize(root_dir, vendor_specs, prefix, update): ) options = parser.parse_args() + # Grab the PythonInterpreter before we nuke the supporting code needed to identify it. + interpreter = PythonInterpreter.get() root_directory = VendorSpec.ROOT try: safe_rmtree(VendorSpec.vendor_root()) vendorize( + interpreter=interpreter, root_dir=root_directory, vendor_specs=list(iter_vendor_specs()), prefix="pex.third_party",