Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix site-packages identification. #2262

Merged
merged 3 commits into from
Oct 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 27 additions & 11 deletions pex/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,18 +114,32 @@ def _iter_site_packages():
# include a getsitepackages function.
TRACER.log("The site module does not define getsitepackages: {err}".format(err=e))

# The distutils package was deprecated in 3.10 and removed in 3.12. The sysconfig module was
# introduced in 3.2 but is not usable for our purposes until 3.11. We need
# `get_default_scheme` to get the current interpreter's installation scheme, which was made
# public in 3.10, but not made correct for venv interpreters until 3.11.
try:
import sysconfig

get_default_scheme = getattr(sysconfig, "get_default_scheme", None)
if get_default_scheme and sys.version_info[:2] >= (3, 11):
scheme = get_default_scheme()
yield sysconfig.get_path("purelib", scheme)
yield sysconfig.get_path("platlib", scheme)
return
except ImportError:
pass

# The distutils.sysconfig module is deprecated in Python 3.10 but still around. It goes away
# in 3.12 with viable replacements in sysconfig starting in Python 3.11. See above where we
# use those replacements preferentially, when available.
try:
from distutils.sysconfig import get_python_lib

yield get_python_lib(plat_specific=False)
yield get_python_lib(plat_specific=True)
except ImportError as e:
# The distutils.sysconfig module is deprecated in Python 3.10 but still around.
# Eventually it will go away with replacements in sysconfig that we'll add at that time
# as another site packages source.
TRACER.log(
"The distutils.sysconfig module does not define get_python_lib: {err}".format(err=e)
)
except ImportError:
pass

@staticmethod
def _iter_extras_paths(site_packages):
Expand Down Expand Up @@ -304,6 +318,11 @@ def encode(self):
def binary(self):
return self._binary

@property
def is_venv(self):
# type: () -> bool
return self._prefix != self._base_prefix

@property
def prefix(self):
# type: () -> str
Expand Down Expand Up @@ -979,9 +998,6 @@ def _spawn_from_binary(cls, binary):
cached_interpreter = cls._PYTHON_INTERPRETER_BY_NORMALIZED_PATH.get(canonicalized_binary)
if cached_interpreter is not None:
return SpawnedJob.completed(cached_interpreter)
if canonicalized_binary == cls.canonicalize_path(sys.executable):
current_interpreter = cls(PythonIdentity.get())
Copy link
Member Author

@jsirois jsirois Oct 9, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was the bug. Here PythonIdentity.get() slurps up the ambient sys.path which may have been mucked with. Totally classic premature optimization. Don't optimize, instead just don't pessimize. Do the simplest thing, you're not smart enough to do more.

return SpawnedJob.completed(current_interpreter)
return cls._spawn_from_binary_external(canonicalized_binary)

@classmethod
Expand Down Expand Up @@ -1149,7 +1165,7 @@ def binary(self):
def is_venv(self):
# type: () -> bool
"""Return `True` if this interpreter is homed in a virtual environment."""
return self._identity.prefix != self._identity.base_prefix
return self._identity.is_venv

@property
def prefix(self):
Expand Down
27 changes: 15 additions & 12 deletions pex/pex.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# Copyright 2014 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import absolute_import, print_function
from __future__ import absolute_import

import ast
import itertools
import os
import sys
from site import USER_SITE
Expand All @@ -17,7 +18,7 @@
from pex.executor import Executor
from pex.finders import get_entry_point_from_console_script, get_script_from_distributions
from pex.inherit_path import InheritPath
from pex.interpreter import PythonInterpreter
from pex.interpreter import PythonIdentity, PythonInterpreter
from pex.layout import Layout
from pex.orderedset import OrderedSet
from pex.pex_info import PexInfo
Expand Down Expand Up @@ -50,42 +51,44 @@
class IsolatedSysPath(object):
@staticmethod
def _expand_paths(*paths):
# type: (*str) -> Iterator[str]
for path in paths:
# type: (*str) -> OrderedSet[str]
def iter_synonyms(path):
yield path
if not os.path.isabs(path):
yield os.path.abspath(path)
yield os.path.abspath(path)
yield os.path.realpath(path)

return OrderedSet(itertools.chain.from_iterable(iter_synonyms(path) for path in paths))

@classmethod
def for_pex(
cls,
interpreter, # type: PythonInterpreter
interpreter, # type: Union[PythonInterpreter, PythonIdentity]
pex, # type: str
pex_pex=None, # type: Optional[str]
):
# type: (...) -> IsolatedSysPath
sys_path = OrderedSet(interpreter.sys_path)
ident = interpreter.identity if isinstance(interpreter, PythonInterpreter) else interpreter
sys_path = OrderedSet(ident.sys_path)
sys_path.add(pex)
sys_path.add(Bootstrap.locate().path)
if pex_pex:
sys_path.add(pex_pex)

site_packages = OrderedSet() # type: OrderedSet[str]
for site_lib in interpreter.site_packages:
for site_lib in ident.site_packages:
TRACER.log("Discarding site packages path: {site_lib}".format(site_lib=site_lib))
site_packages.add(site_lib)

extras_paths = OrderedSet() # type: OrderedSet[str]
for extras_path in interpreter.extras_paths:
for extras_path in ident.extras_paths:
TRACER.log("Discarding site extras path: {extras_path}".format(extras_path=extras_path))
extras_paths.add(extras_path)

return cls(
sys_path=sys_path,
site_packages=site_packages,
extras_paths=extras_paths,
is_venv=interpreter.is_venv,
is_venv=ident.is_venv,
)

def __init__(
Expand Down Expand Up @@ -339,7 +342,7 @@ def all_distribution_paths(path):
# type: (Optional[str]) -> Iterable[str]
if path is None:
return ()
locations = set(dist.location for dist in find_distributions(path))
locations = set(dist.location for dist in find_distributions([path]))
return {path} | locations | set(os.path.realpath(path) for path in locations)

for path_element in sys.path:
Expand Down
15 changes: 9 additions & 6 deletions pex/pip/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from __future__ import absolute_import

import os
import sys

from pex import targets
from pex.dist_metadata import Requirement
from pex.enum import Enum
from pex.pep_440 import Version
Expand All @@ -14,7 +14,7 @@
from pex.typing import TYPE_CHECKING, cast

if TYPE_CHECKING:
from typing import Iterable, Optional, Tuple
from typing import Iterable, Optional, Tuple, Union


class PipVersionValue(Enum.Value):
Expand Down Expand Up @@ -72,10 +72,13 @@ def requirements(self):
return self.requirement, self.setuptools_requirement, self.wheel_requirement

def requires_python_applies(self, target=None):
# type: (Optional[Target]) -> bool
# type: (Optional[Union[Version,Target]]) -> bool
if not self.requires_python:
return True

if isinstance(target, Version):
return str(target) in self.requires_python

return LocalInterpreter.create(
interpreter=target.get_interpreter() if target else None
).requires_python_applies(
Expand All @@ -102,20 +105,20 @@ def __init__(self, preferred):
def __get__(self, obj, objtype=None):
if not hasattr(self, "_default"):
self._default = None
current_target = targets.current()
current_version = Version(".".join(map(str, sys.version_info[:3])))
preferred_versions = (
[PipVersionValue.overridden()] if PipVersionValue.overridden() else self._preferred
)
for preferred_version in preferred_versions:
if preferred_version.requires_python_applies(current_target):
if preferred_version.requires_python_applies(current_version):
self._default = preferred_version
break
if self._default is None:
self._default = max(
(
version
for version in PipVersionValue._iter_values()
if not version.hidden and version.requires_python_applies(current_target)
if not version.hidden and version.requires_python_applies(current_version)
),
key=lambda pv: pv.version,
)
Expand Down
4 changes: 2 additions & 2 deletions pex/resolve/downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ def get_downloads_dir(pex_root=None):
@attr.s(frozen=True)
class ArtifactDownloader(object):
resolver = attr.ib() # type: Resolver
target = attr.ib(default=LocalInterpreter.create()) # type: Target
target = attr.ib(factory=LocalInterpreter.create) # type: Target
package_index_configuration = attr.ib(
default=PackageIndexConfiguration.create()
factory=PackageIndexConfiguration.create
) # type: PackageIndexConfiguration
max_parallel_jobs = attr.ib(default=None) # type: Optional[int]
pip = attr.ib(init=False) # type: Pip
Expand Down
33 changes: 12 additions & 21 deletions pex/resolve/target_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,12 @@ def register(
),
)

current_interpreter = PythonInterpreter.get()
program = sys.argv[0]
singe_interpreter_info_cmd = (
"PEX_TOOLS=1 {current_interpreter} {program} interpreter --verbose --indent 4".format(
current_interpreter=current_interpreter.binary, program=program
single_interpreter_info_cmd = (
"pex3 interpreter inspect --python {current_interpreter} --verbose --indent 4".format(
current_interpreter=sys.executable
)
)
all_interpreters_info_cmd = (
"PEX_TOOLS=1 {program} interpreter --all --verbose --indent 4".format(program=program)
)
all_interpreters_info_cmd = "pex3 interpreter inspect --all --verbose --indent 4"

parser.add_argument(
"--interpreter-constraint",
Expand All @@ -86,22 +82,19 @@ def register(
"the constraints. Try `{singe_interpreter_info_cmd}` to find the exact interpreter "
"constraints of {current_interpreter} and `{all_interpreters_info_cmd}` to find out "
"the interpreter constraints of all Python interpreters on the $PATH.".format(
current_interpreter=current_interpreter.binary,
singe_interpreter_info_cmd=singe_interpreter_info_cmd,
current_interpreter=sys.executable,
singe_interpreter_info_cmd=single_interpreter_info_cmd,
all_interpreters_info_cmd=all_interpreters_info_cmd,
)
),
)

if include_platforms:
_register_platform_options(
parser, current_interpreter, singe_interpreter_info_cmd, all_interpreters_info_cmd
)
_register_platform_options(parser, single_interpreter_info_cmd, all_interpreters_info_cmd)


def _register_platform_options(
parser, # type: _ActionsContainer
current_interpreter, # type: PythonInterpreter
singe_interpreter_info_cmd, # type: str
all_interpreters_info_cmd, # type: str
):
Expand All @@ -120,13 +113,11 @@ def _register_platform_options(
"string composed of fields: <platform>-<python impl abbr>-<python version>-<abi>. "
"These fields stem from wheel name conventions as outlined in "
"https://www.python.org/dev/peps/pep-0427#file-name-convention and influenced by "
"https://www.python.org/dev/peps/pep-0425. For the current interpreter at "
"{current_interpreter} the full platform string is {current_platform}. To find out "
"more, try `{all_interpreters_info_cmd}` to print out the platform for all "
"interpreters on the $PATH or `{singe_interpreter_info_cmd}` to inspect the single "
"interpreter {current_interpreter}.".format(
current_interpreter=current_interpreter.binary,
current_platform=current_interpreter.platform,
"https://www.python.org/dev/peps/pep-0425. To find out more, try "
"`{all_interpreters_info_cmd}` to print out the platform for all interpreters on the "
"$PATH or `{singe_interpreter_info_cmd}` to inspect the single interpreter "
"{current_interpreter}.".format(
current_interpreter=sys.executable,
singe_interpreter_info_cmd=singe_interpreter_info_cmd,
all_interpreters_info_cmd=all_interpreters_info_cmd,
)
Expand Down
2 changes: 1 addition & 1 deletion pex/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -1154,7 +1154,7 @@ def _download_internal(
class LocalDistribution(object):
path = attr.ib() # type: str
fingerprint = attr.ib() # type: str
target = attr.ib(default=targets.current()) # type: Target
target = attr.ib(factory=targets.current) # type: Target

@fingerprint.default
def _calculate_fingerprint(self):
Expand Down
13 changes: 10 additions & 3 deletions pex/vendor/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from redbaron import CommentNode, LiteralyEvaluable, NameNode, RedBaron

from pex.common import safe_delete, safe_mkdir, safe_mkdtemp, safe_open, safe_rmtree
from pex.interpreter import PythonInterpreter
from pex.vendor import VendorSpec, iter_vendor_specs


Expand Down Expand Up @@ -206,11 +207,12 @@ def find_site_packages(prefix_dir):
)


def vendorize(root_dir, vendor_specs, prefix, update):
def vendorize(interpreter, root_dir, vendor_specs, prefix, update):
# There is bootstrapping catch-22 here. In order for `pex.third_party` to work, all 3rdparty
# importable code must lie at the top of its vendored chroot. Although
# `pex.pep_376.Record.fixup_install` encodes the logic to achieve this layout, we can't run
# that without 1st approximating that layout!. We take the tack of doing the --prefix
# that without the current `pex.interpreter.PythonInterpreter` and 1st approximating that
# layout!. We take the tack of saving the current interpreter and then doing the `--prefix`
# install off into a temp dir, moving the site-packages importables into the vendor chroot,
# importing the code we'll need, then moving the importables back.
moved = {}
Expand Down Expand Up @@ -361,7 +363,9 @@ def vendorize(root_dir, vendor_specs, prefix, update):
project_name=dist.project_name,
version=dist.version,
)
record.fixup_install(exclude=("constraints.txt", "__init__.py", "__pycache__"))
record.fixup_install(
exclude=("constraints.txt", "__init__.py", "__pycache__"), interpreter=interpreter
)


if __name__ == "__main__":
Expand All @@ -375,10 +379,13 @@ def vendorize(root_dir, vendor_specs, prefix, update):
)
options = parser.parse_args()

# Grab the PythonInterpreter before we nuke the supporting code needed to identify it.
interpreter = PythonInterpreter.get()
root_directory = VendorSpec.ROOT
try:
safe_rmtree(VendorSpec.vendor_root())
vendorize(
interpreter=interpreter,
root_dir=root_directory,
vendor_specs=list(iter_vendor_specs()),
prefix="pex.third_party",
Expand Down
9 changes: 3 additions & 6 deletions tests/integration/test_issue_1232.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,10 @@ def vendored_toplevel(isolated_dir):
current_pex_isolation = set(current_isolated_vendoreds.keys()) ^ set(
current_pex_isolated_vendoreds.keys()
)
assert 1 == len(current_pex_isolation), (
"Since the modified Pex PEX was built from a Pex PEX an isolation of the Pex PEX bootstrap "
"code should have occurred bringing the total isolations up to two."
assert 0 == len(current_pex_isolation), (
"Since the current Pex PEX was built from the same Pex code as the current loose source "
"Pex, a new isolation of the Pex PEX bootstrap code should not have occurred."
)
current_pex_vendoreds = current_pex_isolated_vendoreds[current_pex_isolation.pop()]
assert "pip" not in current_pex_vendoreds, "Expected a Pex runtime isolation."
assert "wheel" not in current_pex_vendoreds, "Expected a Pex runtime isolation."

# 3. Isolate modified Pex PEX at build-time.
# ===
Expand Down
30 changes: 30 additions & 0 deletions tests/test_interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,3 +508,33 @@ def test_sys_path(python):
'Its expected the sys_path matches the runtime sys.path with the exception of the PWD ("") '
"head entry."
)


def test_sys_path_leak_for_current(tmpdir):
# type: (Any) -> None

expected_sys_path = [
entry
for entry in json.loads(
subprocess.check_output(
args=[
sys.executable,
"-sE",
"-c",
"import json, sys; json.dump(sys.path, sys.stdout)",
]
).decode("utf-8")
)
# N.B.: We expect the PythonInterpreter infrastructure to elide the implicit "" / CWD
# sys.path entry.
if entry
]
pex_root = os.path.join(str(tmpdir), "pex_root")

new_sys_path_entry = os.path.join(str(tmpdir), "not-interpreter-sys-path")
with ENV.patch(PEX_ROOT=pex_root), PythonInterpreter._cleared_memory_cache():
sys.path.append(new_sys_path_entry)
try:
assert tuple(expected_sys_path) == PythonInterpreter.get().sys_path
finally:
sys.path.pop()
Loading