Skip to content

Commit

Permalink
Regularize dogfood PEX cache use.
Browse files Browse the repository at this point in the history
The internal PEP-517/518 build system PEXes and the Pip PEXes now both
uniformly use the equivalent of `--layout loose --venv` where
distribution dependencies are always symlinks to the `installed_wheels`
cache. This helps cut down on overall cache size slightly, but also
improves the uniformity of the dogfood.
  • Loading branch information
jsirois committed Oct 13, 2024
1 parent b5aec87 commit bf934fd
Show file tree
Hide file tree
Showing 10 changed files with 151 additions and 91 deletions.
7 changes: 2 additions & 5 deletions pex/bin/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
global_environment,
register_global_arguments,
)
from pex.common import CopyMode, die, is_pyc_dir, is_pyc_file, safe_mkdtemp
from pex.common import CopyMode, die, is_pyc_dir, is_pyc_file
from pex.dependency_configuration import DependencyConfiguration
from pex.dependency_manager import DependencyManager
from pex.dist_metadata import Requirement
Expand Down Expand Up @@ -923,10 +923,7 @@ def build_pex(
preamble = preamble_fd.read()

pex_builder = PEXBuilder(
path=safe_mkdtemp(),
interpreter=targets.interpreter,
preamble=preamble,
copy_mode=CopyMode.SYMLINK,
interpreter=targets.interpreter, preamble=preamble, copy_mode=CopyMode.SYMLINK
)

if options.resources_directory:
Expand Down
2 changes: 1 addition & 1 deletion pex/build_system/pep_517.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def _default_build_system(
requires = ["setuptools", str(selected_pip_version.wheel_requirement)]
resolved_dists.extend(
Distribution.load(dist_location)
for dist_location in third_party.expose(
for dist_location in third_party.expose_installed_wheels(
["setuptools"], interpreter=target.get_interpreter()
)
)
Expand Down
4 changes: 2 additions & 2 deletions pex/build_system/pep_518.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import subprocess

from pex.build_system import DEFAULT_BUILD_BACKEND
from pex.common import REPRODUCIBLE_BUILDS_ENV
from pex.common import REPRODUCIBLE_BUILDS_ENV, CopyMode
from pex.dist_metadata import Distribution
from pex.interpreter import PythonInterpreter
from pex.pex import PEX
Expand Down Expand Up @@ -85,7 +85,7 @@ def create(
**extra_env # type: str
):
# type: (...) -> Union[BuildSystem, Error]
pex_builder = PEXBuilder()
pex_builder = PEXBuilder(copy_mode=CopyMode.SYMLINK)
pex_builder.info.venv = True
pex_builder.info.venv_site_packages_copies = True
pex_builder.info.venv_bin_path = BinPath.PREPEND
Expand Down
20 changes: 1 addition & 19 deletions pex/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,24 +568,6 @@ def __init__(self, chroot_base):
self._compress_by_file = {} # type: Dict[str, bool]
self._file_index = {} # type: Dict[str, Optional[str]]

def clone(self, into=None):
    # type: (Optional[str]) -> Chroot
    """Create a new Chroot that re-adds every file tracked by this one via ``link``.

    :keyword into: (optional) The directory to root the new Chroot at. If not specified (or
      falsy), a temporary directory is created with ``safe_mkdtemp``.

    .. versionchanged:: 0.8
      The temporary directory created when ``into`` is not specified is now garbage collected on
      interpreter exit.
    """
    cloned = Chroot(into or safe_mkdtemp())
    # Flatten the label -> fileset mapping into (label, relative path) pairs; each file keeps the
    # label it was registered under in this chroot.
    labeled_files = (
        (label, rel_path) for label, fileset in self.filesets.items() for rel_path in fileset
    )
    for label, rel_path in labeled_files:
        cloned.link(os.path.join(self.chroot, rel_path), rel_path, label=label)
    return cloned

def path(self):
# type: () -> str
"""The path of the chroot."""
Expand Down Expand Up @@ -693,7 +675,7 @@ def symlink(
self._ensure_parent(dst)
abs_src = os.path.abspath(src)
abs_dst = os.path.join(self.chroot, dst)
os.symlink(abs_src, abs_dst)
os.symlink(os.path.relpath(abs_src, os.path.dirname(abs_dst)), abs_dst)

def write(
self,
Expand Down
6 changes: 5 additions & 1 deletion pex/pep_376.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from pex.typing import TYPE_CHECKING, cast
from pex.util import CacheHelper
from pex.venv.virtualenv import Virtualenv
from pex.wheel import WHEEL, WheelMetadataLoadError
from pex.wheel import WHEEL, Wheel, WheelMetadataLoadError

if TYPE_CHECKING:
from typing import Callable, Iterable, Iterator, Optional, Protocol, Text, Tuple, Union
Expand Down Expand Up @@ -241,6 +241,10 @@ def load(cls, prefix_dir):
fingerprint = attr.ib() # type: Optional[str]
root_is_purelib = attr.ib() # type: bool

def wheel_file_name(self):
    # type: () -> str
    """Return the wheel file name reported by the ``Wheel`` loaded from ``prefix_dir``."""
    wheel = Wheel.load(self.prefix_dir)
    return wheel.wheel_file_name

def stashed_path(self, *components):
    # type: (*str) -> str
    """Return a path beneath this installation's stash directory.

    :param components: Zero or more path components to append below the stash directory.
    :return: ``prefix_dir`` joined with ``stash_dir`` and then with each of ``components``.
    """
    stash_root = os.path.join(self.prefix_dir, self.stash_dir)
    return os.path.join(stash_root, *components)
Expand Down
25 changes: 0 additions & 25 deletions pex/pex_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,31 +179,6 @@ def chroot(self):
# type: () -> Chroot
return self._chroot

def clone(self, into=None):
    """Copy this PEX environment into a fresh PEXBuilder.

    A clone is useful once this PEXBuilder has been frozen and rendered immutable.

    :keyword into: (optional) The destination directory for the clone. It is passed through to
      the underlying chroot's ``clone``; if not specified, a temporary directory will be created.

    .. versionchanged:: 0.8
      The temporary directory created when ``into`` is not specified is now garbage collected on
      interpreter exit.
    """
    builder_copy = self.__class__(
        # The chroot is cloned first, before any other state is read from this builder.
        chroot=self._chroot.clone(into=into),
        interpreter=self._interpreter,
        pex_info=self._pex_info.copy(),
        preamble=self._preamble,
        copy_mode=self._copy_mode,
    )
    builder_copy.set_shebang(self._shebang)
    # Give the copy its own distributions container so later additions do not leak back here.
    builder_copy._distributions = self._distributions.copy()
    return builder_copy

def path(self):
    # type: () -> str
    """Return the path reported by this builder's chroot."""
    chroot = self.chroot()
    return chroot.path()
Expand Down
71 changes: 58 additions & 13 deletions pex/pip/installation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,20 @@

from __future__ import absolute_import

import glob
import hashlib
import os
from collections import OrderedDict
from textwrap import dedent

from pex import pex_warnings, third_party
from pex import pep_427, pex_warnings, third_party
from pex.atomic_directory import atomic_directory
from pex.cache.dirs import CacheDir
from pex.common import REPRODUCIBLE_BUILDS_ENV, pluralize, safe_mkdtemp
from pex.common import REPRODUCIBLE_BUILDS_ENV, CopyMode, pluralize, safe_mkdtemp
from pex.dist_metadata import Requirement
from pex.executor import Executor
from pex.interpreter import PythonInterpreter
from pex.jobs import iter_map_parallel
from pex.orderedset import OrderedSet
from pex.pep_503 import ProjectName
from pex.pex import PEX
Expand All @@ -26,7 +29,7 @@
from pex.third_party import isolated
from pex.tracer import TRACER
from pex.typing import TYPE_CHECKING
from pex.util import named_temporary_file
from pex.util import CacheHelper
from pex.venv.virtualenv import InstallationChoice, Virtualenv

if TYPE_CHECKING:
Expand All @@ -53,13 +56,13 @@ def _pip_installation(
if not chroot.is_finalized():
from pex.pex_builder import PEXBuilder

isolated_pip_builder = PEXBuilder(path=chroot.work_dir)
isolated_pip_builder = PEXBuilder(path=chroot.work_dir, copy_mode=CopyMode.SYMLINK)
isolated_pip_builder.info.venv = True
# Allow REPRODUCIBLE_BUILDS_ENV PYTHONHASHSEED env var to take effect if needed.
isolated_pip_builder.info.venv_hermetic_scripts = False
for dist_location in iter_distribution_locations():
isolated_pip_builder.add_dist_location(dist=dist_location)
with named_temporary_file(prefix="", suffix=".py", mode="w") as fp:
with open(os.path.join(chroot.work_dir, "__pex_patched_pip__.py"), "w") as fp:
fp.write(
dedent(
"""\
Expand All @@ -76,8 +79,7 @@ def _pip_installation(
"""
).format(patches_package_env_var_name=Pip._PATCHES_PACKAGE_ENV_VAR_NAME)
)
fp.close()
isolated_pip_builder.set_executable(fp.name, "__pex_patched_pip__.py")
isolated_pip_builder.set_executable(fp.name, "exe.py")
isolated_pip_builder.freeze()
pip_cache = os.path.join(pip_root, "pip_cache")
pip_pex = ensure_venv(PEX(pip_pex_path, interpreter=pip_interpreter))
Expand Down Expand Up @@ -111,7 +113,7 @@ def _vendored_installation(

def expose_vendored():
# type: () -> Iterator[str]
return third_party.expose(("pip", "setuptools"), interpreter=interpreter)
return third_party.expose_installed_wheels(("pip", "setuptools"), interpreter=interpreter)

if not extra_requirements:
return _pip_installation(
Expand Down Expand Up @@ -184,6 +186,35 @@ def iter_distribution_locations():
)


class PipInstallError(Exception):
    """Raised when Pip cannot be installed.

    For example, when downloading the wheels for a Pip version's requirements fails.
    """


def _install_wheel(wheel_path):
    # type: (str) -> str
    """Install a wheel file into the installed wheels cache and return the install directory.

    The install directory is keyed by the sha256 ``CacheHelper.hash`` of the wheel file plus the
    wheel's file name. When this call performs the install, a second cache directory is also
    populated. It is keyed by the installed wheel's fingerprint, or else by a sha256 hash of the
    freshly installed directory. It holds a relative symlink, named after the wheel, that points
    back at the install directory.

    :param wheel_path: The path of the wheel file to install.
    :return: The path of the installed wheel directory in the cache.
    """

    # TODO(John Sirois): Consolidate with pex.resolver.BuildAndInstallRequest.
    #   https://github.com/pex-tool/pex/issues/2556
    file_hash = CacheHelper.hash(wheel_path, hasher=hashlib.sha256)
    file_name = os.path.basename(wheel_path)
    install_dir = CacheDir.INSTALLED_WHEELS.path(file_hash, file_name)
    with atomic_directory(install_dir) as install_atomic_dir:
        if install_atomic_dir.is_finalized():
            # The install directory is already finalized; there is nothing left to do.
            return install_dir

        installed = pep_427.install_wheel_chroot(
            wheel_path=wheel_path, destination=install_atomic_dir.work_dir
        )
        runtime_key = installed.fingerprint or CacheHelper.dir_hash(
            install_atomic_dir.work_dir, hasher=hashlib.sha256
        )
        runtime_key_dir = CacheDir.INSTALLED_WHEELS.path(runtime_key)
        with atomic_directory(runtime_key_dir) as runtime_atomic_dir:
            if not runtime_atomic_dir.is_finalized():
                # The link is created in the work dir but its target is computed relative to the
                # final runtime key dir location.
                link_path = os.path.join(runtime_atomic_dir.work_dir, file_name)
                os.symlink(os.path.relpath(install_dir, runtime_key_dir), link_path)
    return install_dir


def _bootstrap_pip(
version, # type: PipVersionValue
interpreter=None, # type: Optional[PythonInterpreter]
Expand All @@ -200,11 +231,25 @@ def bootstrap_pip():
install_pip=InstallationChoice.YES,
)

for req in version.requirements:
project_name = req.name
target_dir = os.path.join(chroot, "reqs", project_name)
venv.interpreter.execute(["-m", "pip", "install", "--target", target_dir, str(req)])
yield target_dir
wheels = os.path.join(chroot, "wheels")
wheels_cmd = ["-m", "pip", "wheel", "--wheel-dir", wheels]
wheels_cmd.extend(str(req) for req in version.requirements)
try:
venv.interpreter.execute(args=wheels_cmd)
except Executor.NonZeroExit as e:
raise PipInstallError(
"Failed to bootstrap Pip {version}.\n"
"Failed to download its dependencies: {err}".format(version=version, err=str(e))
)

return iter_map_parallel(
inputs=glob.glob(os.path.join(wheels, "*.whl")),
function=_install_wheel,
costing_function=os.path.getsize,
noun="wheel",
verb="install",
verb_past="installed",
)

return bootstrap_pip

Expand Down
30 changes: 30 additions & 0 deletions pex/third_party/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import absolute_import

import contextlib
import hashlib
import importlib
import os
import re
Expand All @@ -12,10 +13,13 @@
import zipfile
from collections import OrderedDict, namedtuple

from pex.common import CopyMode, iter_copytree

# NB: ~All pex imports are performed lazily to play well with the un-imports performed by both the
# PEX runtime when it demotes the bootstrap code and any pex modules that uninstalled
# VendorImporters un-import.
from pex.typing import TYPE_CHECKING
from pex.util import CacheHelper

if TYPE_CHECKING:
from typing import Container, Iterable, Iterator, List, Optional, Tuple
Expand Down Expand Up @@ -620,6 +624,32 @@ def expose(
yield path


def expose_installed_wheels(
    dists,  # type: Iterable[str]
    interpreter=None,  # type: Optional[PythonInterpreter]
):
    # type: (...) -> Iterator[str]
    """Expose the given vendored distributions as entries in the installed wheels cache.

    Each path produced by ``expose`` is loaded as an ``InstalledWheel`` and, unless the cache
    entry is already finalized, copied with ``CopyMode.LINK`` into a cache directory. That
    directory is keyed by the wheel's fingerprint, or else by a sha256 hash of the exposed
    directory, plus the wheel file name.

    :param dists: The names of the vendored distributions to expose.
    :param interpreter: (optional) Passed through to ``expose``.
    :return: An iterator over the installed wheels cache paths, one per exposed distribution.
    """

    from pex.atomic_directory import atomic_directory
    from pex.cache.dirs import CacheDir
    from pex.pep_376 import InstalledWheel

    for exposed_path in expose(dists, interpreter=interpreter):
        # TODO(John Sirois): Maybe consolidate with pex.resolver.BuildAndInstallRequest.
        #   https://github.com/pex-tool/pex/issues/2556
        installed = InstalledWheel.load(exposed_path)
        file_name = installed.wheel_file_name()
        cache_key = installed.fingerprint
        if not cache_key:
            cache_key = CacheHelper.dir_hash(exposed_path, hasher=hashlib.sha256)
        cached_wheel_dir = CacheDir.INSTALLED_WHEELS.path(cache_key, file_name)
        with atomic_directory(cached_wheel_dir) as atomic_dir:
            if not atomic_dir.is_finalized():
                # Drain the iterator: only the copying is wanted here, not the yielded pairs.
                for _source, _dest in iter_copytree(
                    exposed_path, atomic_dir.work_dir, copy_mode=CopyMode.LINK
                ):
                    pass
        yield cached_wheel_dir


# Implicitly install an importer for vendored code on the first import of pex.third_party.
# N.B.: attrs must be exposed to make use of `cache_hash=True` since that generates and compiles
# code on the fly that generated code does a bare `import attr`.
Expand Down
35 changes: 34 additions & 1 deletion pex/wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from __future__ import absolute_import

import itertools
import os
import re
from email.message import Message
Expand All @@ -14,10 +15,12 @@
load_metadata,
parse_message,
)
from pex.orderedset import OrderedSet
from pex.third_party.packaging import tags
from pex.typing import TYPE_CHECKING, cast

if TYPE_CHECKING:
from typing import Dict, Text
from typing import Dict, Text, Tuple

import attr # vendor:skip
else:
Expand Down Expand Up @@ -61,6 +64,15 @@ def load(cls, location):
files = attr.ib() # type: MetadataFiles
metadata = attr.ib() # type: Message

@property
def tags(self):
    # type: () -> Tuple[tags.Tag, ...]
    """Return every tag parsed from this metadata's ``Tag`` entries, in entry order.

    Each ``Tag`` entry is handed to ``tags.parse_tag`` and the results are flattened into a
    single tuple. The tuple is empty when there are no ``Tag`` entries.
    """
    # NB: Inside this function body, `tags` resolves to the module-level import, not this property.
    return tuple(
        parsed_tag
        for raw_tag in self.metadata.get_all("Tag", ())
        for parsed_tag in tags.parse_tag(raw_tag)
    )

@property
def root_is_purelib(self):
# type: () -> bool
Expand Down Expand Up @@ -116,6 +128,27 @@ def load(cls, wheel_path):
metadata = attr.ib() # type: WHEEL
data_dir = attr.ib() # type: str

@property
def wheel_file_name(self):
    # type: () -> str
    """Return a wheel file name built from this wheel's metadata.

    The name has the form ``{project_name}-{version}-{interpreters}-{abis}-{platforms}.whl``.
    Each of the three tag components is the ``.``-joined set of distinct values found across
    the wheel's tags, in first-seen order.
    """

    interpreters = OrderedSet()  # type: OrderedSet[str]
    abis = OrderedSet()  # type: OrderedSet[str]
    platforms = OrderedSet()  # type: OrderedSet[str]
    for wheel_tag in self.metadata.tags:
        interpreters.add(wheel_tag.interpreter)
        abis.add(wheel_tag.abi)
        platforms.add(wheel_tag.platform)

    compressed_tag = "{interpreters}-{abis}-{platforms}".format(
        interpreters=".".join(interpreters),
        abis=".".join(abis),
        platforms=".".join(platforms),
    )
    dist_metadata = self.metadata_files.metadata
    return "{project_name}-{version}-{tag}.whl".format(
        project_name=dist_metadata.project_name.raw,
        version=dist_metadata.version.raw,
        tag=compressed_tag,
    )

@property
def root_is_purelib(self):
# type: () -> bool
Expand Down
Loading

0 comments on commit bf934fd

Please sign in to comment.