Skip to content

Commit

Permalink
Move wheel install logic into Pex. (pex-tool#2295)
Browse files Browse the repository at this point in the history
This sets the stage for installing wheels at runtime without
needing to ship a copy of Pip in every PEX file. To help prove the
robustness of the approach, this change converts the current build-time
installation of wheel chroots to this mechanism.

Work towards pex-tool#2292
  • Loading branch information
jsirois authored Dec 4, 2023
1 parent d7ee142 commit 8b41837
Show file tree
Hide file tree
Showing 18 changed files with 775 additions and 197 deletions.
18 changes: 9 additions & 9 deletions docker/base/install_pythons.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ PYENV_VERSIONS=(
3.9.18
3.10.13
3.12.0
pypy2.7-7.3.12
pypy2.7-7.3.13
pypy3.5-7.0.0
pypy3.6-7.3.3
pypy3.7-7.3.9
pypy3.8-7.3.11
pypy3.9-7.3.12
pypy3.10-7.3.12
pypy3.9-7.3.13
pypy3.10-7.3.13
)

git clone https://github.com/pyenv/pyenv.git "${PYENV_ROOT}" && (
Expand All @@ -31,13 +31,13 @@ git clone https://github.com/pyenv/pyenv.git "${PYENV_ROOT}" && (
PATH="${PATH}:${PYENV_ROOT}/bin"

for version in "${PYENV_VERSIONS[@]}"; do
if [[ "${version}" == "pypy2.7-7.3.12" ]]; then
# Installation of pypy2.7-7.3.12 fails like so without adjusting the version of get-pip it
if [[ "${version}" == "pypy2.7-7.3.13" ]]; then
# Installation of pypy2.7-7.3.13 fails like so without adjusting the version of get-pip it
# uses:
# $ pyenv install pypy2.7-7.3.12
# Downloading pypy2.7-v7.3.12-linux64.tar.bz2...
# -> https://downloads.python.org/pypy/pypy2.7-v7.3.12-linux64.tar.bz2
# Installing pypy2.7-v7.3.12-linux64...
# $ pyenv install pypy2.7-7.3.13
# Downloading pypy2.7-v7.3.13-linux64.tar.bz2...
# -> https://downloads.python.org/pypy/pypy2.7-v7.3.13-linux64.tar.bz2
# Installing pypy2.7-v7.3.13-linux64...
# Installing pip from https://bootstrap.pypa.io/get-pip.py...
# error: failed to install pip via get-pip.py
# ...
Expand Down
4 changes: 3 additions & 1 deletion pex/build_system/pep_517.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ def _default_build_system(
requires = ["setuptools", "wheel"]
resolved = tuple(
Distribution.load(dist_location)
for dist_location in third_party.expose(requires)
for dist_location in third_party.expose(
requires, interpreter=target.get_interpreter()
)
)
extra_env.update(__PEX_UNVENDORED__="1")
else:
Expand Down
81 changes: 79 additions & 2 deletions pex/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from typing import (
Any,
Callable,
Container,
DefaultDict,
Dict,
Iterable,
Expand Down Expand Up @@ -357,7 +358,7 @@ def safe_sleep(seconds):


def chmod_plus_x(path):
# type: (str) -> None
# type: (Text) -> None
"""Equivalent of unix `chmod a+x path`"""
path_mode = os.stat(path).st_mode
path_mode &= int("777", 8)
Expand Down Expand Up @@ -444,7 +445,7 @@ def can_write_dir(path):


def touch(file):
# type: (str) -> None
# type: (Text) -> None
"""Equivalent of unix `touch path`."""
with safe_open(file, "a"):
os.utime(file, None)
Expand Down Expand Up @@ -709,3 +710,79 @@ def iter_files():
for filename, arcname in iter_files():
maybe_write_parent_dirs(arcname)
write_entry(filename, arcname)


def relative_symlink(
    src,  # type: Text
    dst,  # type: Text
):
    # type: (...) -> None
    """Create a symlink at `dst` whose target is `src` expressed as a relative path.

    The stored link target is the path to `src` relative to the directory that contains `dst`.

    :param src: The target of the symlink.
    :param dst: The path to create the symlink at.
    """
    # The link is resolved relative to the directory holding it, so that is the anchor for relpath.
    link_target = os.path.relpath(src, os.path.dirname(dst))
    os.symlink(link_target, dst)


def iter_copytree(
    src,  # type: Text
    dst,  # type: Text
    exclude=(),  # type: Container[Text]
    symlink=False,  # type: bool
):
    # type: (...) -> Iterator[Tuple[Text, Text]]
    """Replicate the directory tree rooted at `src` under `dst`, yielding each file handled.

    In the default mode, directories are created and regular files are hard linked when possible;
    files are copied when the source is itself a symlink or when hard linking fails across
    devices. In symlink mode, only the top-level entries of `src` are processed and each one
    (file or directory) becomes a relative symlink under `dst`.

    Entries that already exist at the destination are silently left in place.

    N.B.: This is a generator; it must be consumed to drive the copying operations to completion.
    Each `(src, dst)` pair is yielded just before the corresponding file is linked or copied.

    :param src: The source directory tree to copy.
    :param dst: The destination location to copy the source tree to.
    :param exclude: Names (basenames) of top-level files and directories to exclude from copying.
    :param symlink: Whether to use symlinks instead of copies (or hard links).
    :return: An iterator over tuples identifying the copied files of the form `(src, dst)`.
    """
    safe_mkdir(dst)

    # Flipped to False the first time a hard link fails with EXDEV so that later files go straight
    # to a copy.
    hard_links_ok = True

    for walk_root, dir_names, file_names in os.walk(src, topdown=True, followlinks=True):
        if walk_root == src:
            # Exclusions only apply to the top level; pruning `dir_names` in place also stops
            # os.walk from descending into the excluded directories.
            dir_names[:] = [name for name in dir_names if name not in exclude]
            file_names[:] = [name for name in file_names if name not in exclude]

        # Directories are handled before files so each level's subdirectories exist up front.
        entries = itertools.chain(
            ((name, True) for name in dir_names),
            ((name, False) for name in file_names),
        )
        for name, entry_is_dir in entries:
            source_path = os.path.join(walk_root, name)
            target_path = os.path.join(dst, os.path.relpath(source_path, src))
            if not entry_is_dir:
                yield source_path, target_path
            try:
                if symlink:
                    relative_symlink(source_path, target_path)
                elif entry_is_dir:
                    os.mkdir(target_path)
                else:
                    # We only try to link regular files since linking a symlink on Linux can
                    # produce another symlink, which leaves open the possibility the source
                    # target could later go missing leaving the destination dangling.
                    if hard_links_ok and not os.path.islink(source_path):
                        try:
                            os.link(source_path, target_path)
                        except OSError as e:
                            if e.errno != errno.EXDEV:
                                raise e
                            hard_links_ok = False
                        else:
                            continue
                    shutil.copy(source_path, target_path)
            except OSError as e:
                # A pre-existing destination entry is tolerated; anything else is a real failure.
                if e.errno != errno.EEXIST:
                    raise e

        if symlink:
            # Once we've symlinked the top-level directories and files, we've "copied" everything.
            return
10 changes: 5 additions & 5 deletions pex/dist_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def _strip_sdist_path(sdist_path):
return filename


def _parse_message(message):
def parse_message(message):
# type: (bytes) -> Message
return cast(Message, Parser().parse(StringIO(to_unicode(message))))

Expand Down Expand Up @@ -153,7 +153,7 @@ def _find_installed_metadata_files(
metadata_files = glob.glob(os.path.join(location, metadata_dir_glob, metadata_file_name))
for path in metadata_files:
with open(path, "rb") as fp:
metadata = _parse_message(fp.read())
metadata = parse_message(fp.read())
project_name_and_version = ProjectNameAndVersion.from_parsed_pkg_info(
source=path, pkg_info=metadata
)
Expand Down Expand Up @@ -194,7 +194,7 @@ def find_wheel_metadata(location):
continue

with zf.open(name) as fp:
metadata = _parse_message(fp.read())
metadata = parse_message(fp.read())
project_name_and_version = ProjectNameAndVersion.from_parsed_pkg_info(
source=os.path.join(location, name), pkg_info=metadata
)
Expand Down Expand Up @@ -245,7 +245,7 @@ def find_zip_sdist_metadata(location):
if name.endswith("/") or not _is_dist_pkg_info_file_path(name):
continue
with zf.open(name) as fp:
metadata = _parse_message(fp.read())
metadata = parse_message(fp.read())
project_name_and_version = ProjectNameAndVersion.from_parsed_pkg_info(
source=os.path.join(location, name), pkg_info=metadata
)
Expand Down Expand Up @@ -277,7 +277,7 @@ def find_tar_sdist_metadata(location):
),
)
with closing(file_obj) as fp:
metadata = _parse_message(fp.read())
metadata = parse_message(fp.read())
project_name_and_version = ProjectNameAndVersion.from_parsed_pkg_info(
source=os.path.join(location, member.name), pkg_info=metadata
)
Expand Down
24 changes: 21 additions & 3 deletions pex/fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@

from __future__ import absolute_import

import contextlib
import os
import ssl
import sys
import time
from contextlib import closing, contextmanager

Expand All @@ -29,6 +32,20 @@
BinaryIO = None


@contextmanager
def guard_stdout():
    # type: () -> Iterator[None]
    """Context manager that silences stdout on PyPy 3.9+ and is a no-op everywhere else.

    Under PyPy 3.9 and 3.10, `ssl.create_default_context` causes spurious informational text about
    SSL certs to be emitted to stdout; wrapping the call in this guard squelches that output.
    """
    noisy_pypy = hasattr(sys, "pypy_version_info") and sys.version_info[:2] >= (3, 9)
    if not noisy_pypy:
        yield
        return

    with open(os.devnull, "w") as devnull:
        # The `contextlib.redirect_stdout` function is available for Python 3.4+.
        with contextlib.redirect_stdout(devnull):  # type: ignore[attr-defined]
            yield


class URLFetcher(object):
USER_AGENT = "pex/{version}".format(version=__version__)

Expand All @@ -45,9 +62,10 @@ def __init__(
self._timeout = network_configuration.timeout
self._max_retries = network_configuration.retries

ssl_context = ssl.create_default_context(cafile=network_configuration.cert)
if network_configuration.client_cert:
ssl_context.load_cert_chain(network_configuration.client_cert)
with guard_stdout():
ssl_context = ssl.create_default_context(cafile=network_configuration.cert)
if network_configuration.client_cert:
ssl_context.load_cert_chain(network_configuration.client_cert)

proxies = None # type: Optional[Dict[str, str]]
if network_configuration.proxy:
Expand Down
67 changes: 65 additions & 2 deletions pex/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ def get(cls, binary=None):
sys_path=sys_path,
site_packages=site_packages,
extras_paths=extras_paths,
paths=sysconfig.get_paths(),
packaging_version=packaging_version,
python_tag=preferred_tag.interpreter,
abi_tag=preferred_tag.abi,
Expand All @@ -226,7 +227,7 @@ def get(cls, binary=None):
def decode(cls, encoded):
TRACER.log("creating PythonIdentity from encoded: %s" % encoded, V=9)
values = json.loads(encoded)
if len(values) != 14:
if len(values) != 15:
raise cls.InvalidError("Invalid interpreter identity: %s" % encoded)

supported_tags = values.pop("supported_tags")
Expand Down Expand Up @@ -264,6 +265,7 @@ def __init__(
sys_path, # type: Iterable[str]
site_packages, # type: Iterable[str]
extras_paths, # type: Iterable[str]
paths, # type: Mapping[str, str]
packaging_version, # type: str
python_tag, # type: str
abi_tag, # type: str
Expand All @@ -284,6 +286,7 @@ def __init__(
self._sys_path = tuple(sys_path)
self._site_packages = tuple(site_packages)
self._extras_paths = tuple(extras_paths)
self._paths = dict(paths)
self._packaging_version = packaging_version
self._python_tag = python_tag
self._abi_tag = abi_tag
Expand All @@ -301,6 +304,7 @@ def encode(self):
sys_path=self._sys_path,
site_packages=self._site_packages,
extras_paths=self._extras_paths,
paths=self._paths,
packaging_version=self._packaging_version,
python_tag=self._python_tag,
abi_tag=self._abi_tag,
Expand Down Expand Up @@ -348,6 +352,11 @@ def extras_paths(self):
# type: () -> Tuple[str, ...]
return self._extras_paths

@property
def paths(self):
    # type: () -> Mapping[str, str]
    """Return the mapping of path names to paths stored on this identity.

    The mapping is a private `dict` copy of the `paths` constructor argument; the `get` factory
    populates that argument from `sysconfig.get_paths()`.
    """
    return self._paths

@property
def python_tag(self):
return self._python_tag
Expand Down Expand Up @@ -1319,6 +1328,14 @@ def supported_platforms(self):
self._supported_platforms = frozenset(self._identity.iter_supported_platforms())
return self._supported_platforms

def shebang(self, args=None):
    # type: (Optional[Text]) -> Text
    """Return the contents of an appropriate shebang for this interpreter and args.

    The shebang will include the leading `#!` but will not include a trailing new line character.

    :param args: Optional interpreter arguments, forwarded to `create_shebang` as `python_args`.
    :return: The shebang text produced by `create_shebang` for this interpreter's binary.
    """
    return create_shebang(self._binary, python_args=args)

def create_isolated_cmd(
self,
args=None, # type: Optional[Iterable[str]]
Expand Down Expand Up @@ -1444,10 +1461,56 @@ def spawn_python_job(
# need to set `__PEX_UNVENDORED__`. See: vendor.__main__.ImportRewriter._modify_import.
subprocess_env["__PEX_UNVENDORED__"] = "1"

pythonpath.extend(third_party.expose(expose))
pythonpath.extend(third_party.expose(expose, interpreter=interpreter))

interpreter = interpreter or PythonInterpreter.get()
cmd, process = interpreter.open_process(
args=args, pythonpath=pythonpath, env=subprocess_env, **subprocess_kwargs
)
return Job(command=cmd, process=process)


# See the "Test results from various systems" table here:
# https://www.in-ulm.de/~mascheck/various/shebang/#length
MAX_SHEBANG_LENGTH = 512 if sys.platform == "darwin" else 128


def create_shebang(
    python_exe,  # type: Text
    python_args=None,  # type: Optional[Text]
    max_shebang_length=MAX_SHEBANG_LENGTH,  # type: int
):
    # type: (...) -> Text
    """Build the shebang text used to launch a script with the given Python interpreter.

    The result includes the leading `#!` but never a trailing new line character. When the direct
    `#!<python> [args]` form (plus its EOL character) would exceed `max_shebang_length`, a
    multi-line `/bin/sh` preamble that re-execs the script under the interpreter is returned
    instead.

    :param python_exe: The path of the Python interpreter to run the script with.
    :param python_args: Optional arguments to pass to the interpreter ahead of the script.
    :param max_shebang_length: The longest shebang line, including EOL, to emit directly.
    :return: The shebang contents.
    """
    if python_args:
        python = "{exe} {args}".format(exe=python_exe, args=python_args)
    else:
        python = python_exe
    shebang = "#!{python}".format(python=python)

    # N.B.: We add 1 to be conservative and account for the EOL character.
    fits_on_one_line = len(shebang) + 1 <= max_shebang_length
    if fits_on_one_line:
        return shebang

    # This trick relies on /bin/sh being ubiquitous and the concordance of:
    # 1. Python: triple quoted strings plus allowance for free-floating string values in
    #    python files.
    # 2. sh: Any number of pairs of `'` evaluating away when followed immediately by a
    #    command string (`''command` -> `command`) and lazy parsing allowing for invalid sh
    #    content immediately following an exec line.
    # The end result is a file that is both a valid sh script with a short shebang and a
    # valid Python program.
    sh_reexec_template = dedent(
        """\
        #!/bin/sh
        # N.B.: This python script executes via a /bin/sh re-exec as a hack to work around a
        # potential maximum shebang length of {max_shebang_length} bytes on this system which
        # the python interpreter `exec`ed below would violate.
        ''''exec {python} "$0" "$@"
        '''
        """
    )
    rendered = sh_reexec_template.format(max_shebang_length=max_shebang_length, python=python)
    return rendered.strip()
Loading

0 comments on commit 8b41837

Please sign in to comment.