Skip to content

Commit

Permalink
Support booting via /bin/sh with --sh-boot. (#1721)
Browse files Browse the repository at this point in the history
Allow users to choose `sh` as the boot mechanism for their PEXes. Not
only is `/bin/sh` probably more widely available than any given Python
shebang, but it's also much faster.

Relates to #1115 and #1540
  • Loading branch information
jsirois authored Apr 14, 2022
1 parent 7696f02 commit 8d0f40b
Show file tree
Hide file tree
Showing 8 changed files with 681 additions and 16 deletions.
39 changes: 36 additions & 3 deletions pex/bin/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from pex import pex_warnings
from pex.argparse import HandleBoolAction
from pex.bin.sh_boot import create_sh_boot_script
from pex.commands.command import (
GlobalConfigurationError,
global_environment,
Expand Down Expand Up @@ -325,6 +326,25 @@ def configure_clp_pex_environment(parser):
"#!. This overrides the default behavior, which picks an environment Python "
"interpreter compatible with the one used to build the PEX file.",
)
group.add_argument(
"--sh-boot",
"--no-sh-boot",
dest="sh_boot",
default=False,
action=HandleBoolAction,
help=(
"Create a modified ZIPAPP that uses `/bin/sh` to boot. If you know the machines that "
"the PEX will be distributed to have POSIX compliant `/bin/sh` (almost all do, "
"see: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sh.html); then this "
"is probably the way you want your PEX to boot. Instead of launching via a Python "
"shebang, the PEX will launch via a `#!/bin/sh` shebang that executes a small script "
"embedded in the head of the PEX ZIPAPP that performs initial interpreter selection "
"and re-execution of the underlying PEX in a way that is often more robust than a "
"Python shebang and always faster on 2nd and subsequent runs since the sh script has a "
"constant overhead of O(1ms) whereas the Python overhead to perform the same "
"interpreter selection and re-execution is O(100ms)."
),
)


def configure_clp_pex_entry_points(parser):
Expand Down Expand Up @@ -774,10 +794,23 @@ def do_main(
verify_entry_point=options.validate_ep,
)

if options.pex_name is not None:
log("Saving PEX file to %s" % options.pex_name, V=options.verbosity)
pex_file = options.pex_name
if pex_file is not None:
log("Saving PEX file to {pex_file}".format(pex_file=pex_file), V=options.verbosity)
if options.sh_boot:
with TRACER.timed("Creating /bin/sh boot script"):
pex_builder.set_shebang("/bin/sh")
script = create_sh_boot_script(
pex_name=pex_file,
pex_info=pex.pex_info(),
targets=targets,
interpreter=pex.interpreter,
python_shebang=options.python_shebang,
)
pex_builder.set_header(script)

pex_builder.build(
options.pex_name,
pex_file,
bytecode_compile=options.compile,
deterministic_timestamp=not options.use_system_time,
layout=options.layout,
Expand Down
245 changes: 245 additions & 0 deletions pex/bin/sh_boot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
# Copyright 2022 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import absolute_import, print_function

import itertools
import os
import shlex
from textwrap import dedent

from pex import dist_metadata, variables
from pex.interpreter import PythonIdentity, PythonInterpreter, calculate_binary_name
from pex.interpreter_constraints import iter_compatible_versions
from pex.orderedset import OrderedSet
from pex.pex_info import PexInfo
from pex.targets import Targets
from pex.third_party import pkg_resources
from pex.typing import TYPE_CHECKING
from pex.version import __version__

if TYPE_CHECKING:
from typing import Iterable, Optional, Tuple

import attr # vendor:skip

from pex.dist_metadata import DistributionLike
else:
from pex.third_party import attr


@attr.s(frozen=True)
class PythonBinaryName(object):
    """An immutable (base name, version tuple) pair describing a Python binary name.

    The pair is rendered to a concrete binary name such as `python3.9`, `python3` or `python`
    via `render`.
    """

    # The un-versioned base name of the binary, e.g. "python" or "pypy".
    name = attr.ib()  # type: str
    # The version components that may be appended to the base name, most significant first.
    version = attr.ib()  # type: Tuple[int, ...]

    def render(self, version_components=2):
        # type: (int) -> str
        """Return the base name followed by the leading `version_components` version parts.

        The selected version parts are joined with "." and appended directly to the name; a
        value of 0 therefore renders just the bare name.
        """
        leading_components = self.version[:version_components]
        dotted_version = ".".join(str(component) for component in leading_components)
        return "{name}{version}".format(name=self.name, version=dotted_version)


def _calculate_applicable_binary_names(
    targets,  # type: Targets
    interpreter_constraints,  # type: Iterable[str]
    pex_dist=None,  # type: Optional[str]
):
    # type: (...) -> Iterable[str]
    """Compute the ordered, de-duplicated Python binary names a PEX should try to boot with.

    Candidate names are gathered in priority order:

    1. Names for the given targets that have a known Python version.
    2. Names for every version compatible with the given interpreter constraints.
    3. `python` and then `pypy` names for every version Pex itself supports, newest first.

    The result lists every candidate rendered with two version components first, then every
    candidate rendered with one, then every candidate rendered with none.

    :param targets: The targets whose unique targets are consulted for explicit versions.
    :param interpreter_constraints: Interpreter constraint strings to expand into versions.
    :param pex_dist: An optional Pex distribution to read `Requires-Python` from; when not
                     given, the working set is searched for the currently running Pex version.
    :return: An ordered set of binary names, most specific and most preferred first.
    """

    # Find all possible major / minor versions targeted by this Pex, preferring explicit targets
    # and then filling in any other versions implied by interpreter constraints to be checked
    # after those.

    # All constraints are parsed before any of them is expanded into versions.
    parsed_constraints = tuple(
        PythonIdentity.parse_requirement(constraint) for constraint in interpreter_constraints
    )

    constraint_names = OrderedSet()  # type: OrderedSet[PythonBinaryName]
    for parsed_constraint in parsed_constraints:
        for compatible_version in iter_compatible_versions(
            requires_python=[str(parsed_constraint.specifier)]
        ):
            constraint_names.add(
                PythonBinaryName(
                    name=calculate_binary_name(parsed_constraint.project_name),
                    version=compatible_version,
                )
            )

    # If we get targets from ICs, we only want explicitly specified local interpreter targets;
    # otherwise, if there are none, we want the implicit current target interpreter.
    have_constraint_names = len(constraint_names) > 0

    candidates = OrderedSet()  # type: OrderedSet[PythonBinaryName]

    # 1. Explicit targets 1st.
    for target in targets.unique_targets(only_explicit=have_constraint_names):
        target_version = target.python_version
        if target_version is None:
            continue
        candidates.add(
            PythonBinaryName(
                name=target.binary_name(version_components=0), version=target_version
            )
        )

    # 2. ICs next.
    candidates.update(constraint_names)

    # 3. As the final backstop, fill in all the interpreters Pex is compatible with since Pex can
    # do more sophisticated detection and re-direction from these during its own bootstrap. When
    # doing so, select these interpreters from newest to oldest since it is more likely any given
    # machine will have Python 3 at this point than it will Python 2.
    pex_distribution = pex_dist  # type: DistributionLike
    if not pex_distribution:
        pex_distribution = pkg_resources.working_set.find(
            pkg_resources.Requirement.parse("pex=={version}".format(version=__version__))
        )

    if pex_distribution:
        pex_requires_python = str(dist_metadata.requires_python(pex_distribution))
    else:
        pex_requires_python = ">=2.7"

    oldest_to_newest = list(iter_compatible_versions(requires_python=[pex_requires_python]))
    newest_to_oldest = tuple(oldest_to_newest[::-1])

    # Favor CPython over PyPy since the interpreter discovered via these names will just be used
    # to re-execute into Pex using the right interpreter. That should be a low-latency operation
    # for CPython end targets and for PyPy it need not be quite as fast since it inherently asks
    # you to trade startup latency for longer term jit performance.
    for base_name in ("python", "pypy"):
        for supported_version in newest_to_oldest:
            candidates.add(PythonBinaryName(name=base_name, version=supported_version))

    # Favor more specific interpreter names since these should need re-direction less often.
    return OrderedSet(
        candidate.render(version_components=component_count)
        for component_count in (2, 1, 0)
        for candidate in candidates
    )


def create_sh_boot_script(
    pex_name,  # type: str
    pex_info,  # type: PexInfo
    targets,  # type: Targets
    interpreter,  # type: PythonInterpreter
    python_shebang=None,  # type: Optional[str]
):
    # type: (...) -> str
    """Creates the body of a POSIX `sh` compatible script that executes a PEX ZIPAPP appended to it.

    N.B.: The shebang line is not included.

    Although a Python ZIPAPP is self-executing, it is only self-executing if the shebang happens to
    work on a given machine. Since there is variance with how pythons are named in various installs,
    this can lead to a failure to launch the ZIPAPP at all at the OS level.

    If the Python ZIPAPP shebang works, PEX still needs to check if it has installed itself in the
    PEX_ROOT and if the current interpreter selected by the shebang is appropriate and then it needs
    to re-execute itself using the appropriate interpreter and final installed location. This takes
    a non-trivial amount of time. Roughly 50ms in the warm case where the current interpreter is
    correct and the PEX ZIPAPP is already installed in the PEX_ROOT.

    Using this `sh` script can provide higher shebang success rates since almost every Unix has an
    `sh` interpreter at `/bin/sh`, and it reduces re-exec overhead to ~2ms in the warm case (and
    adds ~2ms in the cold case).

    :param pex_name: The path of the PEX file being built; used to calculate the venv directory.
    :param pex_info: The PEX-INFO of the PEX being built. Its interpreter constraints, venv
                     setting, PEX root and PEX hash are consulted.
    :param targets: The targets the PEX was built for; used to pick candidate Python binary names.
    :param interpreter: The interpreter used to calculate the venv directory.
    :param python_shebang: An optional Python shebang, with or without the leading `#!`. Its first
                           word becomes the script's default Python and any remaining words its
                           default Python arguments.
    :return: The text of the `sh` boot script, without a shebang line.
    :raises ValueError: If the PEX does not use a venv and the PEX-INFO has no `pex_hash` set.
    """
    # N.B.: These must default to the empty string and not `None`. They are rendered into the
    # script via `str.format`, and a `None` would be emitted as the literal text "None", leading
    # the script to probe for (and potentially execute) a file named `None` in the current
    # directory. An empty DEFAULT_PYTHON fails the script's `[ -x ... ]` check and falls through
    # to searching the PATH, which is the intent when no shebang is supplied.
    python = ""  # type: str
    python_args = ""  # type: str
    if python_shebang:
        shebang = python_shebang[2:] if python_shebang.startswith("#!") else python_shebang
        args = shlex.split(shebang)
        # A shebang that is empty once the `#!` is stripped splits to no words at all; treat it
        # the same as no shebang instead of failing with an IndexError.
        if args:
            python = args[0]
            python_args = " ".join(args[1:])

    python_names = tuple(
        _calculate_applicable_binary_names(
            targets=targets,
            interpreter_constraints=pex_info.interpreter_constraints,
        )
    )

    venv_dir = pex_info.venv_dir(pex_file=pex_name, interpreter=interpreter)
    if venv_dir:
        pex_installed_path = os.path.join(venv_dir, "pex")
    else:
        pex_hash = pex_info.pex_hash
        if pex_hash is None:
            raise ValueError("Expected pex_hash to be set already in PEX-INFO.")
        pex_installed_path = variables.unzip_dir(pex_info.pex_root, pex_hash)

    # N.B.: The candidate list is interpolated after dedenting; so its continuation lines carry no
    # indentation. That is harmless since `sh` ignores leading whitespace between words.
    return dedent(
        """\
        # N.B.: This script should stick to syntax defined for POSIX `sh` and avoid non-builtins.
        # See: https://pubs.opengroup.org/onlinepubs/9699919799/idx/shell.html
        set -eu
        VENV="{venv}"
        # N.B.: This ensures tilde-expansion of the DEFAULT_PEX_ROOT value.
        DEFAULT_PEX_ROOT="$(echo {pex_root})"
        DEFAULT_PYTHON="{python}"
        DEFAULT_PYTHON_ARGS="{python_args}"
        PEX_ROOT="${{PEX_ROOT:-${{DEFAULT_PEX_ROOT}}}}"
        PEX="${{PEX_ROOT}}/{pex_installed_relpath}"
        if [ -n "${{VENV}}" -a -x "${{PEX}}" ]; then
            # We're a --venv execution mode PEX installed under the PEX_ROOT and the venv
            # interpreter to use is embedded in the shebang of our venv pex script; so just
            # execute that script directly.
            exec "${{PEX}}" "$@"
        fi
        find_python() {{
            for python in \\
        {pythons} \\
            ; do
                if command -v "${{python}}" 2>/dev/null; then
                    return
                fi
            done
        }}
        if [ -x "${{DEFAULT_PYTHON}}" ]; then
            python_exe="${{DEFAULT_PYTHON}} ${{DEFAULT_PYTHON_ARGS}}"
        else
            python_exe="$(find_python)"
        fi
        if [ -n "${{python_exe}}" ]; then
            if [ -n "${{PEX_VERBOSE:-}}" ]; then
                echo >&2 "$0 used /bin/sh boot to select python: ${{python_exe}} for re-exec..."
            fi
            if [ -z "${{VENV}}" -a -e "${{PEX}}" ]; then
                # We're a --zipapp execution mode PEX installed under the PEX_ROOT with a
                # __main__.py in our top-level directory; so execute Python against that
                # directory.
                exec ${{python_exe}} "${{PEX}}" "$@"
            else
                # The slow path: this PEX zipapp is not installed yet. Run the PEX zipapp so it
                # can install itself, rebuilding its fast path layout under the PEX_ROOT.
                exec ${{python_exe}} "$0" "$@"
            fi
        fi
        echo >&2 "Failed to find any of these python binaries on the PATH:"
        for python in \\
        {pythons} \\
        ; do
            echo >&2 "${{python}}"
        done
        echo >&2 "Either adjust your $PATH which is currently:"
        echo >&2 "${{PATH}}"
        echo >&2 -n "Or else install an appropriate Python that provides one of the binaries in "
        echo >&2 "this list."
        exit 1
        """
    ).format(
        venv="1" if pex_info.venv else "",
        python=python,
        python_args=python_args,
        pythons=" \\\n".join('"{name}"'.format(name=name) for name in python_names),
        pex_root=pex_info.raw_pex_root,
        pex_installed_relpath=os.path.relpath(pex_installed_path, pex_info.pex_root),
    )
38 changes: 32 additions & 6 deletions pex/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,18 @@
InterpreterOrError = Union["PythonInterpreter", InterpreterIdentificationError]


def calculate_binary_name(
    platform_python_implementation, python_version=None  # type: Optional[Tuple[int, ...]]
):
    # type: (...) -> str
    """Return the conventional binary name for a Python implementation and optional version.

    The base name is "pypy" when the implementation is exactly "PyPy" and "python" for anything
    else. When a non-empty version is given, its components are joined with "." and appended to
    the base name; otherwise the bare base name is returned.
    """
    base_name = "pypy" if platform_python_implementation == "PyPy" else "python"
    if python_version:
        dotted_version = ".".join(str(component) for component in python_version)
        return "{name}{version}".format(name=base_name, version=dotted_version)
    return base_name


class PythonIdentity(object):
class Error(Exception):
pass
Expand Down Expand Up @@ -305,7 +317,12 @@ def iter_supported_platforms(self):
yield Platform.from_tag(tag)

@classmethod
def parse_requirement(cls, requirement, default_interpreter="CPython"):
def parse_requirement(
cls,
requirement, # type: Union[Requirement, str]
default_interpreter="CPython", # type: str
):
# type: (...) -> Requirement
if isinstance(requirement, Requirement):
return requirement
elif isinstance(requirement, string):
Expand All @@ -328,13 +345,22 @@ def matches(self, requirement):
raise self.UnknownRequirement(str(e))
return self.distribution in requirement

def binary_name(self, version_components=2):
    # type: (int) -> str
    """Return the binary name for this identity's interpreter.

    The name is calculated from the interpreter name and the leading `version_components`
    parts of the version. A `version_components` of 0 or less requests the un-versioned name.
    """
    leading_version = self._version[:version_components] if version_components > 0 else None
    return calculate_binary_name(
        platform_python_implementation=self._interpreter_name, python_version=leading_version
    )

def hashbang(self):
# type: () -> str
if self._interpreter_name == "PyPy":
hashbang_string = "pypy" if self._version[0] == 2 else "pypy{}".format(self._version[0])
else:
hashbang_string = "python{}.{}".format(self._version[0], self._version[1])
return "#!/usr/bin/env {}".format(hashbang_string)
return "#!/usr/bin/env {}".format(
self.binary_name(
version_components=0
if self._interpreter_name == "PyPy" and self.version[0] == 2
else 2
)
)

@property
def python(self):
Expand Down
4 changes: 2 additions & 2 deletions pex/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from abc import abstractmethod
from contextlib import contextmanager

from pex.common import atomic_directory, is_python_script, open_zip, safe_copy, safe_mkdir
from pex.common import atomic_directory, is_script, open_zip, safe_copy, safe_mkdir
from pex.enum import Enum
from pex.tracer import TRACER
from pex.typing import TYPE_CHECKING
Expand Down Expand Up @@ -253,7 +253,7 @@ def __str__(self):
@contextmanager
def _identify_layout(pex):
# type: (str) -> Iterator[Optional[_Layout]]
if zipfile.is_zipfile(pex) and is_python_script(
if zipfile.is_zipfile(pex) and is_script(
pex,
# N.B.: A PEX file need not be executable since it can always be run via `python a.pex`.
check_executable=False,
Expand Down
Loading

0 comments on commit 8d0f40b

Please sign in to comment.