Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --scie option to produce native PEX exes. #2466

Merged
merged 12 commits into from
Jul 17, 2024
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ env:
# importing builtins like `fcntl` as outlined in https://github.com/pex-tool/pex/issues/1391.
_PEX_TEST_PYENV_VERSIONS: "2.7 3.7 3.10"
_PEX_PEXPECT_TIMEOUT: 10
# We have integration tests that exercise `--scie` support, and these can trigger downloads from
# GitHub Releases that need an elevated rate limit quota, which this token provides.
SCIENCE_AUTH_API_GITHUB_COM_BEARER: ${{ secrets.GITHUB_TOKEN }}
concurrency:
group: CI-${{ github.ref }}
# Queue on all branches and tags, but only cancel overlapping PR burns.
Expand Down
45 changes: 44 additions & 1 deletion pex/bin/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from argparse import Action, ArgumentDefaultsHelpFormatter, ArgumentError, ArgumentParser
from textwrap import TextWrapper

from pex import dependency_configuration, pex_warnings
from pex import dependency_configuration, pex_warnings, scie
from pex.argparse import HandleBoolAction
from pex.commands.command import (
GlobalConfigurationError,
Expand All @@ -29,6 +29,7 @@
from pex.dist_metadata import Requirement
from pex.docs.command import serve_html_docs
from pex.enum import Enum
from pex.fetcher import URLFetcher
from pex.inherit_path import InheritPath
from pex.interpreter_constraints import InterpreterConstraint, InterpreterConstraints
from pex.layout import Layout, ensure_installed
Expand Down Expand Up @@ -56,6 +57,7 @@
from pex.resolve.resolver_options import create_pip_configuration
from pex.resolve.resolvers import Unsatisfiable, sorted_requirements
from pex.result import Error, ResultError, catch, try_
from pex.scie import ScieConfiguration
from pex.targets import Targets
from pex.tracer import TRACER
from pex.typing import TYPE_CHECKING, cast
Expand Down Expand Up @@ -314,6 +316,8 @@ def configure_clp_pex_options(parser):
),
)

scie.register_options(group)

group.add_argument(
"--always-write-cache",
dest="always_write_cache",
Expand Down Expand Up @@ -1233,6 +1237,27 @@ def do_main(
cmdline, # type: List[str]
env, # type: Dict[str, str]
):
scie_options = scie.extract_options(options)
if scie_options and not options.pex_name:
raise ValueError(
"You must specify `-o`/`--output-file` to use `{scie_options}`.".format(
scie_options=scie.render_options(scie_options)
)
)
scie_configuration = None # type: Optional[ScieConfiguration]
if scie_options:
scie_configuration = scie_options.create_configuration(targets=targets)
if not scie_configuration:
raise ValueError(
"You selected `{scie_options}`, but none of the selected targets have "
"compatible interpreters that can be embedded to form a scie:\n{targets}".format(
scie_options=scie.render_options(scie_options),
targets="\n".join(
target.render_description() for target in targets.unique_targets()
),
)
)

with TRACER.timed("Building pex"):
pex_builder = build_pex(
requirement_configuration=requirement_configuration,
Expand Down Expand Up @@ -1276,6 +1301,24 @@ def do_main(
verbose=options.seed == Seed.VERBOSE,
)
print(seed_info)
if scie_configuration:
url_fetcher = URLFetcher(
network_configuration=resolver_configuration.network_configuration,
password_entries=resolver_configuration.repos_configuration.password_entries,
handle_file_urls=True,
)
with TRACER.timed("Building scie(s)"):
for par_info in scie.build(
configuration=scie_configuration, pex_file=pex_file, url_fetcher=url_fetcher
):
log(
"Saved PEX scie for CPython {version} on {platform} to {scie}".format(
version=par_info.target.version_str,
platform=par_info.platform,
scie=os.path.relpath(par_info.file),
),
V=options.verbosity,
)
else:
if not _compatible_with_current_platform(interpreter, targets.platforms):
log("WARNING: attempting to run PEX with incompatible platforms!", V=1)
Expand Down
2 changes: 1 addition & 1 deletion pex/platforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

def _normalize_platform(platform):
# type: (str) -> str
return platform.replace("-", "_").replace(".", "_")
return platform.lower().replace("-", "_").replace(".", "_")


@attr.s(frozen=True)
Expand Down
15 changes: 15 additions & 0 deletions pex/resolve/resolver_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,24 @@ class PexRepositoryConfiguration(object):
network_configuration = attr.ib(default=NetworkConfiguration()) # type: NetworkConfiguration
transitive = attr.ib(default=True) # type: bool

@property
def repos_configuration(self):
# type: () -> ReposConfiguration
return ReposConfiguration()


@attr.s(frozen=True)
class LockRepositoryConfiguration(object):
parse_lock = attr.ib() # type: Callable[[], Union[Lockfile, Error]]
lock_file_path = attr.ib() # type: str
pip_configuration = attr.ib() # type: PipConfiguration

@property
def repos_configuration(self):
# type: () -> ReposConfiguration
return self.pip_configuration.repos_configuration

@property
def network_configuration(self):
# type: () -> NetworkConfiguration
return self.pip_configuration.network_configuration
202 changes: 202 additions & 0 deletions pex/scie/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
# Copyright 2024 Pex project contributors.
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import absolute_import

import os.path
from argparse import Namespace, _ActionsContainer

from pex.compatibility import urlparse
from pex.fetcher import URLFetcher
from pex.orderedset import OrderedSet
from pex.pep_440 import Version
from pex.scie import science
from pex.scie.model import (
ScieConfiguration,
ScieInfo,
ScieOptions,
SciePlatform,
ScieStyle,
ScieTarget,
)
from pex.scie.science import SCIENCE_RELEASES_URL, SCIENCE_REQUIREMENT
from pex.typing import TYPE_CHECKING, cast
from pex.variables import ENV, Variables

if TYPE_CHECKING:
from typing import Iterator, Optional, Tuple, Union


__all__ = (
"ScieConfiguration",
"ScieInfo",
"SciePlatform",
"ScieStyle",
"ScieTarget",
"build",
)


def register_options(parser):
# type: (_ActionsContainer) -> None

parser.add_argument(
"--scie",
"--par",
Copy link
Member Author

@jsirois jsirois Jul 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As the issues attached to this PR prove, people in the world know "PAR"; so it seems to make sense to add a --par alias to this one option for discoverability by those people. If they need more than the default --par treatment, then they really must learn about scies and --scie-* advanced options anyhow.

dest="scie_style",
default=None,
type=ScieStyle.for_value,
choices=ScieStyle.values(),
help=(
"Create one or more native executable scies from your PEX that include a portable "
"CPython interpreter along with your PEX making for a truly hermetic PEX that can run "
"on machines with no Python installed at all. If your PEX has multiple targets, "
"whether `--platform`s, `--complete-platform`s or local interpreters in any "
"combination, then one PEX scie will be made for each platform, selecting the latest "
"compatible portable CPython interpreter. Note that only CPython>=3.8 is supported. If "
"you'd like to explicitly control the target platforms or the exact portable CPython "
"selected, see `--scie-platform`, `--scie-pbs-release` and `--scie-python-version`. "
"Specifying `--scie {lazy}` will fetch the portable CPython interpreter just in time "
"on first boot of the PEX scie on a given machine if needed. The URL(s) to fetch the "
"portable CPython interpreter from can be customized by exporting the "
"PEX_BOOTSTRAP_URLS environment variable pointing to a json file with the format: "
'`{{"ptex": {{<file name 1>: <url>, ...}}}}` where the file names should match those '
"found via `SCIE=inspect <the PEX scie> | jq .ptex` with appropriate replacement URLs. "
"Specifying `--scie {eager}` will embed the portable CPython interpreter in your PEX "
"scie making for a larger file, but requiring no internet access to boot. If you have "
"customization needs not addressed by the Pex `--scie*` options, consider using "
"`science` to build your scies (which is what Pex uses behind the scenes); see: "
"https://science.scie.app.".format(lazy=ScieStyle.LAZY, eager=ScieStyle.EAGER)
Comment on lines +69 to +72
Copy link
Member Author

@jsirois jsirois Jul 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sureshjoshi I see that your Pants plugin accepts an optional custom lift manifest, parses it if present, then injects bits into it. I think to support that sort of thing in a principled way, I'd have to parse the user supplied manifest and confirm they do not set the following keys:

  • ptex
  • scie_jump
  • files: with matching names
  • interpreters or interpreter_groups: with matching ids
  • commands: with a default command (I use this to launch the PEX)
  • bindings: with a matching name (needed for the default command to work)

Additionally, I'd have to advertise that I bind ptex to "ptex" for lazy scies, and always bind configure:PYTHON and configure:PEX.

Without all this I don't see how the user supplied manifest can work with Pex needs fruitfully. Can you think of any other corners? Perhaps I'm overthinking. Do you need this functionality?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess for ptex and scie_jump I could allow user-specified versions (but no more) IFF those versions were compatible with a lower bound.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was hacking around tonight, trying to envision how I'd re-build something like pantsible (for example).

One idea was to manipulate the embedded manifest after pex generates it (add the custom bindings and whatnot by piping the file to another tool), but then I realized I don't think I'd want to be able to dynamically modify the manifest of what should be a "sealed" binary, as that would be crazy for supply chain purposes - and I don't want to be able to dynamically alter the commands the executable could call.

In the case of the plugin (which, I wouldn't really use as a reference for anything - as I made it a few years ago to solve an immediate deployment problem on a client project), I think we try to use the optional lift.toml where possible and inject the target names under certain conditions.

For this PR, I don't see any problems with deferring all of those concerns, but I'm of two minds.

  • pex being able to accept a custom manifest template that has to be perfectly structured, with/without certain keys feels a bit hacky
  • Using a separate tool (science), which overlaps with a lot of what pex would provide, feels off too

Would it make sense/be possible for science to defer to pex in some way, for the embedded Python interpreter? I'm trying to envision some sort of cleaner composition between two tools which have similar base functionality - but science allows some added knobs.

[lift]
name = "pantsible"
description = "Ansible with an embedded Python interpreter."
platforms = ... inferred from pex ...

[[lift.interpreters]] -> ... inferred from pex ...

[[lift.files]]
name = "pex"

[[lift.commands]]
name = "ansible"
exe = "{scie.bindings.venv}/venv/bin/ansible"
args = []

...

Although, one immediate problem I see here... I think I'm conflating a pex file with the pex CLI.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reading through the PR, another thought that popped into my head is allowing for the pex CLI's generated TOML to act as an overlay or merge-manifest with a local one.

Whether that functionality is in science or pex CLI - overlaying/overwriting the user created manifest seems reasonable.

Copy link
Member Author

@jsirois jsirois Jul 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was hacking around tonight, trying to envision how I'd re-build something like pantsible (for example).

Well, pantsible uses a feature specific to scies over and above a PEX, namely the BusyBox support. It makes sense to me to just directly support this with --scie-busybox [list of entry points]. If you specify that then Pex emits a manifest with no default command and just named commands for each listed entry point.

Would it make sense/be possible for science to defer to pex in some way, for the embedded Python interpreter? I'm trying to envision some sort of cleaner composition between two tools which have similar base functionality - but science allows some added knobs.

Well, science is general purpose - Any language; so it doesn't really make sense for it to know about Python let alone Pex. It does have a Provider interface to supply interpreters and that has exactly 1 implementation currently, that provides PBS interpreters. A PEX provider might make sense.

That said, Pex creates PEXes - single file executables. These do not have:

  1. BusyBox support: You need conscript, for example, for that.
  2. Bindings support: I.E.: Pex offers you no way to do pre-launch setup. You just have to write Python code to do 1 time setup in your main if you want that or provide alternate entry points fired off with {PEX_MODULE=foo,PEX_SCRIPT=bar} ./my.pex

As such, I think it makes sense for Pex to offer the ability to take your PEX file and turn it into a scie that behaves exactly the same, with nothing extra except maybe running faster. Everything you'd do in a custom manifest, afaict, would add things the PEX cannot already do. At that point, having to move up a layer and use science yourself with a custom lift manifest to build your app not using Pex directly makes sense. I.E.: what scie-pants has to do. The Pants app is more complex than just what the Pants PEX does / has tight perf overhead concerns; so it makes sense to move up to the higher layer.

Reading through the PR, another thought that popped into my head is allowing for the pex CLI's generated TOML to act as an overlay or merge-manifest with a local one.

That's exactly what I meant by all this: #2466 (comment) It seems to me you can't just overlay, you must confirm the key mechanisms Pex uses in its lift are not destroyed by the merge before merging.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm referring to downstream tools like science, not pex, in this case. As in "once you've created a pex, then ..."

Anyways, the things I have in my mind are probably out of scope of this PR, and if they're important enough, or strongly enough use-cased, I can open a new ticket later.

Copy link
Member Author

@jsirois jsirois Jul 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha. So I think the PEX interpreter Provider would just use the pex3 scie create ... logic I referenced here: #2466 (comment)

I.E.: not create the scie, but use the ScieConfiguration.from_tags API + a given PEX file to source the tags to implement platform / interpreter selection via the calculated ScieConfiguration's ScieTarget targets which include platform, pbs_release and python_version.

That said, the current science Provider interface only allows providing an interpreter and not a set of platforms; so new API work would need to be done in science anyhow it seems to plug all this in.

Copy link
Member Author

@jsirois jsirois Jul 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the current API does allow enough for a PEX interpreter Provider to error when asked to produce an interpreter distribution via Provider.distribution(platform) for a platform the PEX does not support. That's probably actually enough:

[lift]
name = "example"
platforms = [
    "linux-aarch64",
    "linux-x86_64",
    "macos-aarch64",
]

[[lift.files]]
name = "pex"

[[lift.interpreters]]
id = "cpython"
provider = "PEX"
pex = "{pex}"

Here if I ran science lift --file pex=my-py37.pex build ... the PEX interpreter Provider could fail since CPython 3.7 is not supported and if I ran science lift --file pex=my-py38.pex build ... it could fail fast if, for example, there were no 3.8 linux-aarch64 distributions in the latest PBS release.

Copy link
Collaborator

@sureshjoshi sureshjoshi Jul 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, there we go - that's the kinda thing I see value in. One less place where head scratching can take place.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The restricting use case for --scie-platform I mentioned now has a test in 61f55a4 as does auto platforms detection.

),
)
parser.add_argument(
"--scie-platform",
dest="scie_platforms",
default=[],
action="append",
type=SciePlatform.for_value,
choices=SciePlatform.values(),
help=(
"The platform to produce the native PEX scie executable for. Can be specified multiple "
"times."
),
)
parser.add_argument(
"--scie-pbs-release",
dest="scie_pbs_release",
default=None,
type=str,
help=(
"The Python Standalone Builds release to use. Currently releases are dates of the form "
"YYYYMMDD, e.g.: '20240713'. See their GitHub releases page at "
"https://github.com/indygreg/python-build-standalone/releases to discover available "
"releases. If left unspecified the latest release is used. N.B.: The latest lookup is "
"cached for 5 days. To force a fresh lookup you can remove the cache at "
"<USER CACHE DIR>/science/downloads."
),
)
parser.add_argument(
"--scie-python-version",
dest="scie_python_version",
default=None,
type=Version,
help=(
"The portable CPython version to select. Can be either in `<major>.<minor>` form; "
"e.g.: '3.11', or else fully specified as `<major>.<minor>.<patch>`; e.g.: '3.11.3'. "
"If you don't specify this option, Pex will do its best to guess appropriate portable "
"CPython versions. N.B.: Python Standalone Builds does not provide all patch versions; "
"so you should check their releases at "
"https://github.com/indygreg/python-build-standalone/releases if you wish to pin down "
"to the patch level."
),
)
parser.add_argument(
"--scie-science-binary",
dest="scie_science_binary",
default=None,
type=str,
help=(
"The file path of a `science` binary or a URL to use to fetch the `science` binary "
"when there is no `science` on the PATH with a version matching {science_requirement}. "
"Pex uses the official `science` releases at {science_releases_url} by default.".format(
science_requirement=SCIENCE_REQUIREMENT, science_releases_url=SCIENCE_RELEASES_URL
)
),
)


def render_options(options):
    # type: (ScieOptions) -> str
    """Render `options` as a space-separated string of `--scie*` command line flags.

    `--scie <style>` is always emitted first. It is followed by one `--scie-platform` pair per
    platform, and then by `--scie-pbs-release`, `--scie-python-version` and
    `--scie-science-binary` pairs for each of those options that is set (truthy).

    :param options: The scie options to render.
    :return: The rendered flags joined by single spaces; values are not shell-quoted.
    """
    rendered = ["--scie", str(options.style)]
    for scie_platform in options.platforms:
        rendered.extend(("--scie-platform", str(scie_platform)))
    if options.pbs_release:
        rendered.extend(("--scie-pbs-release", options.pbs_release))
    if options.python_version:
        dotted_version = ".".join(str(component) for component in options.python_version)
        rendered.extend(("--scie-python-version", dotted_version))
    if options.science_binary_url:
        rendered.extend(("--scie-science-binary", options.science_binary_url))
    return " ".join(rendered)


def extract_options(options):
    # type: (Namespace) -> Optional[ScieOptions]
    """Extract the scie options from parsed command line options.

    :param options: The parsed options; expected to carry the `scie_*` attributes set up by
        `register_options`.
    :return: The extracted scie options, or `None` when no `--scie` style was selected.
    :raises ValueError: If `--scie-python-version` is not of the form `<major>.<minor>` or
        `<major>.<minor>.<patch>`, or if it is older than Python 3.8.
    """
    if not options.scie_style:
        return None

    python_version = None  # type: Optional[Union[Tuple[int, int], Tuple[int, int, int]]]
    if options.scie_python_version:
        release = options.scie_python_version.parsed_version.release
        # Reject both too few (e.g.: `3`) and too many (e.g.: `3.11.3.1`) release components; only
        # 2 or 3 component versions match the documented forms and the declared tuple types.
        if not release or not 2 <= len(release) <= 3:
            raise ValueError(
                "Invalid Python version: '{python_version}'.\n"
                "Must be in the form `<major>.<minor>` or `<major>.<minor>.<patch>`".format(
                    python_version=options.scie_python_version
                )
            )
        if release < (3, 8):
            raise ValueError(
                "Invalid Python version: '{python_version}'.\n"
                "Scies are built using Python Standalone Builds which only supports Python >=3.8.\n"
                "To find supported Python versions, you can browse the releases here:\n"
                "  https://github.com/indygreg/python-build-standalone/releases".format(
                    python_version=options.scie_python_version
                )
            )
        python_version = cast("Union[Tuple[int, int], Tuple[int, int, int]]", release)

    science_binary_url = options.scie_science_binary
    if science_binary_url:
        url_info = urlparse.urlparse(science_binary_url)
        # A scheme-less value that names an existing file is a local path: turn it into a file URL.
        if not url_info.scheme and url_info.path and os.path.isfile(url_info.path):
            science_binary_url = "file://{path}".format(path=os.path.abspath(url_info.path))

    return ScieOptions(
        style=options.scie_style,
        # De-duplicate repeated `--scie-platform` values while retaining their order.
        platforms=tuple(OrderedSet(options.scie_platforms)),
        pbs_release=options.scie_pbs_release,
        python_version=python_version,
        science_binary_url=science_binary_url,
    )


def build(
    configuration,  # type: ScieConfiguration
    pex_file,  # type: str
    url_fetcher=None,  # type: Optional[URLFetcher]
    env=ENV,  # type: Variables
):
    # type: (...) -> Iterator[ScieInfo]
    """Build scies for `pex_file` by delegating to `science.build`.

    All arguments are forwarded unchanged and the result of `science.build` is returned as-is.

    :param configuration: The scie configuration to build with.
    :param pex_file: The path of the PEX file to build scies from.
    :param url_fetcher: An optional fetcher, passed through to `science.build`.
    :param env: The Pex variables, passed through to `science.build`.
    :return: An iterator over information about the scies produced.
    """
    scie_infos = science.build(configuration, pex_file, url_fetcher=url_fetcher, env=env)
    return scie_infos
30 changes: 30 additions & 0 deletions pex/scie/configure-binding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright 2024 Pex project contributors.
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import print_function

import os
import sys


def write_bindings(
    env_file,  # type: str
    installed_pex_dir,  # type: str
):
    # type: (...) -> None
    """Append `PYTHON=` and `PEX=` binding lines to `env_file`.

    :param env_file: The path of the file to append the bindings to.
    :param installed_pex_dir: The directory containing the installed PEX's `__main__.py`.
    """
    with open(env_file, "a") as fp:
        # The interpreter running this script is the one recorded for the PYTHON binding.
        fp.write("PYTHON=" + sys.executable + "\n")
        # Symlinks are resolved so the PEX binding records the real `__main__.py` location.
        pex_main = os.path.realpath(os.path.join(installed_pex_dir, "__main__.py"))
        fp.write("PEX=" + pex_main + "\n")


if __name__ == "__main__":
    # Both variables below are indexed directly (not via `.get`), so a missing variable raises
    # KeyError rather than falling back to a default.
    binding_env_file = os.environ["SCIE_BINDING_ENV"]

    # The zipapp case:
    pex_dir = os.environ["_PEX_SCIE_INSTALLED_PEX_DIR"]
    if not pex_dir:
        # The --venv case: only reached when the variable above is set but empty.
        pex_dir = os.environ.get(
            "VIRTUAL_ENV", os.path.dirname(os.path.dirname(sys.executable))
        )

    write_bindings(env_file=binding_env_file, installed_pex_dir=pex_dir)
    sys.exit(0)
Loading
Loading