From 6121d1e9cee8d10a703cf5a97d391c743448b05f Mon Sep 17 00:00:00 2001 From: Ignas <240938+aignas@users.noreply.github.com> Date: Tue, 29 Aug 2023 08:48:39 +0900 Subject: [PATCH] feat(pip_parse): support patching 'whl_library' Before that the users had to rely on patching the actual wheel files and uploading them as different versions to internal artifact stores if they needed to modify the wheel dependencies. This is very common when breaking dependency cycles in `pytorch` or `apache-airflow` packages. With this feature we can support patching external PyPI dependencies via unified patches passed into the `pip.whl_mods` extension and the legacy `package_annotation` macro. Fixes #1076. Add a non-empty patch and show that there can be multiple patches exp: A different design, that does not require us to put patches to annotations.json Simplify the design and add extra notes in the implementation fix: make the legacy WORKSPACE patching compatible with bazel 5.4 chore: update docs refactor: s/module_override/whl_override doc: update changelog doc: improve documentation and code comments on the new features s/whl_override/whl_library_override/g refactor wheel installer feat: support whl_overriding before extraction Add a note doc: update changelog fixup: set better default values for patches chore: add wheel_repackager.py to the list of pysrcs fix rebase conflicts fix: use better defaults for annotations fixup: update docs fixup: minor tidy up add a comment on annotation support for bzlmod refactor: whl patching to a separate function finish cleaning up handling of whl_patches for python annotations Add an empty patch to the pip_repository_annotations example Move patch argument processing to a single place, next to annotations Improve the script and add logging feat!: remove legacy bzlmod patching example doc: remove changelog entry for legacy patching feat!: remove the patching support via pip annotations feat!: remove support for whl_library patching for now --- 
.bazelrc | 4 +- CHANGELOG.md | 3 + docs/pip_repository.md | 3 +- examples/bzlmod/MODULE.bazel | 9 ++ examples/bzlmod/patches/BUILD.bazel | 4 + examples/bzlmod/patches/empty.patch | 0 python/extensions/pip.bzl | 90 ++++++++++++++++++- python/pip_install/pip_repository.bzl | 90 ++++++++++++++++++- python/pip_install/private/srcs.bzl | 1 + .../tools/wheel_installer/BUILD.bazel | 7 ++ .../tools/wheel_installer/arguments.py | 14 +++ .../tools/wheel_installer/wheel_installer.py | 86 ++++++++++-------- .../tools/wheel_installer/wheel_repackager.py | 78 ++++++++++++++++ 13 files changed, 343 insertions(+), 46 deletions(-) create mode 100644 examples/bzlmod/patches/BUILD.bazel create mode 100644 examples/bzlmod/patches/empty.patch create mode 100755 python/pip_install/tools/wheel_installer/wheel_repackager.py diff --git a/.bazelrc b/.bazelrc index 39b28d12e6..fc4697712d 100644 --- a/.bazelrc +++ b/.bazelrc @@ -3,8 +3,8 @@ # This lets us glob() up all the files inside the examples to make them inputs to tests # (Note, we cannot use `common --deleted_packages` because the bazel version command doesn't support it) # To update these lines, run tools/bazel_integration_test/update_deleted_packages.sh -build 
--deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_install,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,tests/compile_pip_requirements,tests/compile_pip_requirements_test_from_external_workspace,tests/ignore_root_user_error,tests/pip_repository_entry_points -query --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_install,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,tests/compile_pip_requirements,tests/compile_pip_requirements_test_from_external_workspace,tests/ignore_root_user_error,tests/pip_repository_entry_points +build 
--deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_install,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/pip_repository_annotations/patches,examples/py_proto_library,tests/compile_pip_requirements,tests/compile_pip_requirements_test_from_external_workspace,tests/ignore_root_user_error,tests/pip_repository_entry_points +query 
--deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_install,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/pip_repository_annotations/patches,examples/py_proto_library,tests/compile_pip_requirements,tests/compile_pip_requirements_test_from_external_workspace,tests/ignore_root_user_error,tests/pip_repository_entry_points test --test_output=errors diff --git a/CHANGELOG.md b/CHANGELOG.md index ed3a60d889..136256d767 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,6 +53,9 @@ A brief description of the categories of changes: * `//python:packaging_bzl` added, a `bzl_library` for the Starlark files `//python:packaging.bzl` requires. +* (bzlmod) Added patching support via `patches` and `patch_strip` arguments to + the new `pip.whl_override` tag class. + ### Removed * (bzlmod) The `entry_point` macro is no longer supported and has been removed diff --git a/docs/pip_repository.md b/docs/pip_repository.md index 453ca29713..7b9d4f014e 100644 --- a/docs/pip_repository.md +++ b/docs/pip_repository.md @@ -109,7 +109,7 @@ py_binary(
whl_library(name, annotation, download_only, enable_implicit_namespace_pkgs, environment, extra_pip_args, isolated, pip_data_exclude, python_interpreter, python_interpreter_target, - quiet, repo, repo_mapping, repo_prefix, requirement, timeout) + quiet, repo, repo_mapping, repo_prefix, requirement, timeout, whl_patches)@@ -137,6 +137,7 @@ Instantiated from pip_repository and inherits config options from there. | repo_prefix | Prefix for the generated packages will be of the form
@<prefix><sanitized-package-name>//...
| String | optional | ""
|
| requirement | Python requirement string describing the package to make available | String | required | |
| timeout | Timeout (in seconds) on the rule's execution duration. | Integer | optional | 600
|
+| whl_patches | Patches to be applied after building/downloading the '.whl' file before generating BUILD.bazel files and extracting it. INTERNAL USE ONLY. | Dictionary: Label -> String | optional | {}
|
diff --git a/examples/bzlmod/MODULE.bazel b/examples/bzlmod/MODULE.bazel
index 0d1c7a736b..9faa8c92c6 100644
--- a/examples/bzlmod/MODULE.bazel
+++ b/examples/bzlmod/MODULE.bazel
@@ -113,6 +113,15 @@ pip.parse(
"@whl_mods_hub//:wheel.json": "wheel",
},
)
+
+# You can add patches that will be applied to the contents of the named whl file
+pip.whl_override(
+ file = "requests-2.25.1-py2.py3-none-any.whl",
+ patch_strip = 1,
+ patches = [
+ "@//patches:empty.patch",
+ ],
+)
use_repo(pip, "pip")
bazel_dep(name = "other_module", version = "", repo_name = "our_other_module")
diff --git a/examples/bzlmod/patches/BUILD.bazel b/examples/bzlmod/patches/BUILD.bazel
new file mode 100644
index 0000000000..ed2af796bb
--- /dev/null
+++ b/examples/bzlmod/patches/BUILD.bazel
@@ -0,0 +1,4 @@
+# Export every patch file in this package so that it can be referenced by
+# label, e.g. "@//patches:empty.patch" in the `pip.whl_override` tag.
+exports_files(
+ srcs = glob(["*.patch"]),
+ visibility = ["//visibility:public"],
+)
diff --git a/examples/bzlmod/patches/empty.patch b/examples/bzlmod/patches/empty.patch
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/extensions/pip.bzl b/python/extensions/pip.bzl
index f94f18c619..567bcade91 100644
--- a/python/extensions/pip.bzl
+++ b/python/extensions/pip.bzl
@@ -77,7 +77,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
whl_mods = whl_mods,
)
-def _create_whl_repos(module_ctx, pip_attr, whl_map):
+def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides):
python_interpreter_target = pip_attr.python_interpreter_target
# if we do not have the python_interpreter set in the attributes
@@ -96,9 +96,10 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map):
))
python_interpreter_target = INTERPRETER_LABELS[python_name]
+ python_version = version_label(pip_attr.python_version)
pip_name = "{}_{}".format(
hub_name,
- version_label(pip_attr.python_version),
+ python_version,
)
requrements_lock = locked_requirements_label(module_ctx, pip_attr)
@@ -124,12 +125,17 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map):
# to.
annotation = whl_modifications.get(whl_name)
whl_name = normalize_name(whl_name)
+
whl_library(
name = "%s_%s" % (pip_name, whl_name),
requirement = requirement_line,
repo = pip_name,
repo_prefix = pip_name + "_",
annotation = annotation,
+ whl_patches = {
+ p: json.encode(args)
+ for p, args in whl_overrides.get(whl_name, {}).items()
+ },
python_interpreter = pip_attr.python_interpreter,
python_interpreter_target = python_interpreter_target,
quiet = pip_attr.quiet,
@@ -147,6 +153,42 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map):
whl_map[hub_name][whl_name][full_version(pip_attr.python_version)] = pip_name + "_"
+def _parse_whl_name(file):
+    """Split a wheel file name into its components.
+
+    The expected format is:
+    {distribution}-{version}(-{build tag})?-{python tag}-{abi tag}-{platform tag}.whl
+
+    Args:
+        file: the wheel file name, e.g. "requests-2.25.1-py2.py3-none-any.whl".
+
+    Returns:
+        A struct with the `distribution`, `version`, `build_tag` (None when the
+        name has no build tag), `python_tag`, `abi_tag` and `platform_tag`
+        fields.
+    """
+    if not file.endswith(".whl"):
+        fail("not a valid wheel: {}".format(file))
+
+    parts = file[:-len(".whl")].split("-")
+
+    # A well-formed name has exactly 5 components, or 6 when the optional build
+    # tag is present. Rejecting everything else avoids silently folding extra
+    # components into the version.
+    if len(parts) == 5:
+        distribution, version, python_tag, abi_tag, platform_tag = parts
+        build_tag = None
+    elif len(parts) == 6:
+        distribution, version, build_tag, python_tag, abi_tag, platform_tag = parts
+        if not build_tag:
+            fail("cannot extract build tag from the whl filename: {}".format(file))
+    else:
+        fail("cannot parse the whl filename, expected 5 or 6 '-' separated components: {}".format(file))
+
+    # Every mandatory component must be non-empty.
+    for label, value in [
+        ("distribution", distribution),
+        ("version", version),
+        ("python tag", python_tag),
+        ("abi tag", abi_tag),
+        ("platform tag", platform_tag),
+    ]:
+        if not value:
+            fail("cannot extract {} from the whl filename: {}".format(label, file))
+
+    return struct(
+        distribution = distribution,
+        version = version,
+        build_tag = build_tag,
+        python_tag = python_tag,
+        abi_tag = abi_tag,
+        platform_tag = platform_tag,
+    )
+
def _pip_impl(module_ctx):
"""Implementation of a class tag that creates the pip hub and corresponding pip spoke whl repositories.
@@ -216,6 +258,29 @@ def _pip_impl(module_ctx):
# Build all of the wheel modifications if the tag class is called.
_whl_mods_impl(module_ctx)
+ _overriden_whl_set = {}
+ whl_overrides = {}
+
+ for module in module_ctx.modules:
+ for attr in module.tags.whl_override:
+ whl_name = normalize_name(_parse_whl_name(attr.file).distribution)
+
+ if attr.file in _overriden_whl_set:
+ fail("Duplicate module overrides for '{}'".format(attr.file))
+ _overriden_whl_set[attr.file] = None
+
+ for patch in attr.patches:
+ if whl_name not in whl_overrides:
+ whl_overrides[whl_name] = {}
+
+ if patch not in whl_overrides[whl_name]:
+ whl_overrides[whl_name][patch] = struct(
+ patch_strip = attr.patch_strip,
+ whls = [],
+ )
+
+ whl_overrides[whl_name][patch].whls.append(attr.file)
+
# Used to track all the different pip hubs and the spoke pip Python
# versions.
pip_hub_map = {}
@@ -260,7 +325,7 @@ def _pip_impl(module_ctx):
else:
pip_hub_map[pip_attr.hub_name].python_versions.append(pip_attr.python_version)
- _create_whl_repos(module_ctx, pip_attr, hub_whl_map)
+ _create_whl_repos(module_ctx, pip_attr, hub_whl_map, whl_overrides)
for hub_name, whl_map in hub_whl_map.items():
pip_hub_repository_bzlmod(
@@ -380,6 +445,24 @@ cannot have a child module that uses the same `hub_name`.
}
return attrs
+# Tag class used to attach patches to a single wheel file. The values collected
+# from this tag are forwarded to `whl_library` through its `whl_patches`
+# attribute.
+_whl_override_tag = tag_class(
+    attrs = {
+        "file": attr.string(
+            doc = """The Python wheel file name which needs to be patched, e.g. `requests-2.25.1-py2.py3-none-any.whl`. The patches will be applied in all repositories that set up this wheel via the pip.parse tag class.""",
+            mandatory = True,
+        ),
+        "patch_strip": attr.int(
+            default = 0,
+            doc = "The number of leading path segments to be stripped from the file name in the patches.",
+        ),
+        "patches": attr.label_list(
+            # The patches are applied to the wheel contents and the wheel is
+            # repackaged before the final extraction, so the doc must say
+            # "before", in line with the `whl_patches` attribute of whl_library.
+            doc = "A list of patches to apply to the contents of the wheel *before* 'whl_library' extracts it and generates the BUILD.bazel file.",
+            mandatory = True,
+        ),
+    },
+    doc = "Apply patches to a given Python wheel library defined by other tags in this extension.",
+)
+
pip = module_extension(
doc = """\
This extension is used to make dependencies from pip available.
@@ -421,6 +504,7 @@ JSON files where referred to as annotations, and were renamed to whl_modificatio
extension.
""",
),
+ "whl_override": _whl_override_tag,
},
)
diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl
index ea8b9eb5ac..037103f6ce 100644
--- a/python/pip_install/pip_repository.bzl
+++ b/python/pip_install/pip_repository.bzl
@@ -572,6 +572,51 @@ py_binary(
environ = common_env,
)
+def _patch_whl_file(rctx, *, python_interpreter, whl_path, patches, **kwargs):
+    """Apply the matching patches to a wheel and repackage it.
+
+    The archive is unpacked into the repository root, patched in place and
+    then handed to the `wheel_repackager` script, which rebuilds the wheel so
+    that its RECORD metadata stays correct.
+
+    Args:
+        rctx: repository_ctx
+        python_interpreter: the host python interpreter used for executing the
+            repackaging script.
+        whl_path: path of the wheel archive to patch. The trailing
+            len(".orig.zip") characters of its basename are dropped to obtain
+            the wheel file name that patches are matched against.
+        patches: a label-keyed-string dict whose values are
+            json.encode(struct(whls = [whl_file, ...], patch_strip = N)). A
+            patch is applied only when the wheel being processed is listed in
+            its `whls`. This encoding keeps the bzlmod extension interface
+            flexible until whl patching can be moved to a separate place.
+        **kwargs: extras passed to rctx.execute.
+
+    Returns:
+        The result of the rctx.execute call that runs the repackaging script.
+    """
+
+    # Unpack into the current directory: rctx.patch does not support patching
+    # in another directory.
+    rctx.extract(whl_path)
+
+    whl_file = rctx.path(whl_path).basename[:-len(".orig.zip")]
+
+    applied_any = False
+    unmatched_whls = []
+    for patch_label, encoded_args in patches.items():
+        spec = struct(**json.decode(encoded_args))
+        if whl_file not in spec.whls:
+            # Remember the candidates purely for the error message below.
+            unmatched_whls.extend(spec.whls)
+            continue
+
+        rctx.patch(patch_label, strip = spec.patch_strip)
+        applied_any = True
+
+    # Should we parse the passed whl_names and match it to `whl_file` for better errors?
+    if not applied_any:
+        fail("Could not find a match for {} in {}".format(whl_file, unmatched_whls))
+
+    repackage_cmd = [
+        python_interpreter,
+        "-m",
+        "python.pip_install.tools.wheel_installer.wheel_repackager",
+        whl_path,
+    ]
+    return rctx.execute(repackage_cmd, **kwargs)
+
def _whl_library_impl(rctx):
python_interpreter = _resolve_python_interpreter(rctx)
args = [
@@ -584,10 +629,41 @@ def _whl_library_impl(rctx):
args = _parse_optional_attrs(rctx, args)
+ # Manually construct the PYTHONPATH since we cannot use the toolchain here
+ environment = _create_repository_execution_environment(rctx, python_interpreter)
+
result = rctx.execute(
- args,
+ args + ["--no-extract"] + (["--rename-to-zip"] if rctx.attr.whl_patches else []),
+ environment = environment,
+ quiet = rctx.attr.quiet,
+ timeout = rctx.attr.timeout,
+ )
+ if result.return_code:
+ fail("whl_library %s failed: %s (%s) error code: '%s'" % (rctx.attr.name, result.stdout, result.stderr, result.return_code))
+
+ whl_path = json.decode(rctx.read("whl_file.json"))["whl_file"]
+ if not rctx.delete("whl_file.json"):
+ fail("failed to delete the whl_file.json file")
+
+ if rctx.attr.whl_patches:
+ result = _patch_whl_file(
+ rctx,
+ python_interpreter = python_interpreter,
+ whl_path = whl_path,
+ patches = rctx.attr.whl_patches,
+ environment = environment,
+ quiet = rctx.attr.quiet,
+ timeout = rctx.attr.timeout,
+ )
+ if result.return_code:
+ fail("repackaging .whl %s failed: %s (%s) error code: '%s'" % (rctx.attr.name, result.stdout, result.stderr, result.return_code))
+
+ whl_path = whl_path.replace(".orig.zip", "")
+
+ result = rctx.execute(
+ args + ["--whl-file", whl_path],
# Manually construct the PYTHONPATH since we cannot use the toolchain here
- environment = _create_repository_execution_environment(rctx, python_interpreter),
+ environment = environment,
quiet = rctx.attr.quiet,
timeout = rctx.attr.timeout,
)
@@ -618,6 +694,11 @@ def _whl_library_impl(rctx):
)
entry_points[entry_point_without_py] = entry_point_script_name
+ annotation = None
+ if rctx.attr.annotation:
+ json_contents = json.decode(rctx.read(rctx.attr.annotation))
+ annotation = struct(**json_contents)
+
build_file_contents = generate_whl_library_build_bazel(
repo_prefix = rctx.attr.repo_prefix,
dependencies = metadata["deps"],
@@ -627,7 +708,7 @@ def _whl_library_impl(rctx):
"pypi_version=" + metadata["version"],
],
entry_points = entry_points,
- annotation = None if not rctx.attr.annotation else struct(**json.decode(rctx.read(rctx.attr.annotation))),
+ annotation = annotation,
)
rctx.file("BUILD.bazel", build_file_contents)
@@ -677,6 +758,9 @@ whl_library_attrs = {
mandatory = True,
doc = "Python requirement string describing the package to make available",
),
+ "whl_patches": attr.label_keyed_string_dict(
+ doc = "Patches to be applied after building/downloading the '.whl' file before generating BUILD.bazel files and extracting it. INTERNAL USE ONLY.",
+ ),
"_python_path_entries": attr.label_list(
# Get the root directory of these rules and keep them as a default attribute
# in order to avoid unnecessary repository fetching restarts.
diff --git a/python/pip_install/private/srcs.bzl b/python/pip_install/private/srcs.bzl
index e342d90757..bfcda3cad6 100644
--- a/python/pip_install/private/srcs.bzl
+++ b/python/pip_install/private/srcs.bzl
@@ -13,4 +13,5 @@ PIP_INSTALL_PY_SRCS = [
"@rules_python//python/pip_install/tools/wheel_installer:namespace_pkgs.py",
"@rules_python//python/pip_install/tools/wheel_installer:wheel.py",
"@rules_python//python/pip_install/tools/wheel_installer:wheel_installer.py",
+ "@rules_python//python/pip_install/tools/wheel_installer:wheel_repackager.py",
]
diff --git a/python/pip_install/tools/wheel_installer/BUILD.bazel b/python/pip_install/tools/wheel_installer/BUILD.bazel
index 0eadcc25f6..d3c6ebe096 100644
--- a/python/pip_install/tools/wheel_installer/BUILD.bazel
+++ b/python/pip_install/tools/wheel_installer/BUILD.bazel
@@ -17,6 +17,13 @@ py_library(
],
)
+# Script that regenerates a '.whl' file from patched wheel contents; see the
+# module docstring of wheel_repackager.py.
+py_binary(
+ name = "wheel_repackager",
+ srcs = [
+ "wheel_repackager.py",
+ ],
+)
+
py_binary(
name = "wheel_installer",
srcs = [
diff --git a/python/pip_install/tools/wheel_installer/arguments.py b/python/pip_install/tools/wheel_installer/arguments.py
index aac3c012b7..f565de2689 100644
--- a/python/pip_install/tools/wheel_installer/arguments.py
+++ b/python/pip_install/tools/wheel_installer/arguments.py
@@ -14,6 +14,7 @@
import argparse
import json
+import pathlib
from typing import Any
@@ -59,6 +60,19 @@ def parser(**kwargs: Any) -> argparse.ArgumentParser:
help="Use 'pip download' instead of 'pip wheel'. Disables building wheels from source, but allows use of "
"--platform, --python-version, --implementation, and --abi in --extra_pip_args.",
)
+ parser.add_argument(
+ "--whl-file", type=pathlib.Path, help="The file to be used for extraction."
+ )
+ parser.add_argument(
+ "--no-extract",
+ action="store_true",
+ help="Whether to extract the downloaded file.",
+ )
+ parser.add_argument(
+ "--rename-to-zip",
+ action="store_true",
+ help="Whether to rename the whl file to zip for easier patching.",
+ )
return parser
diff --git a/python/pip_install/tools/wheel_installer/wheel_installer.py b/python/pip_install/tools/wheel_installer/wheel_installer.py
index c6c29615c3..fbd24a4f8a 100644
--- a/python/pip_install/tools/wheel_installer/wheel_installer.py
+++ b/python/pip_install/tools/wheel_installer/wheel_installer.py
@@ -155,45 +155,57 @@ def main() -> None:
_configure_reproducible_wheels()
- pip_args = (
- [sys.executable, "-m", "pip"]
- + (["--isolated"] if args.isolated else [])
- + (["download", "--only-binary=:all:"] if args.download_only else ["wheel"])
- + ["--no-deps"]
- + deserialized_args["extra_pip_args"]
- )
+ if not args.whl_file:
+ pip_args = (
+ [sys.executable, "-m", "pip"]
+ + (["--isolated"] if args.isolated else [])
+ + (["download", "--only-binary=:all:"] if args.download_only else ["wheel"])
+ + ["--no-deps"]
+ + deserialized_args["extra_pip_args"]
+ )
- requirement_file = NamedTemporaryFile(mode="wb", delete=False)
- try:
- requirement_file.write(args.requirement.encode("utf-8"))
- requirement_file.flush()
- # Close the file so pip is allowed to read it when running on Windows.
- # For more information, see: https://bugs.python.org/issue14243
- requirement_file.close()
- # Requirement specific args like --hash can only be passed in a requirements file,
- # so write our single requirement into a temp file in case it has any of those flags.
- pip_args.extend(["-r", requirement_file.name])
-
- env = os.environ.copy()
- env.update(deserialized_args["environment"])
- # Assumes any errors are logged by pip so do nothing. This command will fail if pip fails
- subprocess.run(pip_args, check=True, env=env)
- finally:
+ requirement_file = NamedTemporaryFile(mode="wb", delete=False)
try:
- os.unlink(requirement_file.name)
- except OSError as e:
- if e.errno != errno.ENOENT:
- raise
-
- name, extras_for_pkg = _parse_requirement_for_extra(args.requirement)
- extras = {name: extras_for_pkg} if extras_for_pkg and name else dict()
-
- whl = next(iter(glob.glob("*.whl")))
- _extract_wheel(
- wheel_file=whl,
- extras=extras,
- enable_implicit_namespace_pkgs=args.enable_implicit_namespace_pkgs,
- )
+ requirement_file.write(args.requirement.encode("utf-8"))
+ requirement_file.flush()
+ # Close the file so pip is allowed to read it when running on Windows.
+ # For more information, see: https://bugs.python.org/issue14243
+ requirement_file.close()
+ # Requirement specific args like --hash can only be passed in a requirements file,
+ # so write our single requirement into a temp file in case it has any of those flags.
+ pip_args.extend(["-r", requirement_file.name])
+
+ env = os.environ.copy()
+ env.update(deserialized_args["environment"])
+ # Assumes any errors are logged by pip so do nothing. This command will fail if pip fails
+ subprocess.run(pip_args, check=True, env=env)
+ finally:
+ try:
+ os.unlink(requirement_file.name)
+ except OSError as e:
+ if e.errno != errno.ENOENT:
+ raise
+
+ whl = Path(next(iter(glob.glob("*.whl"))))
+ else:
+ whl = Path(args.whl_file)
+
+ if args.no_extract:
+ # rename the zip file so that `repository_ctx.extract` can detect that it is a zip file and so that
+ # the default glob for the `whl` filegroup does not match the original wheel.
+ if args.rename_to_zip:
+ whl = whl.rename(f"{whl}" + ".orig.zip")
+ print(f"Saved a whl file to: {whl}")
+ with open("whl_file.json", "w") as f:
+ json.dump({"whl_file": f"{whl.resolve()}"}, f)
+ else:
+ name, extras_for_pkg = _parse_requirement_for_extra(args.requirement)
+ extras = {name: extras_for_pkg} if extras_for_pkg and name else dict()
+ _extract_wheel(
+ wheel_file=whl,
+ extras=extras,
+ enable_implicit_namespace_pkgs=args.enable_implicit_namespace_pkgs,
+ )
if __name__ == "__main__":
diff --git a/python/pip_install/tools/wheel_installer/wheel_repackager.py b/python/pip_install/tools/wheel_installer/wheel_repackager.py
new file mode 100755
index 0000000000..0bb843799e
--- /dev/null
+++ b/python/pip_install/tools/wheel_installer/wheel_repackager.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+"""
+Regenerate a whl file after patching and cleanup the patched contents.
+
+This script will take contents of the current directory and create a new wheel out of it and will remove all files
+that were written to the wheel.
+"""
+
+import base64
+import hashlib
+import logging
+import os
+import pathlib
+import sys
+import tempfile
+import zipfile
+
+
+def _create_wheel(whl: zipfile.ZipFile, dir: pathlib.Path):
+    """Write the contents of `dir` into `whl` and regenerate its RECORD file.
+
+    Every regular file below `dir` is added to the archive and listed in the
+    RECORD with its sha256 digest and size. The RECORD file itself is rewritten
+    on disk and added last with empty hash and size fields.
+
+    Args:
+        whl: the zip archive, opened for writing, that receives the files.
+        dir: the directory holding the unpacked (and patched) wheel contents.
+
+    Raises:
+        AssertionError: if no `*.dist-info/RECORD` file exists below `dir`.
+    """
+    record_path = None
+    record = []
+
+    # Sort the entries so that the archive member order and the RECORD contents
+    # do not depend on the iteration order of the file system.
+    for p in sorted(dir.rglob("*")):
+        # RECORD entries and archive member names use forward slashes, also
+        # when the script runs on Windows.
+        rel_path = p.relative_to(dir).as_posix()
+
+        # Only the RECORD inside the '.dist-info' directory is wheel metadata;
+        # a package data file that merely happens to be called RECORD must be
+        # packaged like any other file.
+        if p.name == "RECORD" and p.parent.name.endswith(".dist-info"):
+            record_path = p
+            logging.debug(f"Found a RECORD: {record_path}")
+        elif not p.is_dir():
+            contents = p.read_bytes()
+            digest = hashlib.sha256(contents)
+            safe_hash = (
+                base64.urlsafe_b64encode(digest.digest()).decode("us-ascii").rstrip("=")
+            )
+
+            record.append(f"{rel_path},sha256={safe_hash},{len(contents)}")
+            whl.write(p, rel_path)
+            logging.debug(f"Wrote: {record[-1]}")
+
+    # Raised explicitly (instead of using `assert`) so that the check is not
+    # stripped when the interpreter runs with -O; the exception type is kept.
+    if record_path is None:
+        raise AssertionError("RECORD was not found in the archive")
+
+    rel_path = record_path.relative_to(dir).as_posix()
+    record.append(f"{rel_path},,")
+    record_path.write_text("\n".join(record) + "\n")
+    whl.write(record_path, rel_path)
+    logging.debug(f"Wrote: {record[-1]}")
+
+
+def main():
+    """Repackage the wheel contents found in the current directory.
+
+    The first command line argument is the path of the original archive (named
+    `*.whl.orig.zip`). Everything else in the current directory is moved into
+    a temporary directory, written into a new wheel whose path is the argument
+    with ".whl.orig.zip" replaced by ".whl", and removed together with the
+    temporary directory afterwards.
+    """
+    logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.DEBUG)
+
+    # Fail with a readable message before touching the file system.
+    if len(sys.argv) < 2:
+        sys.exit("usage: wheel_repackager.py <path to the .whl.orig.zip file>")
+
+    cwd = pathlib.Path.cwd()
+    logging.debug("=" * 80)
+    logging.debug("Repackaging the wheel")
+    logging.debug("=" * 80)
+
+    with tempfile.TemporaryDirectory(dir=cwd) as tmpdir:
+        patched_wheel_dir = cwd / tmpdir
+        logging.debug(f"Created a tmpdir: {patched_wheel_dir}")
+
+        # Resolve the argument so that a relative path is still recognised
+        # below; otherwise the original archive would be moved into the wheel.
+        input_file = pathlib.Path(sys.argv[1]).resolve()
+        whl_path = pathlib.Path(sys.argv[1].replace(".whl.orig.zip", ".whl"))
+
+        logging.debug("Moving whl contents to the newly created tmpdir")
+        for p in cwd.glob("*"):
+            if p.resolve() == input_file or p == patched_wheel_dir:
+                logging.debug(f"Ignoring: {p}")
+                continue
+
+            rel_path = p.relative_to(cwd)
+            dst = p.rename(patched_wheel_dir / rel_path)
+            logging.debug(f"mv {p} -> {dst}")
+
+        with zipfile.ZipFile(whl_path, "w") as whl:
+            _create_wheel(whl, patched_wheel_dir)
+
+    logging.info(f"Created a whl file: {whl_path}")
+
+
+if __name__ == "__main__":
+ main()