Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor/fix: store dists in parse_requirements output #1917

Merged
merged 25 commits into from
Jun 1, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
d65b3fb
parse_requirements: add whls and sdists attr
aignas May 22, 2024
1d7be8d
refactor: split out a function for selecting a list of wheels instead…
aignas May 23, 2024
50ff113
Handle correctly when the target platform has a python version in the…
aignas May 23, 2024
823cc03
add a note
aignas May 23, 2024
cb1ecd1
wip
aignas May 23, 2024
1800f77
docs: add bzl_library targets
aignas May 23, 2024
6dff723
fixes for #1930
aignas May 30, 2024
552c526
filter out non cp or py wheels
aignas May 30, 2024
348ca06
rewrite the filtering algorithm and harden the code
aignas May 31, 2024
341cf84
Merge branch 'main' into refactor/store-dists-in-reqs
aignas May 31, 2024
0261471
minor cleanup
aignas May 31, 2024
098835f
add a logger to the pip extension
aignas May 31, 2024
900e617
comment: add a logger to repo_utils and accept a lambda instead of st…
aignas Jun 1, 2024
90eaf3b
comment: clarify docstring
aignas Jun 1, 2024
aef1bb8
comment: clarify parameter s/want_version/want_python_version
aignas Jun 1, 2024
6dc2e62
comment: cryptic comment
aignas Jun 1, 2024
adc7708
comment: set usage
aignas Jun 1, 2024
24fa6a5
comment: describe why musl is special cased
aignas Jun 1, 2024
dec7455
comment: why we are getting sdists[0]
aignas Jun 1, 2024
2cd9502
comment: the add_dists is too coupled
aignas Jun 1, 2024
fed02eb
cleanup
aignas Jun 1, 2024
6e4ee51
fixup tests
aignas Jun 1, 2024
c9158aa
fixup debugging statements
aignas Jun 1, 2024
48b8a71
finish the debugging setup
aignas Jun 1, 2024
0fad448
cleanup bzl_library
aignas Jun 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ A brief description of the categories of changes:
"panic: runtime error: invalid memory address or nil pointer dereference"
* (bzlmod) remove `pip.parse(annotations)` attribute as it is unused and has been
replaced by whl_modifications.
* (pip) Correctly select wheels when the python tag includes minor versions.
See ([#1930](https://github.com/bazelbuild/rules_python/issues/1930))

### Added
* (rules) Precompiling Python source at build time is available, but is
Expand Down
8 changes: 8 additions & 0 deletions python/private/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,14 @@ bzl_library(
],
)

# Starlark library target for parse_requirements_add_dists.bzl.
# The dependency on whl_target_platforms_bzl is needed because that source
# file loads `select_whls` from whl_target_platforms.bzl.
bzl_library(
name = "parse_requirements_add_dists_bzl",
srcs = ["parse_requirements_add_dists.bzl"],
deps = [
":whl_target_platforms_bzl",
],
)

bzl_library(
name = "parse_whl_name_bzl",
srcs = ["parse_whl_name.bzl"],
Expand Down
1 change: 1 addition & 0 deletions python/private/bzlmod/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ bzl_library(
"//python/private:full_version_bzl",
"//python/private:normalize_name_bzl",
"//python/private:parse_requirements_bzl",
"//python/private:parse_requirements_add_dists_bzl",
"//python/private:parse_whl_name_bzl",
"//python/private:version_label_bzl",
":bazel_features_bzl",
Expand Down
83 changes: 52 additions & 31 deletions python/private/bzlmod/pip.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ load(
load("//python/private:auth.bzl", "AUTH_ATTRS")
load("//python/private:normalize_name.bzl", "normalize_name")
load("//python/private:parse_requirements.bzl", "host_platform", "parse_requirements", "select_requirement")
load("//python/private:parse_requirements_add_dists.bzl", "parse_requirements_add_dists")
load("//python/private:parse_whl_name.bzl", "parse_whl_name")
load("//python/private:pypi_index.bzl", "simpleapi_download")
load("//python/private:render_pkg_aliases.bzl", "whl_alias")
Expand Down Expand Up @@ -99,7 +100,30 @@ You cannot use both the additive_build_content and additive_build_content_file a
whl_mods = whl_mods,
)

def _new_logger(verbosity_level = None):
    """Build a small logger whose chattiness is gated by a verbosity name.

    Args:
        verbosity_level: One of "INFO", "DEBUG" or "TRACE". Any other value
            (including None or the empty string) maps to verbosity 0, which
            silences `info`, `debug` and `trace`.

    Returns:
        A struct with `trace`, `debug`, `info`, `warn` and `fail` callables.
        `warn` always prints and `fail` always forwards to the builtin
        `fail`, regardless of the configured verbosity.
    """
    threshold = {
        "DEBUG": 2,
        "INFO": 1,
        "TRACE": 3,
    }.get(verbosity_level, 0)

    # buildifier: disable=print
    def _emit(min_verbosity, label, *messages):
        # Only print when the configured verbosity reaches the level at which
        # this kind of message is enabled.
        if threshold >= min_verbosity:
            print("{}: ".format(label.upper()), *messages)

    def _trace(*messages):
        _emit(3, "TRACE", *messages)

    def _debug(*messages):
        _emit(2, "DEBUG", *messages)

    def _info(*messages):
        _emit(1, "INFO", *messages)

    # buildifier: disable=print
    def _warn(*messages):
        print("WARNING: ", *messages)

    return struct(
        trace = _trace,
        debug = _debug,
        info = _info,
        warn = _warn,
        fail = lambda *messages: fail(*messages),
    )

def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, simpleapi_cache):
logger = _new_logger(pip_attr.verbosity)
python_interpreter_target = pip_attr.python_interpreter_target

# if we do not have the python_interpreter set in the attributes
Expand Down Expand Up @@ -170,7 +194,6 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, s
extra_pip_args = pip_attr.extra_pip_args,
)

index_urls = {}
if pip_attr.experimental_index_url:
if pip_attr.download_only:
fail("Currently unsupported to use `download_only` and `experimental_index_url`")
Expand All @@ -194,6 +217,12 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, s
cache = simpleapi_cache,
parallel_download = pip_attr.parallel_download,
)
parse_requirements_add_dists(
requirements_by_platform,
index_urls,
python_version = major_minor,
logger = logger,
)

repository_platform = host_platform(module_ctx.os)
for whl_name, requirements in requirements_by_platform.items():
Expand Down Expand Up @@ -255,37 +284,22 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, s
)
whl_library_args.update({k: v for k, (v, default) in maybe_args_with_default.items() if v == default})

if index_urls:
whls = []
sdist = None
for sha256 in requirement.srcs.shas:
# For now if the artifact is marked as yanked we just ignore it.
#
# See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api

maybe_whl = index_urls[whl_name].whls.get(sha256)
if maybe_whl and not maybe_whl.yanked:
whls.append(maybe_whl)
continue

maybe_sdist = index_urls[whl_name].sdists.get(sha256)
if maybe_sdist and not maybe_sdist.yanked:
sdist = maybe_sdist
continue

print("WARNING: Could not find a whl or an sdist with sha256={}".format(sha256)) # buildifier: disable=print

if requirement.whls or requirement.sdists:
logger.debug("Selecting a compatible dist for {} from dists:\n{}".format(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some extra format() calls probably aren't a big deal, but serializing a json object as part of a debug call is a bit much. Perhaps passing a lambda instead, to defer evaluation?

Also, repo_utils.debug_print exists to base it upon an env var

repository_platform,
json.encode(
struct(
whls = requirement.whls,
sdists = requirement.sdists,
),
),
))
distribution = select_whl(
whls = whls,
want_abis = [
"none",
"abi3",
"cp" + major_minor.replace(".", ""),
# Older python versions have wheels for the `*m` ABI.
"cp" + major_minor.replace(".", "") + "m",
],
whls = requirement.whls,
want_platform = repository_platform,
) or sdist
) or (requirement.sdists[0] if requirement.sdists else None)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the 0th sdist special? Or is this just picking an arbitrary element to have something valid?


logger.debug("Selected: {}".format(distribution))

if distribution:
whl_library_args["requirement"] = requirement.srcs.requirement
Expand All @@ -303,7 +317,7 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, s
# This is no-op because pip is not used to download the wheel.
whl_library_args.pop("download_only", None)
else:
print("WARNING: falling back to pip for installing the right file for {}".format(requirement.requirement_line)) # buildifier: disable=print
logger.warn("falling back to pip for installing the right file for {}".format(requirement.requirement_line))

# We sort so that the lock-file remains the same no matter the order of how the
# args are manipulated in the code going before.
Expand Down Expand Up @@ -569,6 +583,13 @@ The Python version the dependencies are targetting, in Major.Minor format
If an interpreter isn't explicitly provided (using `python_interpreter` or
`python_interpreter_target`), then the version specified here must have
a corresponding `python.toolchain()` configured.
""",
),
"verbosity": attr.string(
default = "",
values = ["TRACE", "DEBUG", "INFO"],
doc = """
The verbosity with which we should print diagnostic messages when 'quiet = False'.
""",
),
"whl_modifications": attr.label_keyed_string_dict(
Expand Down
11 changes: 11 additions & 0 deletions python/private/parse_requirements.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ def _default_platforms(*, filter):
if not filter:
fail("Must specific a filter string, got: {}".format(filter))

if filter.startswith("cp3"):
# TODO @aignas 2024-05-23: properly handle python versions in the filter.
# For now we are just dropping it to ensure that we don't fail.
_, _, filter = filter.partition("_")

sanitized = filter.replace("*", "").replace("_", "")
if sanitized and not sanitized.isalnum():
fail("The platform filter can only contain '*', '_' and alphanumerics")
Expand Down Expand Up @@ -321,6 +326,12 @@ def parse_requirements(
target_platforms = sorted(r.target_platforms),
extra_pip_args = r.extra_pip_args,
download = r.download,
# Note, some lock file formats have URLs and dists stored in
# the file, this field can be used for storing those values in
# the future. This is also going to be used by the pypi_index
# helper.
whls = [],
sdists = [],
)
for r in sorted(reqs.values(), key = lambda r: r.requirement_line)
]
Expand Down
79 changes: 79 additions & 0 deletions python/private/parse_requirements_add_dists.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Copyright 2024 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
A simple helper to populate the distribution attributes for each entry returned
by the parse_requirements function.

TODO @aignas 2024-05-23: The name is up for bikeshedding. For the time being I
am keeping it together with parse_requirements.bzl.
"""

load(":whl_target_platforms.bzl", "select_whls")

def parse_requirements_add_dists(requirements_by_platform, index_urls, python_version, logger = None):
    """Attach wheels and sdists from the PyPI index to every parsed requirement.

    The `whls` and `sdists` lists of each requirement in
    `requirements_by_platform` are extended in place; nothing is returned.

    Args:
        requirements_by_platform: The result of parse_requirements function,
            a dict from distribution name to a list of requirements.
        index_urls: The result of simpleapi_download, indexed by the same
            distribution names; each entry exposes `whls` and `sdists` dicts
            keyed by sha256.
        python_version: The version of the python interpreter, used to build
            the `cpXY` ABI tags and passed on to the wheel selection.
        logger: A logger for printing diagnostic info. When it is not given,
            hashes missing from the index are skipped silently.
    """
    for whl_name, requirements in requirements_by_platform.items():
        for requirement in requirements:
            candidate_whls = []
            chosen_sdist = None

            # TODO @aignas 2024-05-22: it is in theory possible to add all
            # requirements by version instead of by sha256. This may be useful
            # for some projects.
            for sha256 in requirement.srcs.shas:
                dists = index_urls[whl_name]

                # Yanked artifacts are ignored for now.
                #
                # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api
                whl = dists.whls.get(sha256)
                if whl and not whl.yanked:
                    candidate_whls.append(whl)
                else:
                    sdist = dists.sdists.get(sha256)
                    if sdist and not sdist.yanked:
                        # A later matching sdist replaces an earlier one.
                        chosen_sdist = sdist
                    elif logger:
                        logger.warn("Could not find a whl or an sdist with sha256={}".format(sha256))

            cp_tag = "cp" + python_version.replace(".", "")

            # Keep only the wheels that are compatible with the
            # target_platforms of this requirement.
            compatible_whls = select_whls(
                whls = candidate_whls,
                want_abis = [
                    "none",
                    "abi3",
                    cp_tag,
                    # Older python versions have wheels for the `*m` ABI.
                    cp_tag + "m",
                ],
                want_platforms = requirement.target_platforms,
                want_version = python_version,
                logger = logger,
            )

            requirement.whls.extend(compatible_whls)
            if chosen_sdist:
                requirement.sdists.append(chosen_sdist)
24 changes: 23 additions & 1 deletion python/private/parse_whl_name.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,28 @@
A starlark implementation of a Wheel filename parsing.
"""

# Legacy manylinux tag -> equivalent PEP 600 `manylinux_{major}_{minor}_{arch}`
# tag. Taken from https://peps.python.org/pep-0600/
_LEGACY_ALIASES = {
    "manylinux1_i686": "manylinux_2_5_i686",
    "manylinux1_x86_64": "manylinux_2_5_x86_64",
    "manylinux2010_i686": "manylinux_2_12_i686",
    "manylinux2010_x86_64": "manylinux_2_12_x86_64",
    "manylinux2014_aarch64": "manylinux_2_17_aarch64",
    "manylinux2014_armv7l": "manylinux_2_17_armv7l",
    "manylinux2014_i686": "manylinux_2_17_i686",
    "manylinux2014_ppc64": "manylinux_2_17_ppc64",
    "manylinux2014_ppc64le": "manylinux_2_17_ppc64le",
    "manylinux2014_s390x": "manylinux_2_17_s390x",
    "manylinux2014_x86_64": "manylinux_2_17_x86_64",
}

def normalize_platform_tag(tag):
    """Rewrite legacy manylinux aliases in a platform tag to their modern form.

    Args:
        tag: A platform tag, possibly a compressed set of several tags
            separated by `.`.

    Returns:
        The tag with every legacy alias replaced by its PEP 600 equivalent.
        Components that become identical after the replacement are kept only
        once, at the position of their first occurrence.
    """

    # A dict is used as an insertion-ordered set to drop duplicates.
    resolved = {}
    for component in tag.split("."):
        resolved[_LEGACY_ALIASES.get(component, component)] = None

    return ".".join(resolved.keys())

def parse_whl_name(file):
"""Parse whl file name into a struct of constituents.

Expand Down Expand Up @@ -68,5 +90,5 @@ def parse_whl_name(file):
build_tag = build_tag,
python_tag = python_tag,
abi_tag = abi_tag,
platform_tag = platform_tag,
platform_tag = normalize_platform_tag(platform_tag),
)
Loading
Loading