From 68d1b4104f1d6f72ed0f3a8a5bf0a75d94cb74ec Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Mon, 18 Nov 2024 11:01:19 +0900 Subject: [PATCH] refactor!(toolchain): remove uname dep in the repository_rule stage (#2406) Before this PR we would shell out to `uname` on UNIX systems to get the `arch` of the toolchain - on Windows we would not need to do it because there used to be only a single Windows platform. With this change we can correctly support the resolution of the python interpreter on various platforms and I have also added an env variable to customize the selection, so that users can use `musl` or a `freethreaded` interpreter if they wish. As part of this change, I have restricted visibility of the config settings used in the toolchain alias repo so that we are creating fewer targets. This is a very good time to do this before `1.0.0`. Fixes #2145 Work towards #2276 Work towards #2386 Work towards #1211 to unblock #2402 Work towards #1361 --------- Co-authored-by: Richard Levasseur --- CHANGELOG.md | 25 +++- WORKSPACE | 2 +- docs/environment-variables.md | 10 ++ examples/bzlmod/MODULE.bazel.lock | 4 +- python/private/python_register_toolchains.bzl | 6 +- python/private/repo_utils.bzl | 6 +- python/private/toolchain_aliases.bzl | 74 +++++++++ python/private/toolchains_repo.bzl | 141 +++++++----------- python/versions.bzl | 33 ++-- .../WORKSPACE | 3 +- 10 files changed, 195 insertions(+), 109 deletions(-) create mode 100644 python/private/toolchain_aliases.bzl diff --git a/CHANGELOG.md b/CHANGELOG.md index 5eaa3fadf8..16ea38b3ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,16 +52,39 @@ Unreleased changes template. {#v0-0-0-changed} ### Changed + +**Breaking**: +* (toolchains) stop exposing config settings in python toolchain alias repos. + Please consider depending on the flags defined in + `//python/config_setting/...` and the `@platforms` package instead. +* (toolchains) consumers who were depending on the `MACOS_NAME` and the `arch` + attribute in the `PLATFORMS` list, please update your code to respect the new + values. The values now correspond to the values available in the + `@platforms//` package constraint values. +* (toolchains) `host_platform` and `interpreter` constants are no longer created + in the `toolchain` generated alias `.bzl` files. If you need to access the + host interpreter during the `repository_rule` evaluation, please use the + `@python_{version}_host//:python` targets created by + {bzl:obj}`python_register_toolchains` and + {bzl:obj}`python_register_multi_toolchains` macros or the {bzl:obj}`python` + bzlmod extension. + +Other changes: * (python_repository) Start honoring the `strip_prefix` field for `zstd` archives. {#v0-0-0-fixed} ### Fixed -* Nothing fixed. +* (toolchains) stop depending on `uname` to get the value of the host platform. {#v0-0-0-added} ### Added * (gazelle): Parser failures will now be logged to the terminal. Additional details can be logged by setting `GAZELLE_VERBOSE=1`. +* (toolchains) allow users to select which variant of the support host toolchain + they would like to use through + `RULES_PYTHON_REPO_TOOLCHAIN_{VERSION}_{OS}_{ARCH}` env variable setting. For + example, this allows one to use `freethreaded` python interpreter in the + `repository_rule` to build a wheel from `sdist`. {#v0-0-0-removed} ### Removed diff --git a/WORKSPACE b/WORKSPACE index b77918f5ef..46ebbc8cde 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -86,7 +86,7 @@ load("@rules_python_gazelle_plugin//:deps.bzl", _py_gazelle_deps = "gazelle_deps _py_gazelle_deps() # This interpreter is used for various rules_python dev-time tools -load("@python//3.11.9:defs.bzl", "interpreter") +interpreter = "@python_3_11_9_host//:python" ##################### # Install twine for our own runfiles wheel publishing. diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 2a0052923c..906281d56f 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -15,6 +15,16 @@ Determines the verbosity of logging output for repo rules. Valid values: * `TRACE` ::: +:::{envvar} RULES_PYTHON_REPO_TOOLCHAIN_VERSION_OS_ARCH + +Determines the python interpreter platform to be used for a particular +interpreter `(version, os, arch)` triple to be used in repository rules. +Replace the `VERSION_OS_ARCH` part with actual values when using, e.g. +`3_13_0_linux_x86_64`. The version values must have `_` instead of `.` and the +os, arch values are the same as the ones mentioned in the +`//python:versions.bzl` file. +::: + :::{envvar} RULES_PYTHON_PIP_ISOLATED Determines if `--isolated` is used with pip. diff --git a/examples/bzlmod/MODULE.bazel.lock b/examples/bzlmod/MODULE.bazel.lock index 5a546c2f7a..8bad32a07b 100644 --- a/examples/bzlmod/MODULE.bazel.lock +++ b/examples/bzlmod/MODULE.bazel.lock @@ -1562,7 +1562,7 @@ }, "@@rules_python~//python/extensions:pip.bzl%pip": { "general": { - "bzlTransitiveDigest": "MwmpiMn2qoAVC+3E9MF3E98fB8v1utYBfMa0frXyi7g=", + "bzlTransitiveDigest": "mCwiXbsZmReVgs884fZHYfxaZaL9mFG+prEnH/lpE9g=", "usagesDigest": "VmrNvB/4EhzsYieLDka9584M+pYKPpjNLl3Wcb5rx/c=", "recordedFileInputs": { "@@//requirements_lock_3_10.txt": "5e7083982a7e60f34998579a0ae83b520d46ab8f2552cc51337217f024e6def5", @@ -7035,7 +7035,7 @@ }, "@@rules_python~//python/private/pypi:pip.bzl%pip_internal": { "general": { - "bzlTransitiveDigest": "Kx383BMHUpAHEjRiU5aWU4QTRQVg+Uu+Mgi7jVxuz0c=", + "bzlTransitiveDigest": "Xu1N6572iHVqGChH12PpMhprC21k3CpjRZVpm3FmE2c=", "usagesDigest": "/lZXl/ZgP+u5PE8WkeWTyYBsvX9XQWFn1antj5qrBzQ=", "recordedFileInputs": { "@@rules_python~//tools/publish/requirements_linux.txt": "8175b4c8df50ae2f22d1706961884beeb54e7da27bd2447018314a175981997d", diff --git a/python/private/python_register_toolchains.bzl b/python/private/python_register_toolchains.bzl index 98c8e5bfc3..cd3e9cbed7 100644 --- a/python/private/python_register_toolchains.bzl +++ b/python/private/python_register_toolchains.bzl @@ -160,7 +160,11 @@ def python_register_toolchains( platform = platform, )) - host_toolchain(name = name + "_host") + host_toolchain( + name = name + "_host", + platforms = loaded_platforms, + python_version = python_version, + ) toolchain_aliases( name = name, diff --git a/python/private/repo_utils.bzl b/python/private/repo_utils.bzl index e0bf69acac..0e3f7b024b 100644 --- a/python/private/repo_utils.bzl +++ b/python/private/repo_utils.bzl @@ -41,6 +41,10 @@ def _logger(mrctx, name = None): Returns: A struct with attributes logging: trace, debug, info, warn, fail. + Please use `return logger.fail` when using the `fail` method, because + it makes `buildifier` happy and ensures that other implementation of + the logger injected into the function work as expected by terminating + on the given line. """ if _is_repo_debug_enabled(mrctx): verbosity_level = "DEBUG" @@ -140,7 +144,7 @@ def _execute_internal( result = mrctx.execute(arguments, environment = environment, **kwargs) if fail_on_error and result.return_code != 0: - logger.fail(( + return logger.fail(( "repo.execute: {op}: end: failure:\n" + " command: {cmd}\n" + " return code: {return_code}\n" + diff --git a/python/private/toolchain_aliases.bzl b/python/private/toolchain_aliases.bzl new file mode 100644 index 0000000000..31ac4a8fdf --- /dev/null +++ b/python/private/toolchain_aliases.bzl @@ -0,0 +1,74 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Create toolchain alias targets.""" + +load("@rules_python//python:versions.bzl", "PLATFORMS") + +def toolchain_aliases(*, name, platforms, visibility = None, native = native): + """Create toolchain aliases for the python toolchains. + + Args: + name: {type}`str` The name of the current repository. + platforms: {type}`platforms` The list of platforms that are supported + for the current toolchain repository. + visibility: {type}`list[Target] | None` The visibility of the aliases. + native: The native struct used in the macro, useful for testing. + """ + for platform in PLATFORMS.keys(): + if platform not in platforms: + continue + + native.config_setting( + name = platform, + flag_values = PLATFORMS[platform].flag_values, + constraint_values = PLATFORMS[platform].compatible_with, + visibility = ["//visibility:private"], + ) + + prefix = name + for name in [ + "files", + "includes", + "libpython", + "py3_runtime", + "python_headers", + "python_runtimes", + ]: + native.alias( + name = name, + actual = select({ + ":" + platform: "@{}_{}//:{}".format(prefix, platform, name) + for platform in platforms + }), + visibility = visibility, + ) + + native.alias( + name = "python3", + actual = select({ + ":" + platform: "@{}_{}//:{}".format(prefix, platform, "python.exe" if "windows" in platform else "bin/python3") + for platform in platforms + }), + visibility = visibility, + ) + native.alias( + name = "pip", + actual = select({ + ":" + platform: "@{}_{}//:python_runtimes".format(prefix, platform) + for platform in platforms + if "windows" not in platform + }), + visibility = visibility, + ) diff --git a/python/private/toolchains_repo.bzl b/python/private/toolchains_repo.bzl index d21fb53a41..7e9a0c7ff9 100644 --- a/python/private/toolchains_repo.bzl +++ b/python/private/toolchains_repo.bzl @@ -25,8 +25,6 @@ platform-specific repositories. load( "//python:versions.bzl", - "LINUX_NAME", - "MACOS_NAME", "PLATFORMS", "WINDOWS_NAME", ) @@ -126,43 +124,26 @@ toolchains_repo = repository_rule( ) def _toolchain_aliases_impl(rctx): - logger = repo_utils.logger(rctx) - (os_name, arch) = _get_host_os_arch(rctx, logger) - - host_platform = _get_host_platform(os_name, arch) - - is_windows = (os_name == WINDOWS_NAME) - python3_binary_path = "python.exe" if is_windows else "bin/python3" - # Base BUILD file for this repository. build_contents = """\ # Generated by python/private/toolchains_repo.bzl +load("@rules_python//python/private:toolchain_aliases.bzl", "toolchain_aliases") + package(default_visibility = ["//visibility:public"]) -load("@rules_python//python:versions.bzl", "gen_python_config_settings") -gen_python_config_settings() + exports_files(["defs.bzl"]) PLATFORMS = [ {loaded_platforms} ] -alias(name = "files", actual = select({{":" + item: "@{py_repository}_" + item + "//:files" for item in PLATFORMS}})) -alias(name = "includes", actual = select({{":" + item: "@{py_repository}_" + item + "//:includes" for item in PLATFORMS}})) -alias(name = "libpython", actual = select({{":" + item: "@{py_repository}_" + item + "//:libpython" for item in PLATFORMS}})) -alias(name = "py3_runtime", actual = select({{":" + item: "@{py_repository}_" + item + "//:py3_runtime" for item in PLATFORMS}})) -alias(name = "python_headers", actual = select({{":" + item: "@{py_repository}_" + item + "//:python_headers" for item in PLATFORMS}})) -alias(name = "python_runtimes", actual = select({{":" + item: "@{py_repository}_" + item + "//:python_runtimes" for item in PLATFORMS}})) -alias(name = "python3", actual = select({{":" + item: "@{py_repository}_" + item + "//:" + ("python.exe" if "windows" in item else "bin/python3") for item in PLATFORMS}})) +toolchain_aliases( + name = "{py_repository}", + platforms = PLATFORMS, +) """.format( py_repository = rctx.attr.user_repository_name, loaded_platforms = "\n".join([" \"{}\",".format(p) for p in rctx.attr.platforms]), ) - if not is_windows: - build_contents += """\ -alias(name = "pip", actual = select({{":" + item: "@{py_repository}_" + item + "//:python_runtimes" for item in PLATFORMS if "windows" not in item}})) -""".format( - py_repository = rctx.attr.user_repository_name, - host_platform = host_platform, - ) rctx.file("BUILD.bazel", build_contents) # Expose a Starlark file so rules can know what host platform we used and where to find an interpreter @@ -181,9 +162,6 @@ load( ) load("{rules_python}//python:pip.bzl", _compile_pip_requirements = "compile_pip_requirements") -host_platform = "{host_platform}" -interpreter = "@{py_repository}_{host_platform}//:{python3_binary_path}" - def py_binary(name, **kwargs): return _py_binary( name = name, @@ -214,10 +192,7 @@ def compile_pip_requirements(name, **kwargs): ) """.format( - host_platform = host_platform, - py_repository = rctx.attr.user_repository_name, python_version = rctx.attr.python_version, - python3_binary_path = python3_binary_path, rules_python = get_repository_name(rctx.attr._rules_python_workspace), )) @@ -243,15 +218,21 @@ actions.""", ) def _host_toolchain_impl(rctx): - logger = repo_utils.logger(rctx) rctx.file("BUILD.bazel", """\ # Generated by python/private/toolchains_repo.bzl exports_files(["python"], visibility = ["//visibility:public"]) """) - (os_name, arch) = _get_host_os_arch(rctx, logger) - host_platform = _get_host_platform(os_name, arch) + os_name = repo_utils.get_platforms_os_name(rctx) + host_platform = _get_host_platform( + rctx = rctx, + logger = repo_utils.logger(rctx), + python_version = rctx.attr.python_version, + os_name = os_name, + cpu_name = repo_utils.get_platforms_cpu_name(rctx), + platforms = rctx.attr.platforms, + ) repo = "@@{py_repository}_{host_platform}".format( py_repository = rctx.attr.name[:-len("_host")], host_platform = host_platform, @@ -320,6 +301,8 @@ toolchain_aliases repo because referencing the `python` interpreter target from this repo causes an eager fetch of the toolchain for the host platform. """, attrs = { + "platforms": attr.string_list(mandatory = True), + "python_version": attr.string(mandatory = True), "_rule_name": attr.string(default = "host_toolchain"), "_rules_python_workspace": attr.label(default = Label("//:WORKSPACE")), }, @@ -336,16 +319,12 @@ def _multi_toolchain_aliases_impl(rctx): load( "@{repository_name}//:defs.bzl", _compile_pip_requirements = "compile_pip_requirements", - _host_platform = "host_platform", - _interpreter = "interpreter", _py_binary = "py_binary", _py_console_script_binary = "py_console_script_binary", _py_test = "py_test", ) compile_pip_requirements = _compile_pip_requirements -host_platform = _host_platform -interpreter = _interpreter py_binary = _py_binary py_console_script_binary = _py_console_script_binary py_test = _py_test @@ -388,57 +367,51 @@ multi_toolchain_aliases = repository_rule( def sanitize_platform_name(platform): return platform.replace("-", "_") -def _get_host_platform(os_name, arch): +def _get_host_platform(*, rctx, logger, python_version, os_name, cpu_name, platforms): """Gets the host platform. Args: - os_name: the host OS name. - arch: the host arch. + rctx: {type}`repository_ctx`. + logger: {type}`struct`. + python_version: {type}`string`. + os_name: {type}`str` the host OS name. + cpu_name: {type}`str` the host CPU name. + platforms: {type}`list[str]` the list of loaded platforms. Returns: The host platform. """ - host_platform = None - for platform, meta in PLATFORMS.items(): - if "freethreaded" in platform: - continue - - if meta.os_name == os_name and meta.arch == arch: - host_platform = platform - if not host_platform: - fail("No platform declared for host OS {} on arch {}".format(os_name, arch)) - return host_platform + candidates = [] + for platform in platforms: + meta = PLATFORMS[platform] -def _get_host_os_arch(rctx, logger): - """Infer the host OS name and arch from a repository context. + if meta.os_name == os_name and meta.arch == cpu_name: + candidates.append(platform) - Args: - rctx: Bazel's repository_ctx. - logger: Logger to use for operations. + if len(candidates) == 1: + return candidates[0] - Returns: - A tuple with the host OS name and arch. - """ - os_name = rctx.os.name - - # We assume the arch for Windows is always x86_64. - if "windows" in os_name.lower(): - arch = "x86_64" - - # Normalize the os_name. E.g. os_name could be "OS windows server 2019". - os_name = WINDOWS_NAME - else: - # This is not ideal, but bazel doesn't directly expose arch. - arch = repo_utils.execute_unchecked( - rctx, - op = "GetUname", - arguments = [repo_utils.which_checked(rctx, "uname"), "-m"], - logger = logger, - ).stdout.strip() - - # Normalize the os_name. - if "mac" in os_name.lower(): - os_name = MACOS_NAME - elif "linux" in os_name.lower(): - os_name = LINUX_NAME - - return (os_name, arch) + if candidates: + env_var = "RULES_PYTHON_REPO_TOOLCHAIN_{}_{}_{}".format( + python_version.replace(".", "_"), + os_name.upper(), + cpu_name.upper(), + ) + preference = repo_utils.getenv(rctx, env_var) + if preference == None: + logger.info("Consider using '{}' to select from one of the platforms: {}".format( + env_var, + candidates, + )) + elif preference not in candidates: + return logger.fail("Please choose a preferred interpreter out of the following platforms: {}".format(candidates)) + else: + candidates = [preference] + + if candidates: + return candidates[0] + + return logger.fail("Could not find a compatible 'host' python for '{os_name}', '{cpu_name}' from the loaded platforms: {platforms}".format( + os_name = os_name, + cpu_name = cpu_name, + platforms = platforms, + )) diff --git a/python/versions.bzl b/python/versions.bzl index 688c4e2ceb..d229b9d1db 100644 --- a/python/versions.bzl +++ b/python/versions.bzl @@ -15,8 +15,8 @@ """The Python versions we use for the toolchains. """ -# Values returned by https://bazel.build/rules/lib/repository_os. -MACOS_NAME = "mac os" +# Values present in the @platforms//os package +MACOS_NAME = "osx" LINUX_NAME = "linux" WINDOWS_NAME = "windows" FREETHREADED = "freethreaded" @@ -620,9 +620,8 @@ def _generate_platforms(): ], flag_values = {}, os_name = MACOS_NAME, - # Matches the value returned from: - # repository_ctx.execute(["uname", "-m"]).stdout.strip() - arch = "arm64", + # Matches the value in @platforms//cpu package + arch = "aarch64", ), "aarch64-unknown-linux-gnu": struct( compatible_with = [ @@ -633,9 +632,7 @@ def _generate_platforms(): libc: "glibc", }, os_name = LINUX_NAME, - # Note: this string differs between OSX and Linux - # Matches the value returned from: - # repository_ctx.execute(["uname", "-m"]).stdout.strip() + # Matches the value in @platforms//cpu package arch = "aarch64", ), "armv7-unknown-linux-gnu": struct( @@ -647,7 +644,8 @@ def _generate_platforms(): libc: "glibc", }, os_name = LINUX_NAME, - arch = "armv7", + # Matches the value in @platforms//cpu package + arch = "arm", ), "i386-unknown-linux-gnu": struct( compatible_with = [ @@ -658,7 +656,8 @@ def _generate_platforms(): libc: "glibc", }, os_name = LINUX_NAME, - arch = "i386", + # Matches the value in @platforms//cpu package + arch = "x86_32", ), "ppc64le-unknown-linux-gnu": struct( compatible_with = [ @@ -669,10 +668,8 @@ def _generate_platforms(): libc: "glibc", }, os_name = LINUX_NAME, - # Note: this string differs between OSX and Linux - # Matches the value returned from: - # repository_ctx.execute(["uname", "-m"]).stdout.strip() - arch = "ppc64le", + # Matches the value in @platforms//cpu package + arch = "ppc", ), "riscv64-unknown-linux-gnu": struct( compatible_with = [ @@ -683,6 +680,7 @@ def _generate_platforms(): Label("//python/config_settings:py_linux_libc"): "glibc", }, os_name = LINUX_NAME, + # Matches the value in @platforms//cpu package arch = "riscv64", ), "s390x-unknown-linux-gnu": struct( @@ -694,9 +692,7 @@ def _generate_platforms(): Label("//python/config_settings:py_linux_libc"): "glibc", }, os_name = LINUX_NAME, - # Note: this string differs between OSX and Linux - # Matches the value returned from: - # repository_ctx.execute(["uname", "-m"]).stdout.strip() + # Matches the value in @platforms//cpu package arch = "s390x", ), "x86_64-apple-darwin": struct( @@ -706,6 +702,7 @@ def _generate_platforms(): ], flag_values = {}, os_name = MACOS_NAME, + # Matches the value in @platforms//cpu package arch = "x86_64", ), "x86_64-pc-windows-msvc": struct( @@ -715,6 +712,7 @@ def _generate_platforms(): ], flag_values = {}, os_name = WINDOWS_NAME, + # Matches the value in @platforms//cpu package arch = "x86_64", ), "x86_64-unknown-linux-gnu": struct( @@ -726,6 +724,7 @@ def _generate_platforms(): libc: "glibc", }, os_name = LINUX_NAME, + # Matches the value in @platforms//cpu package arch = "x86_64", ), } diff --git a/tests/integration/compile_pip_requirements_test_from_external_repo/WORKSPACE b/tests/integration/compile_pip_requirements_test_from_external_repo/WORKSPACE index 48caeb442f..7834000854 100644 --- a/tests/integration/compile_pip_requirements_test_from_external_repo/WORKSPACE +++ b/tests/integration/compile_pip_requirements_test_from_external_repo/WORKSPACE @@ -12,7 +12,6 @@ python_register_toolchains( python_version = "3.9", ) -load("@python39//:defs.bzl", "interpreter") load("@rules_python//python:pip.bzl", "pip_parse") local_repository( @@ -22,7 +21,7 @@ local_repository( pip_parse( name = "pypi", - python_interpreter_target = interpreter, + python_interpreter_target = "@python39_host//:python", requirements_lock = "@compile_pip_requirements//:requirements_lock.txt", )