From 5293cadd564ecacbe5d1294d25deb565a8e6f82e Mon Sep 17 00:00:00 2001 From: Siddhartha Bagaria Date: Wed, 22 Sep 2021 20:30:53 -0700 Subject: [PATCH] Refactor to support cross-compilation Separate the notions of host and target OS and CPU architecture throughout the codebase. Always take {os}-{arch} pairs where we previously took just the OS name, and be explicit about whether each pair refers to the host or the target. --- README.md | 6 + WORKSPACE | 2 +- tests/scripts/linux_sysroot_test.sh | 2 +- toolchain/BUILD.toolchain.tpl | 74 +---- ...config.bzl.tpl => cc_toolchain_config.bzl} | 240 +++++++------- toolchain/internal/common.bzl | 19 +- toolchain/internal/configure.bzl | 305 +++++++++++------- toolchain/internal/sysroot.bzl | 12 +- toolchain/osx_cc_wrapper.sh.tpl | 2 +- toolchain/rules.bzl | 11 +- toolchain/toolchains.bzl.tpl | 4 +- 11 files changed, 358 insertions(+), 319 deletions(-) rename toolchain/{cc_toolchain_config.bzl.tpl => cc_toolchain_config.bzl} (50%) diff --git a/README.md b/README.md index 0e47e520..7e1ac466 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,12 @@ LLVM toolchain for Bazel [![Tests](https://github.com/grailbio/bazel-toolchain/actions/workflows/tests.yml/badge.svg)](https://github.com/grailbio/bazel-toolchain/actions/workflows/tests.yml) [![Migration](https://github.com/grailbio/bazel-toolchain/actions/workflows/migration.yml/badge.svg)](https://github.com/grailbio/bazel-toolchain/actions/workflows/migration.yml) ================= +------- + +Required minimum bazel version: 4.0.0 + +------- + To use this toolchain, include this section in your WORKSPACE: ```python # Change master to the git tag you want. 
diff --git a/WORKSPACE b/WORKSPACE index 107f430a..447b13b7 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -49,7 +49,7 @@ filegroup( ) llvm_toolchain( - name = "llvm_toolchain_linux_sysroot", + name = "llvm_toolchain_with_sysroot", llvm_version = "12.0.0", sysroot = { "linux-x86_64": "@org_chromium_sysroot_linux_x64//:sysroot", diff --git a/tests/scripts/linux_sysroot_test.sh b/tests/scripts/linux_sysroot_test.sh index e72241c0..2b030cb9 100755 --- a/tests/scripts/linux_sysroot_test.sh +++ b/tests/scripts/linux_sysroot_test.sh @@ -36,6 +36,6 @@ apt-get -qq -y install apt-utils curl pkg-config zip g++ zlib1g-dev unzip python # Run tests cd /src -tests/scripts/run_tests.sh -t '@llvm_toolchain_linux_sysroot//:cc-toolchain-k8-linux' +tests/scripts/run_tests.sh -t '@llvm_toolchain_with_sysroot//:cc-toolchain-x86_64-linux' """ done diff --git a/toolchain/BUILD.toolchain.tpl b/toolchain/BUILD.toolchain.tpl index 6b9c010d..cffdcddf 100644 --- a/toolchain/BUILD.toolchain.tpl +++ b/toolchain/BUILD.toolchain.tpl @@ -15,79 +15,27 @@ package(default_visibility = ["//visibility:public"]) load("@rules_cc//cc:defs.bzl", "cc_toolchain", "cc_toolchain_suite") +load("%{cc_toolchain_config_bzl}", "cc_toolchain_config") exports_files(["Makevars"]) +# Needed for old style --cpu and --compiler command line flags when using +# crosstool_top. +# TODO: Delete this and rely on toolchain registration mechanism alone. 
cc_toolchain_suite( name = "toolchain", toolchains = { - "k8|clang": ":cc-clang-k8-linux", + "k8|clang": ":cc-clang-x86_64-linux", "aarch64|clang": ":cc-clang-aarch64-linux", - "darwin|clang": ":cc-clang-darwin", - "k8": ":cc-clang-k8-linux", + "darwin|clang": ":cc-clang-x86_64-darwin", + "k8": ":cc-clang-x86_64-linux", "aarch64": ":cc-clang-aarch64-linux", - "darwin": ":cc-clang-darwin", + "darwin": ":cc-clang-x86_64-darwin", }, ) -load(":cc_toolchain_config.bzl", "cc_toolchain_config") - -cc_toolchain_config( - name = "local_linux_k8", - cpu = "k8", -) - -cc_toolchain_config( - name = "local_linux_aarch64", - cpu = "aarch64", -) - -cc_toolchain_config( - name = "local_darwin", - cpu = "darwin", -) - -toolchain( - name = "cc-toolchain-darwin", - exec_compatible_with = [ - "@platforms//cpu:x86_64", - "@platforms//os:osx", - ], - target_compatible_with = [ - "@platforms//cpu:x86_64", - "@platforms//os:osx", - ], - toolchain = ":cc-clang-darwin", - toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", -) - -toolchain( - name = "cc-toolchain-k8-linux", - exec_compatible_with = [ - "@platforms//cpu:x86_64", - "@platforms//os:linux", - ], - target_compatible_with = [ - "@platforms//cpu:x86_64", - "@platforms//os:linux", - ], - toolchain = ":cc-clang-k8-linux", - toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", -) - -toolchain( - name = "cc-toolchain-aarch64-linux", - exec_compatible_with = [ - "@platforms//cpu:aarch64", - "@platforms//os:linux", - ], - target_compatible_with = [ - "@platforms//cpu:aarch64", - "@platforms//os:linux", - ], - toolchain = ":cc-clang-aarch64-linux", - toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", -) +# Following filegroup targets are used when not using absolute paths and shared +# between different toolchains. 
filegroup( name = "empty", @@ -95,7 +43,7 @@ filegroup( ) filegroup( - name = "cc_wrapper", + name = "cc-wrapper", srcs = ["bin/cc_wrapper.sh"], ) diff --git a/toolchain/cc_toolchain_config.bzl.tpl b/toolchain/cc_toolchain_config.bzl similarity index 50% rename from toolchain/cc_toolchain_config.bzl.tpl rename to toolchain/cc_toolchain_config.bzl index c632524d..84166aed 100644 --- a/toolchain/cc_toolchain_config.bzl.tpl +++ b/toolchain/cc_toolchain_config.bzl @@ -1,4 +1,4 @@ -# Copyright 2018 The Bazel Authors. +# Copyright 2021 The Bazel Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,50 +16,71 @@ load( "@bazel_tools//tools/cpp:unix_cc_toolchain_config.bzl", unix_cc_toolchain_config = "cc_toolchain_config", ) +load( + "//toolchain/internal:common.bzl", + _check_os_arch_keys = "check_os_arch_keys", + _os_arch_pair = "os_arch_pair", +) -def cc_toolchain_config(name, cpu): - if not (cpu in ["aarch64", "darwin", "k8"]): - fail("Unreachable") +# Macro for calling cc_toolchain_config from @bazel_tools with setting the +# right paths and flags for the tools. +def cc_toolchain_config( + name, + host_arch, + host_os, + target_arch, + target_os, + toolchain_path_prefix, + tools_path_prefix, + cc_wrapper_prefix, + sysroot_path, + additional_include_dirs, + llvm_version): + host_os_arch_key = _os_arch_pair(host_os, host_arch) + target_os_arch_key = _os_arch_pair(target_os, target_arch) + _check_os_arch_keys([host_os_arch_key, target_os_arch_key]) # A bunch of variables that get passed straight through to # `create_cc_toolchain_config_info`. - (toolchain_identifier, host_system_name, target_system_name, target_cpu, - target_libc, compiler, abi_version, abi_libc_version, builtin_sysroot) = { - "darwin": ( - "clang-darwin", - "x86_64-apple-macosx", + # TODO: What do these values mean, and are they actually all correct? 
+ host_system_name = host_arch + ( + toolchain_identifier, + target_system_name, + target_cpu, + target_libc, + compiler, + abi_version, + abi_libc_version, + ) = { + "darwin-x86_64": ( + "clang-x86_64-darwin", "x86_64-apple-macosx", "darwin", "macosx", "clang", "darwin_x86_64", "darwin_x86_64", - "%{sysroot_path}", ), - "k8": ( - "clang-k8-linux", - "x86_64", + "linux-x86_64": ( + "clang-x86_64-linux", "x86_64-unknown-linux-gnu", "k8", "glibc_unknown", "clang", "clang", "glibc_unknown", - "%{sysroot_path}", ), - "aarch64": ( + "linux-aarch64": ( "clang-aarch64-linux", - "aarch64", "aarch64-unknown-linux-gnu", "aarch64", "glibc_unknown", "clang", "clang", "glibc_unknown", - "%{sysroot_path}", ), - }[cpu] - + }[target_os_arch_key] # Unfiltered compiler flags: unfiltered_compile_flags = [ @@ -70,18 +91,40 @@ def cc_toolchain_config(name, cpu): "-D__DATE__=\"redacted\"", "-D__TIMESTAMP__=\"redacted\"", "-D__TIME__=\"redacted\"", - "-fdebug-prefix-map=%{toolchain_path_prefix}=__bazel_toolchain_llvm_repo__/", + "-fdebug-prefix-map={}=__bazel_toolchain_llvm_repo__/".format(toolchain_path_prefix), ] + is_xcompile = not (host_os == target_os and host_arch == target_arch) # Linker flags: - if cpu in ["k8", "aarch64"]: + if host_os == "darwin" and not is_xcompile: + # lld is experimental for Mach-O, so we use the native ld64 linker. + use_lld = False + linker_flags = [ + "-headerpad_max_install_names", + "-undefined", + "dynamic_lookup", + ] + else: + # We prefer the lld linker. + # Note that for xcompiling from darwin to linux, the native ld64 is + # not an option because it is not a cross-linker, so lld is the + # only option. + use_lld = True linker_flags = [ - # Use the lld linker. "-fuse-ld=lld", - # The linker has no way of knowing if there are C++ objects; so we - # always link C++ libraries. 
- "-L%{toolchain_path_prefix}lib", + "-Wl,--build-id=md5", + "-Wl,--hash-style=gnu", + "-Wl,-z,relro,-z,now", + ] + + # The linker has no way of knowing if there are C++ objects; so we + # always link C++ libraries. + if host_os == "linux" and not is_xcompile: + # For single-platform linux builds, we can statically link the bundled + # libraries. + linker_flags.extend([ + "-L{}lib".format(toolchain_path_prefix), "-l:libc++.a", "-l:libc++abi.a", "-l:libunwind.a", @@ -90,31 +133,23 @@ def cc_toolchain_config(name, cpu): # To support libunwind. "-lpthread", "-ldl", - # Other linker flags. - "-Wl,--build-id=md5", - "-Wl,--hash-style=gnu", - "-Wl,-z,relro,-z,now", - ] - elif cpu == "darwin": - linker_flags = [ - # Difficult to guess options to statically link C++ libraries with - # the macOS linker. + ]) + else: + # For xcompile, we expect to pick up these libraries from the sysroot. + # TODO: For single-platform darwin builds, we can statically link the + # bundled libraries but I do not know the right flags to make it + # happen. + linker_flags.extend([ "-lc++", "-lc++abi", - "-headerpad_max_install_names", - "-undefined", - "dynamic_lookup", - ] - else: - fail("Unreachable") + ]) link_flags = [ "-lm", "-no-canonical-prefixes", ] + linker_flags - opt_link_flags = ["-Wl,--gc-sections"] if cpu in ["k8", "aarch64"] else [] - + opt_link_flags = ["-Wl,--gc-sections"] if target_os == "linux" else [] # Default compiler flags: compile_flags = [ @@ -142,121 +177,70 @@ def cc_toolchain_config(name, cpu): cxx_flags = ["-std=c++17", "-stdlib=libc++"] - # Coverage flags: coverage_compile_flags = ["-fprofile-instr-generate", "-fcoverage-mapping"] coverage_link_flags = ["-fprofile-instr-generate"] - ## NOTE: framework paths is missing here; unix_cc_toolchain_config ## doesn't seem to have a feature for this. 
- # C++ built-in include directories: cxx_builtin_include_directories = [ - "%{toolchain_path_prefix}include/c++/v1", - "%{toolchain_path_prefix}lib/clang/%{llvm_version}/include", - "%{toolchain_path_prefix}lib64/clang/%{llvm_version}/include", + toolchain_path_prefix + "include/c++/v1", + toolchain_path_prefix + "lib/clang/{}/include".format(llvm_version), + toolchain_path_prefix + "lib64/clang/{}/include".format(llvm_version), ] - # If `builtin_sysroot` is supported, use the `sysroot_prefix` here. - # `builtin_sysroot` support – required to use the `%sysroot%` prefix – was - # only added in bazel v4.0.0. - # - # `native.bazel_version` might give us back an empty string if a local dev build - # of bazel is being used; in this case we'll assume the version is at least - # 4.0.0. - # - # See: https://github.com/bazelbuild/bazel/commit/da345f1f249ebf28bec88c6e0d63260dfaef14e9 - builtin_sysroot_supported = int(("%{bazel_version}" or "4.0.0").split(".")[0]) >= 4 - sysroot_for_include_dirs = "%{sysroot_prefix}" if builtin_sysroot_supported else builtin_sysroot - if not sysroot_for_include_dirs.endswith('/'): - sysroot_for_include_dirs += '/' - - if (cpu in ["k8", "aarch64"]): - cxx_builtin_include_directories += [ - "{}include".format(sysroot_for_include_dirs), - "{}usr/include".format(sysroot_for_include_dirs), - "{}usr/local/include".format(sysroot_for_include_dirs), - ] - if (cpu == "k8"): - cxx_builtin_include_directories += [ - %{k8_additional_cxx_builtin_include_directories} - ] - elif (cpu == "aarch64"): - cxx_builtin_include_directories += [ - %{aarch64_additional_cxx_builtin_include_directories} - ] - elif (cpu == "darwin"): - cxx_builtin_include_directories += [ - "{}usr/include".format(sysroot_for_include_dirs), - "{}System/Library/Frameworks".format(sysroot_for_include_dirs), - "/Library/Frameworks", - ] + [ - %{darwin_additional_cxx_builtin_include_directories} - ] + sysroot_prefix = "" + if sysroot_path: + sysroot_prefix = "%sysroot%" + if target_os == 
"linux": + cxx_builtin_include_directories.extend([ + sysroot_prefix + "/include", + sysroot_prefix + "/usr/include", + sysroot_prefix + "/usr/local/include", + ]) + elif target_os == "darwin": + cxx_builtin_include_directories.extend([ + sysroot_prefix + "/usr/include", + sysroot_prefix + "/System/Library/Frameworks", + ]) else: fail("Unreachable") + cxx_builtin_include_directories.extend(additional_include_dirs) ## NOTE: make variables are missing here; unix_cc_toolchain_config doesn't ## pass these to `create_cc_toolchain_config_info`. - # Tool paths: # `llvm-strip` was introduced in V7 (https://reviews.llvm.org/D46407): - llvm_version = "%{llvm_version}".split(".") + llvm_version = llvm_version.split(".") llvm_major_ver = int(llvm_version[0]) if len(llvm_version) else 0 - strip_binary = \ - "%{tools_path_prefix}bin/llvm-strip" if llvm_major_ver >= 7 else "/usr/bin/strip" + strip_binary = (tools_path_prefix + "bin/llvm-strip") if llvm_major_ver >= 7 else "/usr/bin/strip" tool_paths = { - "cpp": "%{tools_path_prefix}bin/clang-cpp", - "dwp": "%{tools_path_prefix}bin/llvm-dwp", - "gcov": "%{tools_path_prefix}bin/llvm-profdata", - "llvm-cov": "%{tools_path_prefix}bin/llvm-cov", - "nm": "%{tools_path_prefix}bin/llvm-nm", - "objcopy": "%{tools_path_prefix}bin/llvm-objcopy", - "objdump": "%{tools_path_prefix}bin/llvm-objdump", + # TODO: The command line formed on darwin does not work with llvm-ar. 
+ "ar": tools_path_prefix + "bin/llvm-ar" if host_os != "darwin" else "/usr/bin/libtool", + "cpp": tools_path_prefix + "bin/clang-cpp", + "dwp": tools_path_prefix + "bin/llvm-dwp", + "gcc": cc_wrapper_prefix + "bin/cc_wrapper.sh", + "gcov": tools_path_prefix + "bin/llvm-profdata", + "ld": tools_path_prefix + "bin/ld.lld" if use_lld else "/usr/bin/ld", + "llvm-cov": tools_path_prefix + "bin/llvm-cov", + "nm": tools_path_prefix + "bin/llvm-nm", + "objcopy": tools_path_prefix + "bin/llvm-objcopy", + "objdump": tools_path_prefix + "bin/llvm-objdump", "strip": strip_binary, } - tool_paths.update({ - "k8": { - "ld": "%{tools_path_prefix}bin/ld.lld", - "gcc": "%{cc_wrapper_prefix}bin/cc_wrapper.sh", - "ar": "%{tools_path_prefix}bin/llvm-ar", - }, - "aarch64": { - "ld": "%{tools_path_prefix}bin/ld.lld", - "gcc": "%{cc_wrapper_prefix}bin/cc_wrapper.sh", - "ar": "%{tools_path_prefix}bin/llvm-ar", - }, - "darwin": { - # ld.lld Mach-O support is still experimental: - "ld": "%{tools_path_prefix}bin/ld", - # See `cc_wrapper.sh.tpl` for details: - "gcc": "%{cc_wrapper_prefix}bin/cc_wrapper.sh", - # No idea why we use `libtool` instead of `llvm-ar` on macOS: - "ar": "/usr/bin/libtool", - }, - }[cpu]) - # Start-end group linker support: # This was added to `lld` in this patch: http://reviews.llvm.org/D18814 # # The oldest version of LLVM that we support is 6.0.0 which was released - # after the above patch was merged, so we just set this to `True` when `lld` - # is being used as the linker, which is always... except on macOS since - # `lld` Mach-O support is still experimental. - supports_start_end_lib = tool_paths["ld"].endswith("ld.lld") - - # Additional arguments to cc_toolchain_config. - kwargs = {} - if builtin_sysroot_supported and builtin_sysroot: - # This was only added in bazel v4.0.0. 
- # See: https://github.com/bazelbuild/bazel/commit/da345f1f249ebf28bec88c6e0d63260dfaef14e9 - kwargs.update(builtin_sysroot = builtin_sysroot) + # after the above patch was merged, so we just set this to `True` when + # `lld` is being used as the linker. + supports_start_end_lib = use_lld # Source: https://cs.opensource.google/bazel/bazel/+/master:tools/cpp/unix_cc_toolchain_config.bzl unix_cc_toolchain_config( @@ -282,5 +266,5 @@ def cc_toolchain_config(name, cpu): coverage_compile_flags = coverage_compile_flags, coverage_link_flags = coverage_link_flags, supports_start_end_lib = supports_start_end_lib, - **kwargs, + builtin_sysroot = sysroot_path, ) diff --git a/toolchain/internal/common.bzl b/toolchain/internal/common.bzl index 21c5e2ad..0543192a 100644 --- a/toolchain/internal/common.bzl +++ b/toolchain/internal/common.bzl @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -SUPPORTED_OS_ARCH = ["linux-x86_64", "linux-aarch64", "darwin-x86_64"] +SUPPORTED_TARGETS = [("linux", "x86_64"), ("linux", "aarch64"), ("darwin", "x86_64")] def python(rctx): # Get path of the python interpreter. @@ -39,6 +39,10 @@ def os(rctx): return "windows" fail("Unsupported OS: " + name) +def os_bzl(os): + # Return the OS string as used in bazel platform constraints. 
+ return {"darwin": "osx", "linux": "linux"}[os] + def arch(rctx): exec_result = rctx.execute([ python(rctx), @@ -49,15 +53,20 @@ def arch(rctx): fail("Failed to detect machine architecture: \n%s\n%s" % (exec_result.stdout, exec_result.stderr)) return exec_result.stdout.strip() -def os_arch_pair(shortos, arch): - return "{}-{}".format(shortos, arch) +def os_arch_pair(os, arch): + return "{}-{}".format(os, arch) + +_supported_os_arch = [os_arch_pair(os, arch) for (os, arch) in SUPPORTED_TARGETS] + +def supported_os_arch_keys(): + return _supported_os_arch def check_os_arch_keys(keys): for k in keys: - if k and k not in SUPPORTED_OS_ARCH: + if k and k not in _supported_os_arch: fail("Unsupported {{os}}-{{arch}} key: {key}; valid keys are: {keys}".format( key = k, - keys = ", ".join(SUPPORTED_OS_ARCH), + keys = ", ".join(_supported_os_arch), )) def canonical_dir_path(path): diff --git a/toolchain/internal/configure.bzl b/toolchain/internal/configure.bzl index 3d5cf4ad..016e21ee 100644 --- a/toolchain/internal/configure.bzl +++ b/toolchain/internal/configure.bzl @@ -19,19 +19,18 @@ load( _check_os_arch_keys = "check_os_arch_keys", _os = "os", _os_arch_pair = "os_arch_pair", + _os_bzl = "os_bzl", _pkg_path_from_label = "pkg_path_from_label", + _supported_targets = "SUPPORTED_TARGETS", ) load( "//toolchain/internal:sysroot.bzl", + _default_sysroot_path = "default_sysroot_path", _sysroot_path = "sysroot_path", ) load("@rules_cc//cc:defs.bzl", _cc_toolchain = "cc_toolchain") def _makevars_ld_flags(rctx, os): - if os == "darwin": - return "" - - # lld, as of LLVM 7, is experimental for Mach-O, so we use it only on linux. 
return "-fuse-ld=lld" def _include_dirs_str(rctx, key): @@ -48,10 +47,10 @@ def llvm_config_impl(rctx): os = _os(rctx) if os == "windows": rctx.file("BUILD.bazel") - rctx.file("toolchains.bzl", """ + rctx.file("toolchains.bzl", """\ def llvm_register_toolchains(): pass - """) +""") return arch = _arch(rctx) @@ -96,54 +95,47 @@ def llvm_register_toolchains(): tools_path_prefix = "llvm/" cc_wrapper_prefix = "" - sysroot_path, sysroot = _sysroot_path(rctx, os, arch) - sysroot_label = "\"%s\"" % str(sysroot) if sysroot else "" + default_sysroot_path = _default_sysroot_path(rctx, os) - cc_toolchains_str = ( - _llvm_filegroups_str(sysroot_label, toolchain_root, use_absolute_paths) + - _cc_toolchain_str("cc-clang-k8-linux", "local_linux_k8", False, use_absolute_paths) + - _cc_toolchain_str("cc-clang-aarch64-linux", "local_linux_aarch64", False, use_absolute_paths) + - _cc_toolchain_str("cc-clang-darwin", "local_darwin", True, use_absolute_paths) + workspace_name = rctx.name + toolchain_info = struct( + os = os, + arch = arch, + toolchain_root = toolchain_root, + toolchain_path_prefix = toolchain_path_prefix, + tools_path_prefix = tools_path_prefix, + cc_wrapper_prefix = cc_wrapper_prefix, + additional_include_dirs_dict = rctx.attr.cxx_builtin_include_directories, + sysroot_dict = rctx.attr.sysroot, + default_sysroot_path = default_sysroot_path, + llvm_version = rctx.attr.llvm_version, + ) + cc_toolchains_str, toolchain_labels_str = _cc_toolchains_str( + workspace_name, + toolchain_info, + use_absolute_paths, ) - substitutions = { - "%{toolchain_workspace_name}": rctx.name, - "%{llvm_version}": rctx.attr.llvm_version, - "%{bazel_version}": native.bazel_version, - "%{toolchain_root}": toolchain_root, - "%{toolchain_path_prefix}": toolchain_path_prefix, - "%{tools_path_prefix}": tools_path_prefix, - "%{cc_wrapper_prefix}": cc_wrapper_prefix, - "%{sysroot_path}": sysroot_path, - "%{sysroot_prefix}": "%sysroot%" if sysroot_path else "", - "%{makevars_ld_flags}": 
_makevars_ld_flags(rctx, os), - "%{k8_additional_cxx_builtin_include_directories}": _include_dirs_str(rctx, "linux-x86_64"), - "%{aarch64_additional_cxx_builtin_include_directories}": _include_dirs_str(rctx, "linux-aarch64"), - "%{darwin_additional_cxx_builtin_include_directories}": _include_dirs_str(rctx, "darwin-x86_64"), - "%{cc_toolchains}": cc_toolchains_str, - } - + # Convenience macro to register all generated toolchains. rctx.template( "toolchains.bzl", Label("//toolchain:toolchains.bzl.tpl"), - substitutions, - ) - rctx.template( - "cc_toolchain_config.bzl", - Label("//toolchain:cc_toolchain_config.bzl.tpl"), - substitutions, - ) - rctx.template( - "Makevars", - Label("//toolchain:Makevars.tpl"), - substitutions, + { + "%{toolchain_labels}": toolchain_labels_str, + }, ) + + # BUILD file with all the generated toolchain definitions. rctx.template( "BUILD.bazel", Label("//toolchain:BUILD.toolchain.tpl"), - substitutions, + { + "%{cc_toolchains}": cc_toolchains_str, + "%{cc_toolchain_config_bzl}": str(rctx.attr._cc_toolchain_config_bzl), + }, ) + # CC wrapper script; see comments near the definition of cc_wrapper_prefix. if os == "darwin": cc_wrapper_tpl = "//toolchain:osx_cc_wrapper.sh.tpl" else: @@ -151,110 +143,207 @@ def llvm_register_toolchains(): rctx.template( "bin/cc_wrapper.sh", Label(cc_wrapper_tpl), - substitutions, + { + "%{toolchain_path_prefix}": toolchain_path_prefix, + }, ) -def _llvm_filegroups_str(sysroot_label, toolchain_root, use_absolute_paths): - if use_absolute_paths: - return "" + # Make vars useful for languages that interface with C/C++ and use the 'make' system. + rctx.template( + "Makevars", + Label("//toolchain:Makevars.tpl"), + { + "%{toolchain_path_prefix}": toolchain_path_prefix, + "%{makevars_ld_flags}": _makevars_ld_flags(rctx, os), + }, + ) - return """ -# LLVM distribution filegroup definitions that are used in cc_toolchain targets. 
+def _cc_toolchains_str(workspace_name, toolchain_info, use_absolute_paths): + # Since all the toolchains rely on downloading the right LLVM toolchain for + # the host architecture, we don't need to explicitly specify + # `exec_compatible_with` attribute. If the host and execution platform are + # not the same, then host auto-detection based LLVM download does not work + # and the user has to explicitly specify the distribution of LLVM they + # want. -filegroup( - name = "ar", - srcs = ["{toolchain_root}:ar"], -) + # Note that for cross-compiling, the toolchain configuration will need + # appropriate sysroots. A recommended approach is to configure two + # `llvm_toolchain` repos, one without sysroots (for easy single platform + # builds) and register this one, and one with sysroots and provide + # `--extra_toolchains` flag when cross-compiling. -filegroup( - name = "as", - srcs = ["{toolchain_root}:as"], + cc_toolchains_str = "" + toolchain_names = [] + for (target_os, target_arch) in _supported_targets: + suffix = "{}-{}".format(target_arch, target_os) + cc_toolchain_str = _cc_toolchain_str( + suffix, + target_os, + target_arch, + toolchain_info, + use_absolute_paths, + ) + if cc_toolchain_str: + cc_toolchains_str = cc_toolchains_str + cc_toolchain_str + toolchain_name = "@{}//:cc-toolchain-{}".format(workspace_name, suffix) + toolchain_names.append(toolchain_name) + + sep = ",\n" + " " * 8 # 2 tabs with tabstop=4. 
+ toolchain_labels_str = sep.join(["\"{}\"".format(d) for d in toolchain_names]) + return cc_toolchains_str, toolchain_labels_str + +def _cc_toolchain_str( + suffix, + target_os, + target_arch, + toolchain_info, + use_absolute_paths): + host_os = toolchain_info.os + host_arch = toolchain_info.arch + + host_os_bzl = _os_bzl(host_os) + target_os_bzl = _os_bzl(target_os) + + sysroot_path, sysroot = _sysroot_path( + toolchain_info.sysroot_dict, + target_os, + target_arch, + ) + if not sysroot_path: + if host_os == target_os and host_arch == target_arch: + # For darwin -> darwin, we can use the macOS SDK path. + sysroot_path = toolchain_info.default_sysroot_path + else: + # We are trying to cross-compile without a sysroot, let's bail. + # TODO: Are there situations where we can continue? + return "" + + extra_files_str = ", \":llvm\", \":cc-wrapper\"" + + additional_include_dirs = toolchain_info.additional_include_dirs_dict.get(_os_arch_pair(target_os, target_arch)) + additional_include_dirs_str = "[]" + if additional_include_dirs: + additional_include_dirs_str = "[{}]".format( + ", ".join(["\"{}\"".format(d) for d in additional_include_dirs]), + ) + + sysroot_label_str = "\"%s\"" % str(sysroot) if sysroot else "" + + template = """ +# CC toolchain for cc-clang-{suffix}. 
+ +cc_toolchain_config( + name = "local-{suffix}", + host_arch = "{host_arch}", + host_os = "{host_os}", + target_arch = "{target_arch}", + target_os = "{target_os}", + toolchain_path_prefix = "{toolchain_path_prefix}", + tools_path_prefix = "{tools_path_prefix}", + cc_wrapper_prefix = "{cc_wrapper_prefix}", + sysroot_path = "{sysroot_path}", + additional_include_dirs = {additional_include_dirs_str}, + llvm_version = "{llvm_version}", ) -filegroup( - name = "sysroot_components", - srcs = [{sysroot_label}], +toolchain( + name = "cc-toolchain-{suffix}", + exec_compatible_with = [ + "@platforms//cpu:{host_arch}", + "@platforms//os:{host_os_bzl}", + ], + target_compatible_with = [ + "@platforms//cpu:{target_arch}", + "@platforms//os:{target_os_bzl}", + ], + toolchain = ":cc-clang-{suffix}", + toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", ) +""" + if use_absolute_paths: + template = template + """ +cc_toolchain( + name = "cc-clang-{suffix}", + all_files = ":empty", + compiler_files = ":empty", + dwp_files = ":empty", + linker_files = ":empty", + objcopy_files = ":empty", + strip_files = ":empty", + toolchain_config = "local-{suffix}", +""" + else: + template = template + """ filegroup( - name = "binutils_components", - srcs = ["{toolchain_root}:bin"], + name = "sysroot-components-{suffix}", + srcs = [{sysroot_label_str}], ) filegroup( - name = "compiler_components", + name = "compiler-components-{suffix}", srcs = [ "{toolchain_root}:clang", "{toolchain_root}:include", - ":sysroot_components", + ":sysroot-components-{suffix}", ], ) filegroup( - name = "linker_components", + name = "linker-components-{suffix}", srcs = [ "{toolchain_root}:clang", "{toolchain_root}:ld", "{toolchain_root}:ar", "{toolchain_root}:lib", - ":sysroot_components", + ":sysroot-components-{suffix}", ], ) filegroup( - name = "all_components", + name = "all-components-{suffix}", srcs = [ - ":binutils_components", - ":compiler_components", - ":linker_components", + "{toolchain_root}:bin", 
+ ":compiler-components-{suffix}", + ":linker-components-{suffix}", ], ) -""".format(sysroot_label = sysroot_label, toolchain_root = toolchain_root) - -def _cc_toolchain_str(name, toolchain_config, darwin, use_absolute_paths): - extra_files = ", \":llvm\", \":cc_wrapper\"" - - if use_absolute_paths: - template = """ -# CC toolchain for {name} with absolute paths. - -cc_toolchain( - name = "{name}", - all_files = ":empty", - compiler_files = ":empty", - dwp_files = ":empty", - linker_files = ":empty", - objcopy_files = ":empty", - strip_files = ":empty", - toolchain_config = "{toolchain_config}", -) -""" - else: - template = """ -# CC toolchain for {name}. -filegroup(name = "{name}-all-files", srcs = [":all_components"{extra_files}]) -filegroup(name = "{name}-archiver-files", srcs = [":ar"{extra_files}]) -filegroup(name = "{name}-assembler-files", srcs = [":as"{extra_files}]) -filegroup(name = "{name}-compiler-files", srcs = [":compiler_components"{extra_files}]) -filegroup(name = "{name}-linker-files", srcs = [":linker_components"{extra_files}]) +filegroup(name = "all-files-{suffix}", srcs = [":all-components-{suffix}"{extra_files_str}]) +filegroup(name = "archiver-files-{suffix}", srcs = ["{toolchain_root}:ar"{extra_files_str}]) +filegroup(name = "assembler-files-{suffix}", srcs = ["{toolchain_root}:as"{extra_files_str}]) +filegroup(name = "compiler-files-{suffix}", srcs = [":compiler-components-{suffix}"{extra_files_str}]) +filegroup(name = "linker-files-{suffix}", srcs = [":linker-components-{suffix}"{extra_files_str}]) cc_toolchain( - name = "{name}", - all_files = "{name}-all-files", - ar_files = "{name}-archiver-files", - as_files = "{name}-assembler-files", - compiler_files = "{name}-compiler-files", - dwp_files = ":dwp", - linker_files = "{name}-linker-files", - objcopy_files = ":objcopy", + name = "cc-clang-{suffix}", + all_files = "all-files-{suffix}", + ar_files = "archiver-files-{suffix}", + as_files = "assembler-files-{suffix}", + compiler_files = 
"compiler-files-{suffix}", + dwp_files = "{toolchain_root}:dwp", + linker_files = "linker-files-{suffix}", + objcopy_files = "{toolchain_root}:objcopy", strip_files = ":empty", - toolchain_config = "{toolchain_config}", + toolchain_config = "local-{suffix}", ) """ return template.format( - name = name, - toolchain_config = toolchain_config, - extra_files = extra_files, + suffix = suffix, + target_os = target_os, + target_arch = target_arch, + host_os = host_os, + host_arch = host_arch, + target_os_bzl = target_os_bzl, + host_os_bzl = host_os_bzl, + toolchain_root = toolchain_info.toolchain_root, + toolchain_path_prefix = toolchain_info.toolchain_path_prefix, + tools_path_prefix = toolchain_info.tools_path_prefix, + cc_wrapper_prefix = toolchain_info.cc_wrapper_prefix, + additional_include_dirs_str = additional_include_dirs_str, + sysroot_label_str = sysroot_label_str, + sysroot_path = sysroot_path, + llvm_version = toolchain_info.llvm_version, + extra_files_str = extra_files_str, ) diff --git a/toolchain/internal/sysroot.bzl b/toolchain/internal/sysroot.bzl index a5913d9e..9646d08d 100644 --- a/toolchain/internal/sysroot.bzl +++ b/toolchain/internal/sysroot.bzl @@ -26,18 +26,20 @@ def _darwin_sdk_path(rctx): print(exec_result.stderr) return exec_result.stdout.strip() -def _default_sysroot(rctx, os): +# Default sysroot path can be used when the user has not provided an explicit +# sysroot for the target, and when host platform is the same as target +# platform. +def default_sysroot_path(rctx, os): if os == "darwin": return _darwin_sdk_path(rctx) else: return "" # Return the sysroot path and the label to the files, if sysroot is not a system path. 
-def sysroot_path(rctx, os, arch): - sysroot = rctx.attr.sysroot.get(_os_arch_pair(os, arch)) - +def sysroot_path(sysroot_dict, os, arch): + sysroot = sysroot_dict.get(_os_arch_pair(os, arch)) if not sysroot: - return (_default_sysroot(rctx, os), None) + return (None, None) # If the sysroot is an absolute path, use it as-is. Check for things that # start with "/" and not "//" to identify absolute paths, but also support diff --git a/toolchain/osx_cc_wrapper.sh.tpl b/toolchain/osx_cc_wrapper.sh.tpl index 0f3997e5..7804a4a3 100755 --- a/toolchain/osx_cc_wrapper.sh.tpl +++ b/toolchain/osx_cc_wrapper.sh.tpl @@ -70,7 +70,7 @@ elif [[ "${BASH_SOURCE[0]}" == "/"* ]]; then # change CWD and call $CC (this script) with its absolute path. # the execroot (i.e. `cmake` from `rules_foreign_cc`) and call CC . For cases like this, # we'll try to find `clang` relative to this script. - # This script is at _execroot_/external/_repo_name_/bin/clang_wrapper.sh + # This script is at _execroot_/external/_repo_name_/bin/cc_wrapper.sh execroot_path="${BASH_SOURCE[0]%/*/*/*/*}" clang="${execroot_path}/%{toolchain_path_prefix}bin/clang" "${clang}" "${@}" diff --git a/toolchain/rules.bzl b/toolchain/rules.bzl index a9b3d686..b7865a99 100644 --- a/toolchain/rules.bzl +++ b/toolchain/rules.bzl @@ -14,7 +14,7 @@ load( "//toolchain/internal:common.bzl", - _supported_os_arch = "SUPPORTED_OS_ARCH", + _supported_os_arch_keys = "supported_os_arch_keys", ) load( "//toolchain/internal:configure.bzl", @@ -67,7 +67,7 @@ _llvm_config_attrs.update({ # we ultimately need to subset the files to be more selective in what we include in the # sandbox for which operations, and it is not straightforward to subset a filegroup. doc = ("System or package path, for each host OS and arch pair you want to support " + - "({}), ".format(", ".join(_supported_os_arch)) + + "({}), ".format(", ".join(_supported_os_arch_keys())) + "to be used as the LLVM toolchain distributions. 
An empty key can be used to " + "specify a fallback default for all hosts, e.g. with the llvm_toolchain_repo rule. " + "If the value begins with exactly one forward slash '/', then the value is " + @@ -78,7 +78,7 @@ _llvm_config_attrs.update({ "sysroot": attr.string_dict( mandatory = False, doc = ("System path or fileset, for each target OS and arch pair you want to support " + - "({}), ".format(", ".join(_supported_os_arch)) + + "({}), ".format(", ".join(_supported_os_arch_keys())) + "used to indicate the set of files that form the sysroot for the compiler. " + "If the value begins with exactly one forward slash '/', then the value is " + "assumed to be a system path. Else, the value will be assumed to be a label " + @@ -89,13 +89,16 @@ _llvm_config_attrs.update({ mandatory = False, doc = ("Additional builtin include directories to be added to the default system " + "directories, for each target OS and arch pair you want to support " + - "({}); ".format(", ".join(_supported_os_arch)) + + "({}); ".format(", ".join(_supported_os_arch_keys())) + "see documentation for bazel's create_cc_toolchain_config_info."), ), "absolute_paths": attr.bool( default = False, doc = "Use absolute paths in the toolchain. Avoids sandbox overhead.", ), + "_cc_toolchain_config_bzl": attr.label( + default = "//toolchain:cc_toolchain_config.bzl", + ), }) llvm = repository_rule( diff --git a/toolchain/toolchains.bzl.tpl b/toolchain/toolchains.bzl.tpl index 2ff7ef50..2b1961f7 100644 --- a/toolchain/toolchains.bzl.tpl +++ b/toolchain/toolchains.bzl.tpl @@ -14,7 +14,5 @@ def llvm_register_toolchains(): native.register_toolchains( - "@%{toolchain_workspace_name}//:cc-toolchain-k8-linux", - "@%{toolchain_workspace_name}//:cc-toolchain-aarch64-linux", - "@%{toolchain_workspace_name}//:cc-toolchain-darwin", + %{toolchain_labels} )