Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bazel: Better remote caching #29367

Merged
merged 2 commits into from
Sep 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ build --copt=-Wno-error=deprecated-declarations
# Required to stamp our development builds with the current git hash.
#
# This script gets run before every build, see the script for more info.
build --workspace_status_command "python3 misc/bazel/build-info/workspace_status.py"
build:release-stamp --stamp --workspace_status_command "python3 misc/bazel/build-info/workspace_status.py"

# Output all test output by default, this makes it most like cargo.
#
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ store = { path = "misc/cargo-vet" }
# DO NOT EDIT. Automatically generated by bin/gen-lints.
[workspace.lints.rust]
unknown_lints = "allow"
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(bazel, coverage, nightly_doc_features, release, tokio_unstable)'] }
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(bazel, stamped, coverage, nightly_doc_features, release, tokio_unstable)'] }

[workspace.lints.rustdoc]
unportable_markdown = "allow"
Expand Down
5 changes: 5 additions & 0 deletions misc/bazel/build-info/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,8 @@ gen_build_info(
rust_file = "build_info.rs",
visibility = ["//visibility:public"],
)

config_setting(
name = "stamped",
values = {"stamp": "true"},
)
35 changes: 35 additions & 0 deletions misc/python/materialize/cli/bazel.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@

from materialize import MZ_ROOT, ui

# Path where we put the current revision of the repo that we can side channel
# into Bazel.
MZ_GIT_HASH_FILE = "/tmp/mz_git_hash.txt"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What makes it safe to always write to this file in /tmp? Does Bazel make /tmp hermetic? Not that this is a huge risk, but if you have two worktrees of Materialize and you're building simultaneously, it seems like they'll compete to write to this file?

Is there a way to write to a file in the current repo instead? That seems a bit less worrying to me.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we use an env var instead? If the build is allowed to read random files maybe we can just read a special env var?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately it's not safe, two different Materialize worktrees building Bazel at the same time will totally compete with each other. The issue is Bazel doesn't expose the path to the current repo in a way that wouldn't invalidate the cache. Same with environment variables, anything we try passing into for the build process of a rust_binary or rust_library is tracked as well.

I'll ask around and see if I can find a better solution, but going to merge as-is for now!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm building in multiple repos at once all the time. I guess we also can't use something like the PID in the path?



def main() -> int:
parser = argparse.ArgumentParser(
Expand All @@ -26,6 +30,9 @@ def main() -> int:

(args, sub_args) = parser.parse_known_args()

# Always update our side-channel git hash incase some command needs it.
write_git_hash()

if args.action == "gen":
gen_cmd(sub_args)
elif args.action == "fmt":
Expand Down Expand Up @@ -147,5 +154,33 @@ def output_path(target) -> pathlib.Path:
return pathlib.Path(path)


def write_git_hash():
"""
Temporary file where we write the current git hash, so we can side channel
it into Bazel.

For production releases we stamp builds with the `workspace_status_command`
but this workflow is not friendly to remote caching. Specifically, the
"volatile status" of a workspace is not supposed to cause builds to get
invalidated, and it doesn't when the result is cached locally, but it does
when it's cached remotely.

See: <https://bazel.build/docs/user-manual#workspace-status>
<https://github.com/bazelbuild/bazel/issues/10075>
"""

repo = MZ_ROOT / ".git"
cmd_args = ["git", f"--git-dir={repo}", "rev-parse", "HEAD"]
result = subprocess.run(
cmd_args, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
)

if result.returncode == 0:
with open(MZ_GIT_HASH_FILE, "w") as f:
f.write(result.stdout.strip())
else:
ui.warn(f"Failed to get current revision of {MZ_ROOT}, falling back to all 0s")


if __name__ == "__main__":
main()
2 changes: 1 addition & 1 deletion misc/python/materialize/cli/gen-lints.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@

EXCLUDE_CRATES = ["workspace-hack"]

CHECK_CFGS = "bazel, coverage, nightly_doc_features, release, tokio_unstable"
CHECK_CFGS = "bazel, stamped, coverage, nightly_doc_features, release, tokio_unstable"


def main() -> None:
Expand Down
2 changes: 1 addition & 1 deletion misc/wasm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ store = { path = "../cargo-vet" }
# DO NOT EDIT. Automatically generated by bin/gen-lints.
[workspace.lints.rust]
unknown_lints = "allow"
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(bazel, coverage, nightly_doc_features, release, tokio_unstable)'] }
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(bazel, stamped, coverage, nightly_doc_features, release, tokio_unstable)'] }

[workspace.lints.rustdoc]
unportable_markdown = "allow"
Expand Down
20 changes: 16 additions & 4 deletions src/build-info/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,24 @@ rust_library(
normal = True,
proc_macro = True,
),
compile_data = ["@//misc/bazel/build-info:gen_build_info"],
compile_data = select({
"@//misc/bazel/build-info:stamped": ["@//misc/bazel/build-info:gen_build_info"],
"//conditions:default": [],
}),
crate_features = [
"default",
"semver",
],
data = [],
proc_macro_deps = [] + all_crate_deps(proc_macro = True),
rustc_env = {"BAZEL_GEN_BUILD_INFO": "$(execpath @//misc/bazel/build-info:gen_build_info)"},
rustc_flags = ["--cfg=bazel"],
rustc_env = select({
"@//misc/bazel/build-info:stamped": {"BAZEL_GEN_BUILD_INFO": "$(execpath @//misc/bazel/build-info:gen_build_info)"},
"//conditions:default": {},
}),
rustc_flags = ["--cfg=bazel"] + select({
"@//misc/bazel/build-info:stamped": ["--cfg=stamped"],
"//conditions:default": [],
}),
version = "0.0.0",
deps = [":mz_build_info_build_script"] + all_crate_deps(normal = True),
)
Expand All @@ -57,7 +66,10 @@ rust_test(
proc_macro_dev = True,
),
rustc_env = {},
rustc_flags = ["--cfg=bazel"],
rustc_flags = ["--cfg=bazel"] + select({
"@//misc/bazel/build-info:stamped": ["--cfg=stamped"],
"//conditions:default": [],
}),
version = "0.0.0",
deps = [] + all_crate_deps(
normal = True,
Expand Down
18 changes: 4 additions & 14 deletions src/build-info/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,7 @@ default = ["semver", "workspace-hack"]
[package.metadata.cargo-udeps.ignore]
normal = ["workspace-hack"]

[package.metadata.cargo-gazelle.lib]
compile_data = ["@//misc/bazel/build-info:gen_build_info"]
rustc_flags = ["--cfg=bazel"]

# Skip generating doc tests because there isn't a way to set the rustc flags
# used for the test, so we can't set the `--cfg=bazel` flag.
[package.metadata.cargo-gazelle.test.doc]
skip = true

[package.metadata.cargo-gazelle.lib.rustc_env]
BAZEL_GEN_BUILD_INFO = "$(execpath @//misc/bazel/build-info:gen_build_info)"

[package.metadata.cargo-gazelle.test.lib]
rustc_flags = ["--cfg=bazel"]
# This crate requires some pretty specific configuration, so we manually
# maintain the BUILD file instead of having cargo-gazelle generate it for us.
[package.metadata.cargo-gazelle]
skip_generating = true
33 changes: 22 additions & 11 deletions src/build-info/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,34 @@ macro_rules! build_info {
};
}

#[cfg(bazel)]
#[cfg(all(bazel, stamped))]
#[macro_export]
macro_rules! __git_sha_internal {
() => {
$crate::private::bazel_variables::GIT_COMMIT_HASH
};
}

// To improve the effectiveness of remote caching we only stamp "release" builds
// and otherwise side-channel git status through a known file.
//
// See: <https://github.com/bazelbuild/bazel/issues/10075>
#[cfg(all(bazel, not(stamped)))]
#[macro_export]
macro_rules! __git_sha_internal {
() => {
$crate::private::run_command_str!(
"sh",
"-c",
r#"if [ -f /tmp/mz_git_hash.txt ]; then
cat /tmp/mz_git_hash.txt
else
echo "0000000000000000000000000000000000000000"
fi"#
)
};
}

#[cfg(not(bazel))]
#[macro_export]
macro_rules! __git_sha_internal {
Expand Down Expand Up @@ -125,15 +145,6 @@ macro_rules! __git_sha_internal {
}
}

#[cfg(bazel)]
#[macro_export]
macro_rules! __build_time_internal {
() => {
$crate::private::bazel_variables::MZ_BUILD_TIME
};
}

#[cfg(not(bazel))]
#[macro_export]
macro_rules! __build_time_internal {
() => {
Expand All @@ -148,7 +159,7 @@ pub mod private {
// Bazel has a "workspace status" feature that allows us to collect info from
// the workspace (e.g. git hash) at build time. These values get incleded via
// a generated file.
#[cfg(bazel)]
#[cfg(all(bazel, stamped))]
#[allow(unused)]
pub mod bazel_variables {
include!(std::env!("BAZEL_GEN_BUILD_INFO"));
Expand Down
Loading