From 173ef9d9dff36264b738f24d2f13445a5bdca670 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Mon, 16 Jan 2023 04:23:42 -0800 Subject: [PATCH] Dont query remote cache but always use bytestream protocol When using `--experimental_remote_build_event_upload=minimal`, instead of querying remote cache to decide whether convert local path of a file to `bytestream://`, it now always use `bytestream://` for BEP referenced files. Closes https://github.com/bazelbuild/bazel/pull/16999. Closes #17025. PiperOrigin-RevId: 502351401 Change-Id: Ic858367ffaf3c2a2c20db88ada85fbbe1d93aae8 --- .../ByteStreamBuildEventArtifactUploader.java | 40 ++++--- .../lib/remote/options/RemoteOptions.java | 5 +- .../remote_build_event_uploader_test.sh | 101 +++++++++++------- src/test/shell/bazel/remote/remote_utils.sh | 9 ++ 4 files changed, 97 insertions(+), 58 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploader.java b/src/main/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploader.java index 89f0da82b7a0ae..8eab0e6666479f 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploader.java +++ b/src/main/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploader.java @@ -22,6 +22,7 @@ import build.bazel.remote.execution.v2.RequestMetadata; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.eventbus.Subscribe; import com.google.common.util.concurrent.ListenableFuture; @@ -47,7 +48,6 @@ import io.reactivex.rxjava3.schedulers.Schedulers; import java.io.IOException; import java.util.ArrayList; -import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -107,10 +107,16 @@ class ByteStreamBuildEventArtifactUploader extends AbstractReferenceCounted } public void omitFile(Path file) { + Preconditions.checkState( + remoteBuildEventUploadMode != RemoteBuildEventUploadMode.MINIMAL, + "Cannot omit file in MINIMAL mode"); omittedFiles.add(file.asFragment()); } public void omitTree(Path treeRoot) { + Preconditions.checkState( + remoteBuildEventUploadMode != RemoteBuildEventUploadMode.MINIMAL, + "Cannot omit tree in MINIMAL mode"); omittedTreeRoots.add(treeRoot.asFragment()); } @@ -209,10 +215,6 @@ private static void processQueryResult( } } - private boolean shouldQuery(PathMetadata path) { - return path.getDigest() != null && !path.isRemote() && !path.isDirectory(); - } - private boolean shouldUpload(PathMetadata path) { boolean result = path.getDigest() != null && !path.isRemote() && !path.isDirectory() && !path.isOmitted(); @@ -239,7 +241,8 @@ private Single> queryRemoteCache( List filesToQuery = new ArrayList<>(); Set digestsToQuery = new HashSet<>(); for (PathMetadata path : paths) { - if (shouldQuery(path)) { + // Query remote cache for files even if omitted from uploading + if (shouldUpload(path) || path.isOmitted()) { filesToQuery.add(path); digestsToQuery.add(path.getDigest()); } else { @@ -329,16 +332,19 @@ private Single upload(Set files) { reporterUploadError(e); return new PathMetadata( file, - /*digest=*/ null, - /*directory=*/ false, - /*remote=*/ false, - /*omitted=*/ false); + /* digest= */ null, + /* directory= */ false, + /* remote= */ false, + /* omitted= */ false); } }) .collect(Collectors.toList()) .flatMap(paths -> queryRemoteCache(remoteCache, context, paths)) .flatMap(paths -> uploadLocalFiles(remoteCache, context, paths)) - .map(paths -> new PathConverterImpl(remoteServerInstanceName, paths)), + .map( + paths -> + new PathConverterImpl( + remoteServerInstanceName, paths, remoteBuildEventUploadMode)), RemoteCache::release); } @@ -377,17 +383,23 @@ private static class PathConverterImpl implements PathConverter { private final Set skippedPaths; private final Set localPaths; - PathConverterImpl(String remoteServerInstanceName, List uploads) { + PathConverterImpl( + String remoteServerInstanceName, + List uploads, + RemoteBuildEventUploadMode remoteBuildEventUploadMode) { Preconditions.checkNotNull(uploads); this.remoteServerInstanceName = remoteServerInstanceName; - pathToDigest = new HashMap<>(uploads.size()); + pathToDigest = Maps.newHashMapWithExpectedSize(uploads.size()); ImmutableSet.Builder skippedPaths = ImmutableSet.builder(); ImmutableSet.Builder localPaths = ImmutableSet.builder(); for (PathMetadata pair : uploads) { Path path = pair.getPath(); Digest digest = pair.getDigest(); if (digest != null) { - if (pair.isRemote()) { + // Always use bytestream:// in MINIMAL mode + if (remoteBuildEventUploadMode == RemoteBuildEventUploadMode.MINIMAL) { + pathToDigest.put(path, digest); + } else if (pair.isRemote()) { pathToDigest.put(path, digest); } else { localPaths.add(path); diff --git a/src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java b/src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java index 90b8a015bef571..c812a423aa3f77 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java +++ b/src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java @@ -302,9 +302,8 @@ public String getTypeDescription() { "If set to 'all', all local outputs referenced by BEP are uploaded to remote cache.\n" + "If set to 'minimal', local outputs referenced by BEP are not uploaded to the" + " remote cache, except for files that are important to the consumers of BEP (e.g." - + " test logs and timing profile).\n" - + "file:// scheme is used for the paths of local files and bytestream:// scheme is" - + " used for the paths of (already) uploaded files.\n" + + " test logs and timing profile). bytestream:// scheme is always used for the uri of" + + " files even if they are missing from remote cache.\n" + "Default to 'all'.") public RemoteBuildEventUploadMode remoteBuildEventUploadMode; diff --git a/src/test/shell/bazel/remote/remote_build_event_uploader_test.sh b/src/test/shell/bazel/remote/remote_build_event_uploader_test.sh index 4ab18ea3ec6645..6e9827162dccee 100755 --- a/src/test/shell/bazel/remote/remote_build_event_uploader_test.sh +++ b/src/test/shell/bazel/remote/remote_build_event_uploader_test.sh @@ -71,6 +71,32 @@ else declare -r EXE_EXT="" fi +BEP_JSON=bep.json + +function expect_bes_file_uploaded() { + local file=$1 + if [[ $(cat $BEP_JSON) =~ ${file}\",\"uri\":\"bytestream://localhost:${worker_port}/blobs/([^/]*) ]]; then + if ! remote_cas_file_exist ${BASH_REMATCH[1]}; then + cat $BEP_JSON >> $TEST_log && append_remote_cas_files $TEST_log && fail "$file is not uploaded" + fi + else + cat $BEP_JSON > $TEST_log + fail "$file is not converted to bytestream://" + fi +} + +function expect_bes_file_not_uploaded() { + local file=$1 + if [[ $(cat $BEP_JSON) =~ ${file}\",\"uri\":\"bytestream://localhost:${worker_port}/blobs/([^/]*) ]]; then + if remote_cas_file_exist ${BASH_REMATCH[1]}; then + cat $BEP_JSON >> $TEST_log && append_remote_cas_files $TEST_log && fail "$file is uploaded" + fi + else + cat $BEP_JSON > $TEST_log + fail "$file is not converted to bytestream://" + fi +} + function test_upload_minimal_convert_paths_for_existed_blobs() { mkdir -p a cat > a/BUILD <& $TEST_log || fail "Failed to build" - cat bep.json > $TEST_log - expect_log "a:foo.*bytestream://" || fail "paths for existed blobs should be converted" - expect_log "command.profile.gz.*bytestream://" || fail "should upload profile data" + expect_bes_file_uploaded foo.txt + expect_bes_file_uploaded command.profile.gz } function test_upload_minimal_doesnt_upload_missing_blobs() { @@ -106,12 +131,11 @@ EOF bazel build \ --remote_executor=grpc://localhost:${worker_port} \ --experimental_remote_build_event_upload=minimal \ - --build_event_json_file=bep.json \ + --build_event_json_file=$BEP_JSON \ //a:foo >& $TEST_log || fail "Failed to build" - cat bep.json > $TEST_log - expect_not_log "a:foo.*bytestream://" || fail "local files are uploaded" - expect_log "command.profile.gz.*bytestream://" || fail "should upload profile data" + expect_bes_file_not_uploaded foo.txt + expect_bes_file_uploaded command.profile.gz } function test_upload_minimal_respect_no_upload_results() { @@ -128,12 +152,11 @@ EOF --remote_cache=grpc://localhost:${worker_port} \ --remote_upload_local_results=false \ --experimental_remote_build_event_upload=minimal \ - --build_event_json_file=bep.json \ + --build_event_json_file=$BEP_JSON \ //a:foo >& $TEST_log || fail "Failed to build" - cat bep.json > $TEST_log - expect_not_log "a:foo.*bytestream://" || fail "local files are uploaded" - expect_log "command.profile.gz.*bytestream://" || fail "should upload profile data" + expect_bes_file_not_uploaded foo.txt + expect_bes_file_uploaded command.profile.gz } function test_upload_minimal_respect_no_upload_results_combined_cache() { @@ -154,12 +177,11 @@ EOF --incompatible_remote_results_ignore_disk \ --remote_upload_local_results=false \ --experimental_remote_build_event_upload=minimal \ - --build_event_json_file=bep.json \ + --build_event_json_file=$BEP_JSON \ //a:foo >& $TEST_log || fail "Failed to build" - cat bep.json > $TEST_log - expect_not_log "a:foo.*bytestream://" || fail "local files are uploaded" - expect_log "command.profile.gz.*bytestream://" || fail "should upload profile data" + expect_bes_file_not_uploaded foo.txt + expect_bes_file_uploaded command.profile.gz remote_cas_files="$(count_remote_cas_files)" [[ "$remote_cas_files" == 1 ]] || fail "Expected 1 remote cas entries, not $remote_cas_files" disk_cas_files="$(count_disk_cas_files $cache_dir)" @@ -187,12 +209,11 @@ EOF bazel build \ --remote_executor=grpc://localhost:${worker_port} \ --experimental_remote_build_event_upload=minimal \ - --build_event_json_file=bep.json \ + --build_event_json_file=$BEP_JSON \ //a:foo-alias >& $TEST_log || fail "Failed to build" - cat bep.json > $TEST_log - expect_not_log "a:foo.*bytestream://" - expect_log "command.profile.gz.*bytestream://" + expect_bes_file_not_uploaded foo.txt + expect_bes_file_uploaded command.profile.gz } function test_upload_minimal_trees_doesnt_upload_missing_blobs() { @@ -204,8 +225,9 @@ def _gen_output_dir_impl(ctx): outputs = [output_dir], inputs = [], command = """ - mkdir -p $1/sub; \ - index=0; while ((index<10)); do echo $index >$1/$index.txt; index=$(($index+1)); done + echo 0 > $1/0.txt + echo 1 > $1/1.txt + mkdir -p $1/sub echo "Shuffle, duffle, muzzle, muff" > $1/sub/bar """, arguments = [output_dir.path], @@ -233,13 +255,13 @@ EOF bazel build \ --remote_executor=grpc://localhost:${worker_port} \ --experimental_remote_build_event_upload=minimal \ - --build_event_json_file=bep.json \ + --build_event_json_file=$BEP_JSON \ //a:foo >& $TEST_log || fail "Failed to build" - cat bep.json > $TEST_log - expect_not_log "a:foo.*bytestream://" || fail "local tree files are uploaded" - expect_not_log "a/dir/.*bytestream://" || fail "local tree files are uploaded" - expect_log "command.profile.gz.*bytestream://" || fail "should upload profile data" + expect_bes_file_not_uploaded dir/0.txt + expect_bes_file_not_uploaded dir/1.txt + expect_bes_file_not_uploaded dir/sub/bar + expect_bes_file_uploaded command.profile.gz } function test_upload_minimal_upload_testlogs() { @@ -259,14 +281,13 @@ EOF bazel test \ --remote_executor=grpc://localhost:${worker_port} \ --experimental_remote_build_event_upload=minimal \ - --build_event_json_file=bep.json \ + --build_event_json_file=$BEP_JSON \ //a:test >& $TEST_log || fail "Failed to build" - cat bep.json > $TEST_log - expect_not_log "test.sh.*bytestream://" || fail "test script is uploaded" - expect_log "test.log.*bytestream://" || fail "should upload test.log" - expect_log "test.xml.*bytestream://" || fail "should upload test.xml" - expect_log "command.profile.gz.*bytestream://" || fail "should upload profile data" + expect_bes_file_not_uploaded test.sh + expect_bes_file_uploaded test.log + expect_bes_file_uploaded test.xml + expect_bes_file_uploaded command.profile.gz } function test_upload_minimal_upload_buildlogs() { @@ -283,13 +304,12 @@ EOF bazel build \ --remote_executor=grpc://localhost:${worker_port} \ --experimental_remote_build_event_upload=minimal \ - --build_event_json_file=bep.json \ + --build_event_json_file=$BEP_JSON \ //a:foo >& $TEST_log || true - cat bep.json > $TEST_log - expect_log "stdout.*bytestream://" || fail "should upload stdout" - expect_log "stderr.*bytestream://" || fail "should upload stderr" - expect_log "command.profile.gz.*bytestream://" || fail "should upload profile data" + expect_bes_file_uploaded stdout + expect_bes_file_uploaded stderr + expect_bes_file_uploaded command.profile.gz } function test_upload_minimal_upload_profile() { @@ -306,11 +326,10 @@ EOF --remote_executor=grpc://localhost:${worker_port} \ --experimental_remote_build_event_upload=minimal \ --profile=mycommand.profile.gz \ - --build_event_json_file=bep.json \ + --build_event_json_file=$BEP_JSON \ //a:foo >& $TEST_log || fail "Failed to build" - cat bep.json > $TEST_log - expect_log "mycommand.profile.gz.*bytestream://" || fail "should upload profile data" + expect_bes_file_uploaded "mycommand.profile.gz" } run_suite "Remote build event uploader tests" diff --git a/src/test/shell/bazel/remote/remote_utils.sh b/src/test/shell/bazel/remote/remote_utils.sh index 6317c1ffd608fe..0facf1b03beab4 100644 --- a/src/test/shell/bazel/remote/remote_utils.sh +++ b/src/test/shell/bazel/remote/remote_utils.sh @@ -96,3 +96,12 @@ function count_remote_cas_files() { echo 0 fi } + +function remote_cas_file_exist() { + local file=$1 + [ -f "$cas_path/cas/${file:0:2}/$file" ] +} + +function append_remote_cas_files() { + find "$cas_path/cas" -type f >> $1 +}