Skip to content

Commit

Permalink
WIP: Rewinds for lost inputs when trying to upload
Browse files Browse the repository at this point in the history
Steps to reproduce
------------------

1. Make a target like:
```
go_binary(
    name = "bin",
    srcs = ["main.go"],
)
```

2. Build it with remote execution and `--remote_download_toplevel`

3. Flush the remote storage (so that things like the Go builder are
evicted)

4. Modify main.go so that bazel sees a rebuild is needed.

5. Build again with remote execution

You should see a failure whose underlying cause is a
BulkTransferException caused by a FileNotFoundException.

Current status
--------------

This currently semi-works. The second build will fail, but if you do a
third build, it will succeed. I suspect this may be beacuse Skyframe
needs configuring to allow in-build restarts somewhere, but that's still
to be worked out.

There are also a smattering of TODOs across the commit, but none of
those are particularly scary or complicated :)

See bazelbuild#8250 (comment)
  • Loading branch information
illicitonion committed Jun 25, 2021
1 parent 6b33bdb commit 0cc9ee5
Show file tree
Hide file tree
Showing 15 changed files with 107 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ public ImmutableList<SpawnResult> exec(
}
} catch (InterruptedIOException e) {
throw new InterruptedException(e.getMessage());
} catch (LostInputsExecException e) {
throw e;
} catch (IOException e) {
throw new EnvironmentalExecException(
e,
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/remote/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ java_library(
],
deps = [
":ExecutionStatusException",
"//src/main/java/com/google/devtools/build/lib/actions",
"//src/main/java/com/google/devtools/build/lib/remote/options",
"//third_party:guava",
"//third_party:jsr305",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import build.bazel.remote.execution.v2.ExecutionGrpc.ExecutionBlockingStub;
import build.bazel.remote.execution.v2.WaitExecutionRequest;
import com.google.common.base.Preconditions;
import com.google.devtools.build.lib.actions.ExecException;
import com.google.devtools.build.lib.authandtls.CallCredentialsProvider;
import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
import com.google.devtools.build.lib.remote.RemoteRetrier.ProgressiveBackoff;
Expand Down Expand Up @@ -108,7 +109,7 @@ private static class Execution {
this.executionBlockingStubSupplier = executionBlockingStubSupplier;
}

ExecuteResponse start() throws IOException, InterruptedException {
ExecuteResponse start() throws ExecException, IOException, InterruptedException {
// Execute has two components: the Execute call and (optionally) the WaitExecution call.
// This is the simple flow without any errors:
//
Expand Down Expand Up @@ -315,7 +316,12 @@ public ExecuteResponse executeRemotely(ExecuteRequest request, OperationObserver
Execution execution =
new Execution(
request, observer, retrier, callCredentialsProvider, this::executionBlockingStub);
return execution.start();
try {
return execution.start();
} catch (ExecException e) {
// TODO: Work out what to actually do here
throw new IOException(e);
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,25 @@

import build.bazel.remote.execution.v2.Digest;
import build.bazel.remote.execution.v2.Directory;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.devtools.build.lib.actions.ActionInputDepOwnerMap;
import com.google.devtools.build.lib.actions.Artifact.DerivedArtifact;
import com.google.devtools.build.lib.actions.FileArtifactValue.RemoteFileArtifactValue;
import com.google.devtools.build.lib.actions.LostInputsActionExecutionException;
import com.google.devtools.build.lib.actions.LostInputsExecException;
import com.google.devtools.build.lib.remote.common.RemoteCacheClient;
import com.google.devtools.build.lib.remote.merkletree.MerkleTree;
import com.google.devtools.build.lib.remote.merkletree.MerkleTree.PathOrBytes;
import com.google.devtools.build.lib.remote.options.RemoteOptions;
import com.google.devtools.build.lib.remote.util.DigestUtil;
import com.google.protobuf.Message;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
Expand All @@ -53,7 +62,7 @@ public RemoteExecutionCache(
* However, remote execution uses a cache to store input files, and that may be a separate
* end-point from the executor itself, so the functionality lives here.
*/
public void ensureInputsPresent(MerkleTree merkleTree, Map<Digest, Message> additionalInputs)
public void ensureInputsPresent(MerkleTree merkleTree, Map<Digest, Message> additionalInputs, String actionId)
throws IOException, InterruptedException {
Iterable<Digest> allDigests =
Iterables.concat(merkleTree.getAllDigests(), additionalInputs.keySet());
Expand All @@ -62,14 +71,14 @@ public void ensureInputsPresent(MerkleTree merkleTree, Map<Digest, Message> addi

List<ListenableFuture<Void>> uploadFutures = new ArrayList<>();
for (Digest missingDigest : missingDigests) {
uploadFutures.add(uploadBlob(missingDigest, merkleTree, additionalInputs));
uploadFutures.add(uploadBlob(missingDigest, merkleTree, additionalInputs, actionId));
}

waitForBulkTransfer(uploadFutures, /* cancelRemainingOnInterrupt=*/ false);
}

private ListenableFuture<Void> uploadBlob(
Digest digest, MerkleTree merkleTree, Map<Digest, Message> additionalInputs) {
Digest digest, MerkleTree merkleTree, Map<Digest, Message> additionalInputs, String actionId) {
Directory node = merkleTree.getDirectoryByDigest(digest);
if (node != null) {
return cacheProtocol.uploadBlob(digest, node.toByteString());
Expand All @@ -80,7 +89,22 @@ private ListenableFuture<Void> uploadBlob(
if (file.getBytes() != null) {
return cacheProtocol.uploadBlob(digest, file.getBytes());
}
return cacheProtocol.uploadFile(digest, file.getPath());
return Futures.catchingAsync(cacheProtocol.uploadFile(digest, file.getPath()),
FileNotFoundException.class, e -> {
if (file.getArtifact() != null) {
ActionInputDepOwnerMap owners = new ActionInputDepOwnerMap(
ImmutableList.of(file.getArtifact()));
RemoteFileArtifactValue artifactValue = new RemoteFileArtifactValue(
DigestUtil.toBinaryDigest(digest),
digest.getSizeBytes(),
/*locationIndex=*/ 1,
actionId);
owners.put(file.getArtifact(), artifactValue, (DerivedArtifact) file.getArtifact());
return Futures.immediateFailedFuture(new LostInputsExecException(ImmutableMap.of(digest.getHash(), file.getArtifact()),
owners));
}
return Futures.immediateFailedFuture(e);
}, MoreExecutors.directExecutor());
}

Message message = additionalInputs.get(digest);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ public ExecutionResult execute(
additionalInputs.put(actionDigest, action);
additionalInputs.put(commandHash, command);

remoteCache.ensureInputsPresent(merkleTree, additionalInputs);
remoteCache.ensureInputsPresent(merkleTree, additionalInputs, "");
}

try (SilentCloseable c =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.util.concurrent.ListeningScheduledExecutorService;
import com.google.devtools.build.lib.actions.ExecException;
import com.google.devtools.build.lib.remote.options.RemoteOptions;
import io.grpc.Status;
import io.grpc.StatusRuntimeException;
Expand Down Expand Up @@ -110,7 +111,7 @@ public RemoteRetrier(
* RuntimeException}.
*/
@Override
public <T> T execute(Callable<T> call) throws IOException, InterruptedException {
public <T> T execute(Callable<T> call) throws ExecException, IOException, InterruptedException {
return execute(call, newBackoff());
}

Expand All @@ -120,12 +121,13 @@ public <T> T execute(Callable<T> call) throws IOException, InterruptedException
* in {@link RuntimeException}.
*/
@Override
public <T> T execute(Callable<T> call, Backoff backoff) throws IOException, InterruptedException {
public <T> T execute(Callable<T> call, Backoff backoff) throws ExecException, IOException, InterruptedException {
try {
return super.execute(call, backoff);
} catch (Exception e) {
Throwables.throwIfInstanceOf(e, IOException.class);
Throwables.throwIfInstanceOf(e, InterruptedException.class);
Throwables.throwIfInstanceOf(e, ExecException.class);
Throwables.throwIfUnchecked(e);
throw new RuntimeException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import build.bazel.remote.execution.v2.ServerCapabilities;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.devtools.build.lib.actions.ExecException;
import com.google.devtools.build.lib.remote.options.RemoteOptions;
import com.google.devtools.build.lib.remote.util.TracingMetadataUtils;
import io.grpc.CallCredentials;
Expand Down Expand Up @@ -80,6 +81,9 @@ public ServerCapabilities get(String buildRequestId, String commandId)
throw (IOException) e.getCause();
}
throw new IOException(e);
} catch (ExecException e) {
// TODO: Work out what to actually do here
throw new IOException(e);
} finally {
withMetadata.detach(previous);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.CommandLines.ParamFileActionInput;
import com.google.devtools.build.lib.actions.ExecException;
import com.google.devtools.build.lib.actions.LostInputsExecException;
import com.google.devtools.build.lib.actions.Spawn;
import com.google.devtools.build.lib.actions.SpawnMetrics;
import com.google.devtools.build.lib.actions.SpawnResult;
Expand Down Expand Up @@ -282,7 +283,7 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionContext context)
// Look up action cache, and reuse the action output if it is found.
ActionKey actionKey = digestUtil.computeActionKey(action);
Context withMetadata =
TracingMetadataUtils.contextWithMetadata(buildRequestId, commandId, actionKey)
TracingMetadataUtils.contextWithMetadata(buildRequestId, commandId, actionKey.getDigest().getHash())
.withValue(NetworkTime.CONTEXT_KEY, networkTime);
Context previous = withMetadata.attach();
Profiler prof = Profiler.instance();
Expand Down Expand Up @@ -356,7 +357,18 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionContext context)
additionalInputs.put(commandHash, command);
Duration networkTimeStart = networkTime.getDuration();
Stopwatch uploadTime = Stopwatch.createStarted();
remoteCache.ensureInputsPresent(merkleTree, additionalInputs);
try {
remoteCache.ensureInputsPresent(merkleTree, additionalInputs, actionKey.toString());
} catch (BulkTransferException e) {
// TODO: Check over all exceptions and merge them, maybe as a built-in piece of
// functionality in BulkTransferException (like isOnlyCausedByCacheNotFoundException),
// rather than asserting there's only one.
if (e.getSuppressed().length == 1 && e instanceof IOException && e.getSuppressed()[0].getCause() != null && e.getSuppressed()[0].getCause() instanceof LostInputsExecException) {
throw (LostInputsExecException) e.getSuppressed()[0].getCause();
} else {
throw e;
}
}
// subtract network time consumed here to ensure wall clock during upload is not
// double
// counted, and metrics time computation does not exceed total time
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ java_library(
name = "downloader",
srcs = glob(["*.java"]),
deps = [
"//src/main/java/com/google/devtools/build/lib/actions",
"//src/main/java/com/google/devtools/build/lib/bazel/repository/downloader",
"//src/main/java/com/google/devtools/build/lib/events",
"//src/main/java/com/google/devtools/build/lib/remote:ReferenceCountedChannel",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import build.bazel.remote.execution.v2.Digest;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings;
import com.google.devtools.build.lib.actions.ExecException;
import com.google.devtools.build.lib.bazel.repository.downloader.Checksum;
import com.google.devtools.build.lib.bazel.repository.downloader.Downloader;
import com.google.devtools.build.lib.bazel.repository.downloader.HashOutputStream;
Expand Down Expand Up @@ -127,6 +128,9 @@ public void download(
}));
} catch (StatusRuntimeException e) {
throw new IOException(e);
} catch (ExecException e) {
// TODO: Work out what to actually do here
throw new IOException(e);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.Sets;
import com.google.devtools.build.lib.actions.ActionInput;
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.protobuf.ByteString;
Expand All @@ -26,6 +28,7 @@
import java.util.Map;
import java.util.Objects;
import java.util.SortedSet;
import javax.annotation.Nullable;

/**
* Intermediate tree representation of a list of lexicographically sorted list of files. Each node
Expand Down Expand Up @@ -71,20 +74,22 @@ public boolean equals(Object o) {
static class FileNode extends Node {
private final Path path;
private final ByteString data;
private final ActionInput artifact;
private final Digest digest;

FileNode(String pathSegment, Path path, Digest digest) {
FileNode(String pathSegment, Path path, ActionInput artifact, Digest digest) {
super(pathSegment);
this.path = Preconditions.checkNotNull(path, "path");
this.data = null;
this.digest = Preconditions.checkNotNull(digest, "digest");
this.artifact = artifact;
}

FileNode(String pathSegment, ByteString data, Digest digest) {
super(pathSegment);
this.path = null;
this.artifact = null;
this.data = Preconditions.checkNotNull(data, "data");
;
this.digest = Preconditions.checkNotNull(digest, "digest");
}

Expand All @@ -100,9 +105,13 @@ ByteString getBytes() {
return data;
}

@Nullable ActionInput getArtifact() {
return artifact;
}

@Override
public int hashCode() {
return Objects.hash(super.hashCode(), path, data, digest);
return Objects.hash(super.hashCode(), path, data, digest, artifact);
}

@Override
Expand All @@ -112,7 +121,8 @@ public boolean equals(Object o) {
return super.equals(other)
&& Objects.equals(path, other.path)
&& Objects.equals(data, other.data)
&& Objects.equals(digest, other.digest);
&& Objects.equals(digest, other.digest)
&& Objects.equals(artifact, other.artifact);
}
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ private static int buildFromPaths(
throw new IOException(String.format("Input '%s' is not a file.", input));
}
Digest d = digestUtil.compute(input);
currDir.addChild(new FileNode(path.getBaseName(), input, d));
currDir.addChild(new FileNode(path.getBaseName(), input, null, d));
return 1;
});
}
Expand Down Expand Up @@ -141,7 +141,7 @@ private static int buildFromActionInputs(
Digest d = DigestUtil.buildDigest(metadata.getDigest(), metadata.getSize());
currDir.addChild(
new FileNode(
path.getBaseName(), ActionInputHelper.toInputPath(input, execRoot), d));
path.getBaseName(), ActionInputHelper.toInputPath(input, execRoot), input, d));
return 1;

case DIRECTORY:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.devtools.build.lib.actions.ActionInput;
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.Artifact.DerivedArtifact;
import com.google.devtools.build.lib.actions.MetadataProvider;
import com.google.devtools.build.lib.profiler.Profiler;
import com.google.devtools.build.lib.profiler.SilentCloseable;
Expand All @@ -42,17 +44,20 @@ public class MerkleTree {
/** A path or contents */
public static class PathOrBytes {

private final Path path;
private final ByteString bytes;
@Nullable private final Path path;
@Nullable private final ByteString bytes;
@Nullable private final ActionInput artifact;

public PathOrBytes(Path path) {
public PathOrBytes(Path path, @Nullable ActionInput artifact) {
this.path = Preconditions.checkNotNull(path, "path");
this.bytes = null;
this.artifact = artifact;
}

public PathOrBytes(ByteString bytes) {
this.bytes = Preconditions.checkNotNull(bytes, "bytes");
this.path = null;
this.artifact = null;
}

@Nullable
Expand All @@ -64,6 +69,11 @@ public Path getPath() {
public ByteString getBytes() {
return bytes;
}

@Nullable
public ActionInput getArtifact() {
return artifact;
}
}

private final Map<Digest, Directory> digestDirectoryMap;
Expand Down Expand Up @@ -211,7 +221,7 @@ private static DirectoryNode buildProto(DirectoryTree.DirectoryNode dir, Digest

private static PathOrBytes toPathOrBytes(DirectoryTree.FileNode file) {
return file.getPath() != null
? new PathOrBytes(file.getPath())
? new PathOrBytes(file.getPath(), file.getArtifact())
: new PathOrBytes(file.getBytes());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -273,8 +273,12 @@ private static Set<Artifact.DerivedArtifact> getLostInputOwningDirectDeps(
}
}

if (lostInput instanceof Artifact
&& failedActionDeps.contains(Artifact.key((Artifact) lostInput))) {
if (lostInput instanceof Artifact) {
// TODO: The below check appears to check that the set of SkyKeys contains an element which
// exact-matches the lost input, but the lost input itself is actually contained in the the NestedSet _inside_ one of the SkyKeys which is a ArtifactNestedSetKey.
// This logic should probably do something along the lines of:
// && failedActionDeps.stream.anyMatch(skyKey -> skyKey instanceof ArtifactNestedSetKey && ((ArtifactNestedSetKey)skyKey).getSet().contains(Artifact.key((Artifact) lostInput)) {
//&& failedActionDeps.contains(Artifact.key((Artifact) lostInput))) {
checkDerived(
/*lostInputQualifier=*/ "", (Artifact) lostInput, failedAction, lostInputsException);

Expand Down
Loading

0 comments on commit 0cc9ee5

Please sign in to comment.