Skip to content

Commit

Permalink
Remote: Report checking cache status before the action is scheduled t…
Browse files Browse the repository at this point in the history
…o run remotely.

Add a Caching state to ActionState. This state indicates that the action is checking the cache, and it should happen before the Scheduling state.

Change ProgressStatus from enum to interface so that we can pass more data to the event handler (which is required to report upload/download details later).

Fixes #13531.

Closes #13555.

PiperOrigin-RevId: 378800212
  • Loading branch information
coeuvre authored and copybara-github committed Jun 11, 2021
1 parent f1b1cc5 commit f0983df
Show file tree
Hide file tree
Showing 19 changed files with 346 additions and 84 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright 2021 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.actions;

import static com.google.common.base.Preconditions.checkNotNull;

import com.google.auto.value.AutoValue;
import com.google.devtools.build.lib.events.ExtendedEventHandler.ProgressLike;

/** Event signalling that an action currently in flight is checking the cache. */
@AutoValue
public abstract class CachingActionEvent implements ProgressLike {

  /** Returns the metadata of the action this event is about. */
  public abstract ActionExecutionMetadata action();

  /** Returns the name of the strategy on which the action is caching. */
  public abstract String strategy();

  /**
   * Creates an event for the given action and strategy name.
   *
   * @param action metadata of the action that is checking the cache
   * @param strategy name of the strategy doing the caching; must not be null
   * @return a new {@link CachingActionEvent} carrying both values
   * @throws NullPointerException if {@code strategy} is null
   */
  public static CachingActionEvent create(ActionExecutionMetadata action, String strategy) {
    // The strategy name is validated explicitly so the failure carries a descriptive message.
    String checkedStrategy = checkNotNull(strategy, "Strategy names are not optional");
    return new AutoValue_CachingActionEvent(action, checkedStrategy);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@
import com.google.devtools.build.lib.actions.LostInputsActionExecutionException;
import com.google.devtools.build.lib.actions.LostInputsExecException;
import com.google.devtools.build.lib.actions.MetadataProvider;
import com.google.devtools.build.lib.actions.RunningActionEvent;
import com.google.devtools.build.lib.actions.SandboxedSpawnStrategy;
import com.google.devtools.build.lib.actions.SchedulingActionEvent;
import com.google.devtools.build.lib.actions.Spawn;
import com.google.devtools.build.lib.actions.SpawnExecutedEvent;
import com.google.devtools.build.lib.actions.SpawnResult;
Expand Down Expand Up @@ -318,7 +316,7 @@ public SortedMap<PathFragment, ActionInput> getInputMapping(PathFragment baseDir
}

@Override
public void report(ProgressStatus state, String name) {
public void report(ProgressStatus progress) {
ActionExecutionMetadata action = spawn.getResourceOwner();
if (action.getOwner() == null) {
return;
Expand All @@ -332,17 +330,7 @@ public void report(ProgressStatus state, String name) {

// TODO(ulfjack): We should report more details to the UI.
ExtendedEventHandler eventHandler = actionExecutionContext.getEventHandler();
switch (state) {
case EXECUTING:
case CHECKING_CACHE:
eventHandler.post(new RunningActionEvent(action, name));
break;
case SCHEDULING:
eventHandler.post(new SchedulingActionEvent(action, name));
break;
default:
break;
}
progress.postTo(eventHandler, action);
}

@Override
Expand Down
9 changes: 8 additions & 1 deletion src/main/java/com/google/devtools/build/lib/exec/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -264,14 +264,21 @@ java_library(

java_library(
name = "spawn_runner",
srcs = ["SpawnRunner.java"],
srcs = [
"SpawnCheckingCacheEvent.java",
"SpawnExecutingEvent.java",
"SpawnRunner.java",
"SpawnSchedulingEvent.java",
],
deps = [
":tree_deleter",
"//src/main/java/com/google/devtools/build/lib/actions",
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/events",
"//src/main/java/com/google/devtools/build/lib/util/io",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
"//third_party:auto_value",
"//third_party:jsr305",
],
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright 2021 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.exec;

import com.google.auto.value.AutoValue;
import com.google.devtools.build.lib.actions.ActionExecutionMetadata;
import com.google.devtools.build.lib.actions.CachingActionEvent;
import com.google.devtools.build.lib.events.ExtendedEventHandler;
import com.google.devtools.build.lib.exec.SpawnRunner.ProgressStatus;

/** Progress status reported while a {@link SpawnRunner} is looking for a cache hit. */
@AutoValue
public abstract class SpawnCheckingCacheEvent implements ProgressStatus {

  /** Returns the name this event was created with. */
  public abstract String name();

  /**
   * Creates a cache-checking progress status.
   *
   * @param name the name to attach to the event; it is forwarded as the strategy name of the
   *     {@link CachingActionEvent} posted by {@link #postTo}
   * @return a new {@link SpawnCheckingCacheEvent}
   */
  public static SpawnCheckingCacheEvent create(String name) {
    return new AutoValue_SpawnCheckingCacheEvent(name);
  }

  /**
   * Posts a {@link CachingActionEvent} for {@code action}, tagged with {@link #name()}, to the
   * given handler.
   *
   * @param eventHandler the handler that receives the event
   * @param action metadata of the action whose progress is being reported
   */
  @Override
  public void postTo(ExtendedEventHandler eventHandler, ActionExecutionMetadata action) {
    CachingActionEvent cachingEvent = CachingActionEvent.create(action, name());
    eventHandler.post(cachingEvent);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright 2021 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.exec;

import com.google.auto.value.AutoValue;
import com.google.devtools.build.lib.actions.ActionExecutionMetadata;
import com.google.devtools.build.lib.actions.RunningActionEvent;
import com.google.devtools.build.lib.events.ExtendedEventHandler;
import com.google.devtools.build.lib.exec.SpawnRunner.ProgressStatus;

/**
 * Progress status reported once a {@link SpawnRunner} has acquired the resources to execute and
 * did not find a cache hit. It MUST be posted before the subprocess is started.
 *
 * <p>A caching {@link SpawnRunner} should normally post this only after a cache lookup has failed.
 * It may still post it when lookup and execution are a single step, for example one RPC call that
 * offers no way to report a cache miss.
 */
@AutoValue
public abstract class SpawnExecutingEvent implements ProgressStatus {

  /** Returns the name this event was created with. */
  public abstract String name();

  /**
   * Creates an executing progress status.
   *
   * @param name the name to attach to the event; it is handed to the {@link RunningActionEvent}
   *     posted by {@link #postTo}
   * @return a new {@link SpawnExecutingEvent}
   */
  public static SpawnExecutingEvent create(String name) {
    return new AutoValue_SpawnExecutingEvent(name);
  }

  /**
   * Posts a {@link RunningActionEvent} built from {@code action} and {@link #name()} to the given
   * handler.
   *
   * @param eventHandler the handler that receives the event
   * @param action metadata of the action whose progress is being reported
   */
  @Override
  public void postTo(ExtendedEventHandler eventHandler, ActionExecutionMetadata action) {
    RunningActionEvent runningEvent = new RunningActionEvent(action, name());
    eventHandler.post(runningEvent);
  }
}
26 changes: 6 additions & 20 deletions src/main/java/com/google/devtools/build/lib/exec/SpawnRunner.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package com.google.devtools.build.lib.exec;

import com.google.devtools.build.lib.actions.ActionContext;
import com.google.devtools.build.lib.actions.ActionExecutionMetadata;
import com.google.devtools.build.lib.actions.ActionInput;
import com.google.devtools.build.lib.actions.Artifact.ArtifactExpander;
import com.google.devtools.build.lib.actions.ArtifactPathResolver;
Expand All @@ -25,6 +26,7 @@
import com.google.devtools.build.lib.actions.Spawn;
import com.google.devtools.build.lib.actions.SpawnResult;
import com.google.devtools.build.lib.actions.cache.MetadataInjector;
import com.google.devtools.build.lib.events.ExtendedEventHandler;
import com.google.devtools.build.lib.util.io.FileOutErr;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
Expand Down Expand Up @@ -104,25 +106,9 @@ public interface SpawnRunner {
* <p>{@link SpawnRunner} implementations should post a progress status before any potentially
* long-running operation.
*/
enum ProgressStatus {
/** Spawn is waiting for local or remote resources to become available. */
SCHEDULING,

/** The {@link SpawnRunner} is looking for a cache hit. */
CHECKING_CACHE,

/**
* Resources are acquired, and there was probably no cache hit. This MUST be posted before
* attempting to execute the subprocess.
*
* <p>Caching {@link SpawnRunner} implementations should only post this after a failed cache
* lookup, but may post this if cache lookup and execution happen within the same step, e.g. as
* part of a single RPC call with no mechanism to report cache misses.
*/
EXECUTING,

/** Downloading outputs from a remote machine. */
DOWNLOADING
interface ProgressStatus {
/** Post this progress event to the given {@link ExtendedEventHandler}. */
void postTo(ExtendedEventHandler eventHandler, ActionExecutionMetadata action);
}

/**
Expand Down Expand Up @@ -214,7 +200,7 @@ SortedMap<PathFragment, ActionInput> getInputMapping(PathFragment baseDirectory)
throws IOException, ForbiddenActionInputException;

/** Reports a progress update to the Spawn strategy. */
void report(ProgressStatus state, String name);
void report(ProgressStatus progress);

/**
* Returns a {@link MetadataInjector} that allows a caller to inject metadata about spawn
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright 2021 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.exec;

import com.google.auto.value.AutoValue;
import com.google.devtools.build.lib.actions.ActionExecutionMetadata;
import com.google.devtools.build.lib.actions.SchedulingActionEvent;
import com.google.devtools.build.lib.events.ExtendedEventHandler;
import com.google.devtools.build.lib.exec.SpawnRunner.ProgressStatus;

/**
 * Progress status reported while a {@link SpawnRunner} waits for local or remote resources to
 * become available.
 */
@AutoValue
public abstract class SpawnSchedulingEvent implements ProgressStatus {

  /** Returns the name this event was created with. */
  public abstract String name();

  /**
   * Creates a scheduling progress status.
   *
   * @param name the name to attach to the event; it is handed to the {@link
   *     SchedulingActionEvent} posted by {@link #postTo}
   * @return a new {@link SpawnSchedulingEvent}
   */
  public static SpawnSchedulingEvent create(String name) {
    return new AutoValue_SpawnSchedulingEvent(name);
  }

  /**
   * Posts a {@link SchedulingActionEvent} built from {@code action} and {@link #name()} to the
   * given handler.
   *
   * @param eventHandler the handler that receives the event
   * @param action metadata of the action whose progress is being reported
   */
  @Override
  public void postTo(ExtendedEventHandler eventHandler, ActionExecutionMetadata action) {
    SchedulingActionEvent schedulingEvent = new SchedulingActionEvent(action, name());
    eventHandler.post(schedulingEvent);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
import com.google.devtools.build.lib.exec.BinTools;
import com.google.devtools.build.lib.exec.RunfilesTreeUpdater;
import com.google.devtools.build.lib.exec.SpawnExecutingEvent;
import com.google.devtools.build.lib.exec.SpawnRunner;
import com.google.devtools.build.lib.exec.SpawnSchedulingEvent;
import com.google.devtools.build.lib.profiler.Profiler;
import com.google.devtools.build.lib.profiler.ProfilerTask;
import com.google.devtools.build.lib.profiler.SilentCloseable;
Expand Down Expand Up @@ -131,10 +133,10 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionContext context)
Profiler.instance()
.profile(ProfilerTask.LOCAL_EXECUTION, spawn.getResourceOwner().getMnemonic())) {
ActionExecutionMetadata owner = spawn.getResourceOwner();
context.report(ProgressStatus.SCHEDULING, getName());
context.report(SpawnSchedulingEvent.create(getName()));
try (ResourceHandle handle =
resourceManager.acquireResources(owner, spawn.getLocalResources())) {
context.report(ProgressStatus.EXECUTING, getName());
context.report(SpawnExecutingEvent.create(getName()));
if (!localExecutionOptions.localLockfreeOutput) {
context.lockOutputFiles();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
import com.google.devtools.build.lib.events.Event;
import com.google.devtools.build.lib.events.Reporter;
import com.google.devtools.build.lib.exec.SpawnCache;
import com.google.devtools.build.lib.exec.SpawnRunner.ProgressStatus;
import com.google.devtools.build.lib.exec.SpawnCheckingCacheEvent;
import com.google.devtools.build.lib.exec.SpawnExecutingEvent;
import com.google.devtools.build.lib.exec.SpawnRunner.SpawnExecutionContext;
import com.google.devtools.build.lib.profiler.Profiler;
import com.google.devtools.build.lib.profiler.ProfilerTask;
Expand All @@ -54,6 +55,12 @@
@ThreadSafe // If the RemoteActionCache implementation is thread-safe.
final class RemoteSpawnCache implements SpawnCache {

private static final SpawnCheckingCacheEvent SPAWN_CHECKING_CACHE_EVENT =
SpawnCheckingCacheEvent.create("remote-cache");

private static final SpawnExecutingEvent SPAWN_EXECUTING_EVENT =
SpawnExecutingEvent.create("remote-cache");

private final Path execRoot;
private final RemoteOptions options;
private final boolean verboseFailures;
Expand Down Expand Up @@ -97,7 +104,7 @@ public CacheHandle lookup(Spawn spawn, SpawnExecutionContext context)
Profiler prof = Profiler.instance();
if (options.remoteAcceptCached
|| (options.incompatibleRemoteResultsIgnoreDisk && useDiskCache(options))) {
context.report(ProgressStatus.CHECKING_CACHE, "remote-cache");
context.report(SPAWN_CHECKING_CACHE_EVENT);
// Metadata will be available in context.current() until we detach.
// This is done via a thread-local variable.
try {
Expand Down Expand Up @@ -151,6 +158,8 @@ public CacheHandle lookup(Spawn spawn, SpawnExecutionContext context)
}
}

context.report(SPAWN_EXECUTING_EVENT);

context.prefetchInputs();

if (options.remoteUploadLocalResults
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@
import com.google.devtools.build.lib.exec.AbstractSpawnStrategy;
import com.google.devtools.build.lib.exec.ExecutionOptions;
import com.google.devtools.build.lib.exec.RemoteLocalFallbackRegistry;
import com.google.devtools.build.lib.exec.SpawnCheckingCacheEvent;
import com.google.devtools.build.lib.exec.SpawnExecutingEvent;
import com.google.devtools.build.lib.exec.SpawnRunner;
import com.google.devtools.build.lib.exec.SpawnSchedulingEvent;
import com.google.devtools.build.lib.profiler.Profiler;
import com.google.devtools.build.lib.profiler.ProfilerTask;
import com.google.devtools.build.lib.profiler.SilentCloseable;
Expand Down Expand Up @@ -81,6 +84,15 @@
@ThreadSafe
public class RemoteSpawnRunner implements SpawnRunner {

private static final SpawnCheckingCacheEvent SPAWN_CHECKING_CACHE_EVENT =
SpawnCheckingCacheEvent.create("remote");

private static final SpawnSchedulingEvent SPAWN_SCHEDULING_EVENT =
SpawnSchedulingEvent.create("remote");

private static final SpawnExecutingEvent SPAWN_EXECUTING_EVENT =
SpawnExecutingEvent.create("remote");

private final Path execRoot;
private final RemoteOptions remoteOptions;
private final ExecutionOptions executionOptions;
Expand Down Expand Up @@ -143,7 +155,7 @@ public void onNext(Operation o) throws IOException {
}

public void reportExecuting() {
context.report(ProgressStatus.EXECUTING, getName());
context.report(SPAWN_EXECUTING_EVENT);
reportedExecuting = true;
}

Expand All @@ -165,8 +177,6 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionContext context)
boolean uploadLocalResults = remoteOptions.remoteUploadLocalResults && spawnCacheableRemotely;
boolean acceptCachedResult = remoteOptions.remoteAcceptCached && spawnCacheableRemotely;

context.report(ProgressStatus.SCHEDULING, getName());

RemoteAction action = remoteExecutionService.buildRemoteAction(spawn, context);
SpawnMetrics.Builder spawnMetrics =
SpawnMetrics.Builder.forRemoteExec()
Expand All @@ -179,6 +189,8 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionContext context)

Profiler prof = Profiler.instance();
try {
context.report(SPAWN_CHECKING_CACHE_EVENT);

// Try to lookup the action in the action cache.
RemoteActionResult cachedResult;
try (SilentCloseable c = prof.profile(ProfilerTask.REMOTE_CACHE_CHECK, "check cache hit")) {
Expand Down Expand Up @@ -232,6 +244,8 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionContext context)
.minus(action.getNetworkTime().getDuration().minus(networkTimeStart)));
}

context.report(SPAWN_SCHEDULING_EVENT);

ExecutingStatusReporter reporter = new ExecutingStatusReporter(context);
RemoteActionResult result;
try (SilentCloseable c = prof.profile(REMOTE_EXECUTION, "execute remotely")) {
Expand Down
Loading

0 comments on commit f0983df

Please sign in to comment.