Skip to content

Commit

Permalink
Attempt to better distinguish restart/reset/rewind.
Browse files Browse the repository at this point in the history
* Restart: `SkyFunction#compute` returns null because values were missing. It is called again after the missing values are ready.
* Reset: In-progress evaluation of a node is started over to recover from an inconsistency. Temporary direct deps are cleared.
* Rewind: A done node is marked dirty with `DirtyType#REWIND`. It evaluates again to regenerate its value.

PiperOrigin-RevId: 567027792
Change-Id: I4c7b094005a15fd16f1d755de3b4d9a77f02da31
  • Loading branch information
justinhorvitz authored and copybara-github committed Sep 20, 2023
1 parent 7f60def commit c3d445a
Show file tree
Hide file tree
Showing 27 changed files with 247 additions and 230 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,8 @@ public void dependOnFuture(ListenableFuture<?> future) {
}

@Override
public boolean restartPermitted() {
return delegate.restartPermitted();
public boolean resetPermitted() {
return delegate.resetPermitted();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ private SkyValue computeInternal(SkyKey skyKey, Environment env)
directories.getRelativeOutputPath(),
checkedInputs.actionInputMap,
action.getOutputs(),
env.restartPermitted());
env.resetPermitted());
}
}

Expand Down Expand Up @@ -426,10 +426,10 @@ private boolean declareDepsOnLostDiscoveredInputsIfAny(Environment env, Action a
}

/**
* Clean up state associated with the current action execution attempt and return a {@link
* Restart} value which rewinds the actions that generate the lost inputs.
* Cleans up state associated with the current action execution attempt and returns a {@link
* SkyFunction.Reset} value which rewinds the actions that generate the lost inputs.
*/
private SkyFunction.Restart handleLostInputs(
private SkyFunction.Reset handleLostInputs(
LostInputsActionExecutionException e,
ActionLookupData actionLookupData,
Action action,
Expand Down Expand Up @@ -480,10 +480,11 @@ private SkyFunction.Restart handleLostInputs(
try {
rewindPlan =
actionRewindStrategy.getRewindPlan(
action, actionLookupData, failedActionDeps, e, inputDepOwners, env);
actionLookupData, action, failedActionDeps, e, inputDepOwners, env);
} catch (ActionExecutionException rewindingFailedException) {
// This ensures coalesced shared actions aren't orphaned.
skyframeActionExecutor.resetRewindingAction(actionLookupData, action, lostDiscoveredInputs);
skyframeActionExecutor.prepareForRewinding(
actionLookupData, action, lostDiscoveredInputs, /* depsToRewind= */ ImmutableList.of());
throw new ActionExecutionFunctionException(
new AlreadyReportedActionExecutionException(
skyframeActionExecutor.processAndGetExceptionToThrow(
Expand All @@ -499,11 +500,9 @@ private SkyFunction.Restart handleLostInputs(
env.getListener()
.post(new ActionRewoundEvent(actionStartTimeNanos, BlazeClock.nanoTime(), action));
}
skyframeActionExecutor.resetRewindingAction(actionLookupData, action, lostDiscoveredInputs);
for (Action actionToRestart : rewindPlan.getAdditionalActionsToRestart()) {
skyframeActionExecutor.resetPreviouslyCompletedAction(actionLookupData, actionToRestart);
}
return rewindPlan.getNodesToRestart();
skyframeActionExecutor.prepareForRewinding(
actionLookupData, action, lostDiscoveredInputs, rewindPlan.getDepsToRewind());
return rewindPlan.getReset();
} finally {
if (rewindPlan == null && e.isActionStartedEventAlreadyEmitted()) {
// Rewinding was unsuccessful. SkyframeActionExecutor's ActionRunner didn't emit an
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ public void dependOnFuture(ListenableFuture<?> future) {
}

@Override
public boolean restartPermitted() {
return delegate.restartPermitted();
public boolean resetPermitted() {
return delegate.resetPermitted();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -425,16 +425,43 @@ boolean shouldEmitProgressEvents(Action action) {
new OwnerlessArtifactWrapper(action.getPrimaryOutput()));
}

void resetPreviouslyCompletedAction(ActionLookupData actionLookupData, Action action) {
/**
* Called to prepare action execution states for rewinding after {@code failedAction} observed
* lost inputs.
*/
void prepareForRewinding(
ActionLookupData failedKey,
Action failedAction,
ImmutableList<SkyKey> lostDiscoveredInputs,
ImmutableList<Action> depsToRewind) {
var ownerlessArtifactWrapper = new OwnerlessArtifactWrapper(failedAction.getPrimaryOutput());
ActionExecutionState state = buildActionMap.get(ownerlessArtifactWrapper);
if (state != null) {
// If an action failed from lost inputs during input discovery then it won't have a state to
// obsolete.
state.obsolete(failedKey, buildActionMap, ownerlessArtifactWrapper);
}
if (!lostDiscoveredInputs.isEmpty()) {
lostDiscoveredInputsMap.put(ownerlessArtifactWrapper, lostDiscoveredInputs);
}
if (!actionFileSystemType().inMemoryFileSystem()) {
outputDirectoryHelper.invalidateTreeArtifactDirectoryCreation(failedAction.getOutputs());
}
for (Action dep : depsToRewind) {
prepareDepForRewinding(failedKey, dep);
}
}

private void prepareDepForRewinding(ActionLookupData failedKey, Action dep) {
OwnerlessArtifactWrapper ownerlessArtifactWrapper =
new OwnerlessArtifactWrapper(action.getPrimaryOutput());
new OwnerlessArtifactWrapper(dep.getPrimaryOutput());
ActionExecutionState actionExecutionState = buildActionMap.get(ownerlessArtifactWrapper);
if (actionExecutionState != null) {
actionExecutionState.obsolete(actionLookupData, buildActionMap, ownerlessArtifactWrapper);
actionExecutionState.obsolete(failedKey, buildActionMap, ownerlessArtifactWrapper);
}
completedAndResetActions.add(ownerlessArtifactWrapper);
if (!actionFileSystemType().inMemoryFileSystem()) {
outputDirectoryHelper.invalidateTreeArtifactDirectoryCreation(action.getOutputs());
outputDirectoryHelper.invalidateTreeArtifactDirectoryCreation(dep.getOutputs());
}
}

Expand All @@ -443,26 +470,6 @@ ImmutableList<SkyKey> getLostDiscoveredInputs(Action action) {
return lostDiscoveredInputsMap.get(new OwnerlessArtifactWrapper(action.getPrimaryOutput()));
}

void resetRewindingAction(
ActionLookupData actionLookupData,
Action action,
ImmutableList<SkyKey> lostDiscoveredInputs) {
OwnerlessArtifactWrapper ownerlessArtifactWrapper =
new OwnerlessArtifactWrapper(action.getPrimaryOutput());
ActionExecutionState state = buildActionMap.get(ownerlessArtifactWrapper);
if (state != null) {
// If an action failed from lost inputs during input discovery then it won't have a state to
// obsolete.
state.obsolete(actionLookupData, buildActionMap, ownerlessArtifactWrapper);
}
if (!lostDiscoveredInputs.isEmpty()) {
lostDiscoveredInputsMap.put(ownerlessArtifactWrapper, lostDiscoveredInputs);
}
if (!actionFileSystemType().inMemoryFileSystem()) {
outputDirectoryHelper.invalidateTreeArtifactDirectoryCreation(action.getOutputs());
}
}

void noteActionEvaluationStarted(ActionLookupData actionLookupData, Action action) {
if (completionReceiver != null) {
completionReceiver.noteActionEvaluationStarted(actionLookupData, action);
Expand Down Expand Up @@ -586,7 +593,7 @@ private ActionExecutionContext getContext(
actionInputPrefetcher,
actionKeyContext,
outputMetadataStore,
env.restartPermitted(),
env.resetPermitted(),
lostInputsCheck(actionFileSystem, action, outputService),
fileOutErr,
selectEventHandler(emitProgressEvents),
Expand Down Expand Up @@ -824,7 +831,7 @@ NestedSet<Artifact> discoverInputs(
actionInputPrefetcher,
actionKeyContext,
outputMetadataStore,
env.restartPermitted(),
env.resetPermitted(),
lostInputsCheck(actionFileSystem, action, outputService),
fileOutErr,
eventHandler,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ public void dependOnFuture(ListenableFuture<?> future) {
}

@Override
public boolean restartPermitted() {
return delegate.restartPermitted();
public boolean resetPermitted() {
return delegate.resetPermitted();
}

@Override
Expand Down
Loading

0 comments on commit c3d445a

Please sign in to comment.