Skip to content

Commit

Permalink
Fix crashes introduced by heuristically dropping file and directory l…
Browse files Browse the repository at this point in the history
…isting state nodes

# Background
* 658ba15 implemented heuristically dropping state nodes under flag `--experimental_heuristically_drop_nodes`. Multiple blaze crashes are observed when the flag is applied.
* abb402e enhanced `GraphInconsistencyReceiver` (`GIR` in short) so that it can tolerate inconsistencies introduced by heuristically dropping state nodes regardless of rewinding. This commit also fixes the crash when blaze cannot get file/directory state `depKey`s for symlinks.

# Goal of this commit

Local tests with target `//devtools/blaze/main:blaze` still indicates two other crashes when heuristically dropping state nodes.

1. `SkyFunctionEnvironment#batchPrefetch(true)` could inform `GIR` with `BUILDING_PARENT_FOUND_UNDONE_CHILD` inconsistency which also needs to be tolerated.
2. `AbstractParallelEvaluator#maybeHandleRegisteringNewlyDiscoveredDepsForDoneEntry()` should re-create instead of crash when declared dep is missing.

PiperOrigin-RevId: 504053773
Change-Id: I43e2f90ac3b84a9fbe9e590c90c20db3a94cd54b
  • Loading branch information
yuyue730 authored and copybara-github committed Jan 23, 2023
1 parent e372e70 commit b24930f
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ public static boolean isExpectedInconsistency(
if (inconsistency == Inconsistency.RESET_REQUESTED) {
return otherKeys == null;
}
if (inconsistency == Inconsistency.ALREADY_DECLARED_CHILD_MISSING) {
if (inconsistency == Inconsistency.ALREADY_DECLARED_CHILD_MISSING
|| inconsistency == Inconsistency.BUILDING_PARENT_FOUND_UNDONE_CHILD) {
// For already declared child missing inconsistency, key is the parent while `otherKeys`
// are the children (dependency nodes).
SkyFunctionName expectedStateKeyType =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3387,6 +3387,7 @@ private <T extends SkyValue> EvaluationResult<T> evaluate(
.setKeepGoing(keepGoing)
.setParallelism(numThreads)
.setEventHandler(eventHandler)
.setHeuristicallyDropNodes(heuristicallyDropNodes)
.build();
return memoizingEvaluator.evaluate(roots, evaluationContext);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ abstract class AbstractParallelEvaluator {

protected final Cache<SkyKey, SkyKeyComputeState> stateCache = Caffeine.newBuilder().build();

private final boolean heuristicallyDropNodes;

AbstractParallelEvaluator(
ProcessableGraph graph,
Version graphVersion,
Expand All @@ -132,7 +134,8 @@ abstract class AbstractParallelEvaluator {
GraphInconsistencyReceiver graphInconsistencyReceiver,
QuiescingExecutor executor,
CycleDetector cycleDetector,
boolean mergingSkyframeAnalysisExecutionPhases) {
boolean mergingSkyframeAnalysisExecutionPhases,
boolean heuristicallyDropNodes) {
this.graph = graph;
this.cycleDetector = cycleDetector;
this.evaluatorContext =
Expand All @@ -151,6 +154,7 @@ abstract class AbstractParallelEvaluator {
() -> new NodeEntryVisitor(executor, progressReceiver, Evaluate::new, stateCache),
/* mergingSkyframeAnalysisExecutionPhases= */ mergingSkyframeAnalysisExecutionPhases,
stateCache);
this.heuristicallyDropNodes = heuristicallyDropNodes;
}

/**
Expand Down Expand Up @@ -1108,14 +1112,16 @@ private boolean maybeHandleRegisteringNewlyDiscoveredDepsForDoneEntry(
}

for (SkyKey newDep : newlyAddedNewDeps) {
// TODO(b/261019506): When heuristically dropping node is enabled, declared dep might be
// missing and the check below will fail leading to blaze crash.
NodeEntry depEntry =
checkNotNull(
newlyAddedNewDepNodes.get().get(newDep),
"Missing already declared dep %s (parent=%s)",
newDep,
skyKey);
heuristicallyDropNodes
? getOrRecreateDepEntry(
newDep, newlyAddedNewDepNodes.get(), skyKey, Reason.RDEP_ADDITION)
: checkNotNull(
newlyAddedNewDepNodes.get().get(newDep),
"Missing already declared dep %s (parent=%s)",
newDep,
skyKey);

DependencyState triState = depEntry.addReverseDepAndCheckIfDone(skyKey);
switch (maybeHandleUndoneDepForDoneEntry(entry, depEntry, triState, skyKey, newDep)) {
case DEP_DONE_SELF_SIGNALLED:
Expand All @@ -1140,6 +1146,29 @@ private boolean maybeHandleRegisteringNewlyDiscoveredDepsForDoneEntry(
return !selfSignalled;
}

/**
* Returns a {@link NodeEntry} for {@code depKey}.
*
* <p>If {@code depKey} is present in {@code depEntries}, its corresponding entry is returned.
* Otherwise, if the evaluator permits {@link Inconsistency#ALREADY_DECLARED_CHILD_MISSING}, the
* entry will be recreated.
*/
private NodeEntry getOrRecreateDepEntry(
SkyKey depKey, NodeBatch depEntries, SkyKey requestor, Reason reason)
throws InterruptedException {
NodeEntry depEntry = depEntries.get(depKey);
if (depEntry != null) {
return depEntry;
}

ImmutableList<SkyKey> missing = ImmutableList.of(depKey);
evaluatorContext
.getGraphInconsistencyReceiver()
.noteInconsistencyAndMaybeThrow(
requestor, missing, Inconsistency.ALREADY_DECLARED_CHILD_MISSING);
return checkNotNull(graph.createIfAbsentBatch(requestor, reason, missing).get(depKey), depKey);
}

private enum MaybeHandleUndoneDepResult {
DEP_DONE_SELF_SIGNALLED,
DEP_DONE_SELF_NOT_SIGNALLED,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ public class EvaluationContext {
private final boolean isExecutionPhase;
private final boolean mergingSkyframeAnalysisExecutionPhases;
private final UnnecessaryTemporaryStateDropperReceiver unnecessaryTemporaryStateDropperReceiver;
private final boolean heuristicallyDropNodes;

protected EvaluationContext(
int parallelism,
Expand All @@ -43,14 +44,16 @@ protected EvaluationContext(
ExtendedEventHandler eventHandler,
boolean isExecutionPhase,
boolean mergingSkyframeAnalysisExecutionPhases,
UnnecessaryTemporaryStateDropperReceiver unnecessaryTemporaryStateDropperReceiver) {
UnnecessaryTemporaryStateDropperReceiver unnecessaryTemporaryStateDropperReceiver,
boolean heuristicallyDropNodes) {
this.parallelism = parallelism;
this.executor = executor;
this.keepGoing = keepGoing;
this.eventHandler = Preconditions.checkNotNull(eventHandler);
this.isExecutionPhase = isExecutionPhase;
this.mergingSkyframeAnalysisExecutionPhases = mergingSkyframeAnalysisExecutionPhases;
this.unnecessaryTemporaryStateDropperReceiver = unnecessaryTemporaryStateDropperReceiver;
this.heuristicallyDropNodes = heuristicallyDropNodes;
}

public int getParallelism() {
Expand Down Expand Up @@ -113,6 +116,10 @@ public UnnecessaryTemporaryStateDropperReceiver getUnnecessaryTemporaryStateDrop
return unnecessaryTemporaryStateDropperReceiver;
}

public boolean getHeuristicallyDropNodes() {
return heuristicallyDropNodes;
}

public Builder builder() {
return newBuilder().copyFrom(this);
}
Expand All @@ -131,6 +138,7 @@ public static class Builder {
protected boolean mergingSkyframeAnalysisExecutionPhases;
protected UnnecessaryTemporaryStateDropperReceiver unnecessaryTemporaryStateDropperReceiver =
UnnecessaryTemporaryStateDropperReceiver.NULL;
protected boolean heuristicallyDropNodes;

protected Builder() {}

Expand Down Expand Up @@ -192,6 +200,12 @@ public Builder setUnnecessaryTemporaryStateDropperReceiver(
return this;
}

@CanIgnoreReturnValue
public Builder setHeuristicallyDropNodes(boolean heuristicallyDropNodes) {
this.heuristicallyDropNodes = heuristicallyDropNodes;
return this;
}

public EvaluationContext build() {
return new EvaluationContext(
parallelism,
Expand All @@ -200,7 +214,8 @@ public EvaluationContext build() {
eventHandler,
isExecutionPhase,
mergingSkyframeAnalysisExecutionPhases,
unnecessaryTemporaryStateDropperReceiver);
unnecessaryTemporaryStateDropperReceiver,
heuristicallyDropNodes);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,8 @@ public <T extends SkyValue> EvaluationResult<T> evaluate(
ParallelEvaluatorErrorClassifier.instance())),
new SimpleCycleDetector(),
evaluationContext.mergingSkyframeAnalysisExecutionPhases(),
evaluationContext.getUnnecessaryTemporaryStateDropperReceiver());
evaluationContext.getUnnecessaryTemporaryStateDropperReceiver(),
evaluationContext.getHeuristicallyDropNodes());
result = evaluator.eval(roots);
}
return EvaluationResult.<T>builder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ public ParallelEvaluator(
QuiescingExecutor executor,
CycleDetector cycleDetector,
boolean mergingSkyframeAnalysisExecutionPhases,
UnnecessaryTemporaryStateDropperReceiver unnecessaryTemporaryStateDropperReceiver) {
UnnecessaryTemporaryStateDropperReceiver unnecessaryTemporaryStateDropperReceiver,
boolean heuristicallyDropNodes) {
super(
graph,
graphVersion,
Expand All @@ -86,10 +87,46 @@ public ParallelEvaluator(
graphInconsistencyReceiver,
executor,
cycleDetector,
mergingSkyframeAnalysisExecutionPhases);
mergingSkyframeAnalysisExecutionPhases,
heuristicallyDropNodes);
this.unnecessaryTemporaryStateDropperReceiver = unnecessaryTemporaryStateDropperReceiver;
}

public ParallelEvaluator(
ProcessableGraph graph,
Version graphVersion,
Version minimalVersion,
ImmutableMap<SkyFunctionName, SkyFunction> skyFunctions,
ExtendedEventHandler reporter,
NestedSetVisitor.VisitedState emittedEventState,
EventFilter storedEventFilter,
ErrorInfoManager errorInfoManager,
boolean keepGoing,
DirtyTrackingProgressReceiver progressReceiver,
GraphInconsistencyReceiver graphInconsistencyReceiver,
QuiescingExecutor executor,
CycleDetector cycleDetector,
boolean mergingSkyframeAnalysisExecutionPhases,
UnnecessaryTemporaryStateDropperReceiver unnecessaryTemporaryStateDropperReceiver) {
this(
graph,
graphVersion,
minimalVersion,
skyFunctions,
reporter,
emittedEventState,
storedEventFilter,
errorInfoManager,
keepGoing,
progressReceiver,
graphInconsistencyReceiver,
executor,
cycleDetector,
mergingSkyframeAnalysisExecutionPhases,
unnecessaryTemporaryStateDropperReceiver,
/* heuristicallyDropNodes= */ false);
}

private void informProgressReceiverThatValueIsDone(SkyKey key, NodeEntry entry)
throws InterruptedException {
if (evaluatorContext.getProgressReceiver() == null) {
Expand Down

0 comments on commit b24930f

Please sign in to comment.