From fe1e25b1373bc9d4294ee354add4011916e9f44a Mon Sep 17 00:00:00 2001 From: larsrc Date: Mon, 5 Jul 2021 06:29:38 -0700 Subject: [PATCH] Fix rare crash in dynamic execution where both branches got cancelled. Races in `stopBranch` caused the losing branch to throw `DynamicInterruptedException`, which could then get to that branch's listener before the winning branch actually cancelled. RELNOTES: None. PiperOrigin-RevId: 383126912 --- .../lib/dynamic/DynamicSpawnStrategy.java | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/dynamic/DynamicSpawnStrategy.java b/src/main/java/com/google/devtools/build/lib/dynamic/DynamicSpawnStrategy.java index 707225173cf753..87a9ab757173c6 100644 --- a/src/main/java/com/google/devtools/build/lib/dynamic/DynamicSpawnStrategy.java +++ b/src/main/java/com/google/devtools/build/lib/dynamic/DynamicSpawnStrategy.java @@ -571,13 +571,21 @@ ImmutableList callImpl(ActionExecutionContext context) DynamicSpawnStrategy.this.options, actionExecutionContext, spawn)); + } catch (DynamicInterruptedException e) { + // This exception can be thrown due to races in stopBranch(), in which case + // the branch that lost the race may not have been cancelled yet. Cancel it here + // to prevent the listener from cross-cancelling. + localBranch.cancel(true); + throw e; } catch ( @SuppressWarnings("InterruptedExceptionSwallowed") Throwable e) { if (options.debugSpawnScheduler) { logger.atInfo().log( - "Local branch of %s failed with %s", - spawn.getResourceOwner().prettyPrint(), e.getMessage()); + "Local branch of %s failed with %s: '%s'", + spawn.getResourceOwner().prettyPrint(), + e.getClass().getSimpleName(), + e.getMessage()); } throw e; } finally { @@ -627,13 +635,21 @@ public ImmutableList callImpl(ActionExecutionContext context) spawn)); delayLocalExecution.set(true); return spawnResults; + } catch (DynamicInterruptedException e) { + // This exception can be thrown due to races in stopBranch(), in which case + // the branch that lost the race may not have been cancelled yet. Cancel it here + // to prevent the listener from cross-cancelling. + remoteBranch.cancel(true); + throw e; } catch ( @SuppressWarnings("InterruptedExceptionSwallowed") Throwable e) { if (options.debugSpawnScheduler) { logger.atInfo().log( - "Remote branch of %s failed with %s", - spawn.getResourceOwner().prettyPrint(), e.getMessage()); + "Remote branch of %s failed with %s: '%s'", + spawn.getResourceOwner().prettyPrint(), + e.getClass().getSimpleName(), + e.getMessage()); } throw e; } finally {