diff --git a/plugin/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsFlowExecution.java b/plugin/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsFlowExecution.java index e8d9ec9a8..4a8bc7e72 100644 --- a/plugin/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsFlowExecution.java +++ b/plugin/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsFlowExecution.java @@ -386,6 +386,10 @@ enum TimingKind { * Running inside {@link CpsVmExecutorService}, which includes many other things. */ run, + /** + * Time spent waiting in the queue for {@link CpsVmExecutorService}. + */ + runQueue, /** * Saving the program state. * @see CpsThreadGroup#saveProgram(File) @@ -400,6 +404,8 @@ enum TimingKind { /** accumulated time in ns of a given {@link TimingKind#name}; {@link String} key for pretty XStream form */ transient @NonNull Map liveTimings = new ConcurrentHashMap<>(); + /** instances of {@link Timing} which have not yet completed for reporting counts and durations in support bundles. Never persisted. */ + transient @NonNull Set liveIncompleteTimings = ConcurrentHashMap.newKeySet(); /** XStream simplified form of {@link #liveTimings} */ private Map timings; @@ -433,7 +439,16 @@ private Timing(TimingKind kind) { start = System.nanoTime(); } + TimingKind getKind() { + return kind; + } + + long getStartNanos() { + return start; + } + @Override public void close() { + liveIncompleteTimings.remove(this); liveTimings.computeIfAbsent(kind.name(), k -> new LongAdder()).add(System.nanoTime() - start); } } @@ -444,7 +459,9 @@ private Timing(TimingKind kind) { * @return something to {@link Timing#close} when finished */ Timing time(TimingKind kind) { - return new Timing(kind); + var timing = new Timing(kind); + liveIncompleteTimings.add(timing); + return timing; } static final Logger TIMING_LOGGER = Logger.getLogger(CpsFlowExecution.class.getName() + ".timing"); @@ -1879,6 +1896,7 @@ public Object unmarshal(HierarchicalStreamReader reader, final UnmarshallingCont la.add(kv.getValue()); return la; })); + result.liveIncompleteTimings = ConcurrentHashMap.newKeySet(); return result; } catch (Exception ex) { LOGGER.log(Level.SEVERE, "Failed to even load the FlowExecution", ex); @@ -2025,7 +2043,7 @@ public void autopersist(@NonNull FlowNode n) throws IOException { } @Override public String getDisplayName() { - return "Timing data about recently completed Pipeline builds"; + return "Recently completed Pipeline builds"; } @Override public ComponentCategory getCategory() { @@ -2033,7 +2051,7 @@ public void autopersist(@NonNull FlowNode n) throws IOException { } @Override public void addContents(Container container) { - container.add(new Content("nodes/master/pipeline-timings.txt") { + container.add(new Content("nodes/master/pipeline-recent-builds.txt") { @Override public void writeTo(OutputStream outputStream) throws IOException { PrintWriter pw = new PrintWriter(new OutputStreamWriter(outputStream, StandardCharsets.UTF_8)); for (Job job : Jenkins.get().getAllItems(Job.class)) { diff --git a/plugin/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsStepContext.java b/plugin/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsStepContext.java index 89864685b..b5ccd5e84 100644 --- a/plugin/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsStepContext.java +++ b/plugin/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsStepContext.java @@ -435,7 +435,9 @@ public void onFailure(Throwable t) { } }); } catch (IOException x) { - LOGGER.log(Level.FINE, null, x); + // TODO: If the problem is with the FlowNode and not the CpsFlowExecution, should we try to call + // CpsVmExecutorService.reportProblem or CpsFlowExecution.croak to kill the build right away? + LOGGER.log(Level.WARNING, "Unable to load FlowNode or CpsFlowExecution when completing " + this + ", which is likely to cause its execution to hang indefinitely", x); } } diff --git a/plugin/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsThreadDumpAction.java b/plugin/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsThreadDumpAction.java index 835f271de..e68c5ba1e 100644 --- a/plugin/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsThreadDumpAction.java +++ b/plugin/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsThreadDumpAction.java @@ -5,19 +5,29 @@ import com.cloudbees.jenkins.support.api.Content; import com.google.common.util.concurrent.FutureCallback; import hudson.Extension; +import hudson.Functions; import hudson.model.Action; +import hudson.model.Queue; +import hudson.model.Run; import hudson.security.Permission; import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.PrintWriter; -import java.util.Collections; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import jenkins.model.Jenkins; import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.time.Instant; +import java.util.HashSet; +import java.util.Map; +import java.util.Optional; +import java.util.TreeMap; +import java.util.concurrent.atomic.LongAdder; +import java.util.stream.Collectors; import org.jenkinsci.plugins.workflow.flow.FlowExecution; import org.jenkinsci.plugins.workflow.flow.FlowExecutionList; import org.kohsuke.stapler.HttpResponses; @@ -99,7 +109,7 @@ public CpsThreadDump getThreadDump() { } @Override public String getDisplayName() { - return "Thread dumps of running Pipeline builds"; + return "Running Pipeline builds"; } @Override public ComponentCategory getCategory() { @@ -107,14 +117,48 @@ public CpsThreadDump getThreadDump() { } @Override public void addContents(Container container) { - container.add(new Content("nodes/master/pipeline-thread-dump.txt") { - @Override public void writeTo(OutputStream outputStream) throws IOException { + container.add(new Content("nodes/master/pipeline-running-builds.txt") { + @Override public void writeTo(OutputStream outputStream) { PrintWriter pw = new PrintWriter(new OutputStreamWriter(outputStream, StandardCharsets.UTF_8)); for (FlowExecution flow : FlowExecutionList.get()) { if (flow instanceof CpsFlowExecution) { - pw.println("Build: " + flow.getOwner().getExecutable()); - ((CpsFlowExecution) flow).getThreadDump().print(pw); - pw.println("Approximate graph size: " + ((CpsFlowExecution) flow).approximateNodeCount()); + Queue.Executable ownerExec; + try { + ownerExec = flow.getOwner().getExecutable(); + } catch (IOException e) { + pw.println("No data available for " + flow); + Functions.printStackTrace(e, pw); + pw.println(); + continue; + } + pw.println("Build: " + ownerExec); + if (ownerExec instanceof Run) { + var run = (Run) ownerExec; + var started = Instant.ofEpochMilli(run.getStartTimeInMillis()); + pw.println("Started: " + started); + var duration = Duration.between(started, Instant.now()); + pw.print("Duration: " + duration); + if (duration.toDays() > 3) { + pw.println(" (Running for more than 3 days!)"); + } else { + pw.println(); + } + } + var cpsFlow = (CpsFlowExecution) flow; + Map sortedTimings = new TreeMap<>(cpsFlow.liveTimings); + pw.println("Timings:"); + sortedTimings.forEach((k, v) -> pw.println(" " + k + "\t" + v.longValue() / 1000 / 1000 + "ms")); + pw.println("Active operations:"); + long nanos = System.nanoTime(); + Map> sortedIncompleteTimings = new HashSet<>(cpsFlow.liveIncompleteTimings).stream() + .collect(Collectors.groupingBy(t -> t.getKind().name(), TreeMap::new, + Collectors.mapping(t -> new CountAndDuration(nanos - t.getStartNanos()), + Collectors.reducing(CountAndDuration::new)))); + sortedIncompleteTimings.forEach((k, optional) -> + optional.ifPresent(cd -> + pw.println(" " + k + "\t" + cd.count + "\t" + cd.duration / 1000 / 1000 + "ms"))); + pw.println("Approximate graph size: " + cpsFlow.approximateNodeCount()); + cpsFlow.getThreadDump().print(pw); pw.println(); } } @@ -123,6 +167,19 @@ public CpsThreadDump getThreadDump() { }); } + private static class CountAndDuration { + private final int count; + private final long duration; + CountAndDuration(long duration) { + this.count = 1; + this.duration = duration; + } + CountAndDuration(CountAndDuration a, CountAndDuration b) { + this.count = a.count + b.count; + this.duration = a.duration + b.duration; + } + } + } } diff --git a/plugin/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsVmExecutorService.java b/plugin/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsVmExecutorService.java index 9a67b0fa2..5f2335ba1 100644 --- a/plugin/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsVmExecutorService.java +++ b/plugin/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsVmExecutorService.java @@ -77,7 +77,9 @@ public Void call() { @Override protected Runnable wrap(final Runnable r) { + var timing = cpsThreadGroup.getExecution().time(CpsFlowExecution.TimingKind.runQueue); return () -> { + timing.close(); ThreadContext context = setUp(); try { r.run(); @@ -130,7 +132,9 @@ private void reportProblem(Throwable t) { @Override protected Callable wrap(final Callable r) { + var timing = cpsThreadGroup.getExecution().time(CpsFlowExecution.TimingKind.runQueue); return () -> { + timing.close(); ThreadContext context = setUp(); try { return r.call();