diff --git a/src/main/java/hudson/remoting/Channel.java b/src/main/java/hudson/remoting/Channel.java index 601cc1a54..4aafdcfe6 100644 --- a/src/main/java/hudson/remoting/Channel.java +++ b/src/main/java/hudson/remoting/Channel.java @@ -32,6 +32,7 @@ import hudson.remoting.forward.PortForwarder; import org.jenkinsci.remoting.CallableDecorator; import org.jenkinsci.remoting.RoleChecker; +import org.jenkinsci.remoting.nio.NioChannelHub; import javax.annotation.CheckForNull; import javax.annotation.Nonnull; @@ -45,6 +46,7 @@ import java.lang.ref.WeakReference; import java.net.URL; import java.util.Collections; +import java.util.Date; import java.util.Hashtable; import java.util.Locale; import java.util.Map; @@ -204,8 +206,40 @@ public class Channel implements VirtualChannel, IChannel, Closeable { */ private final Vector listeners = new Vector(); private int gcCounter; + + /** + * Number of {@link Command} objects sent to the other side. + */ private int commandsSent; + /** + * Number of {@link Command} objects received from the other side. + * + * When a transport is functioning correctly, {@link #commandsSent} of one side + * and {@link #commandsReceived} of the other side should closely match. + */ + private int commandsReceived; + + /** + * Timestamp of the last {@link Command} object sent/received, in + * {@link System#currentTimeMillis()} format. + * This can be used as a basis for detecting dead connections. + * + *

+ * Note that {@link #lastCommandSentAt} doesn't mean + * anything in terms of whether the underlying network was able to send + * the data (for example, if the other end of a socket connection goes down + * without telling us anything, the {@link SocketOutputStream#write(int)} will + * return right away, and the socket only really times out after 10s of minutes. + */ + private volatile long lastCommandSentAt, lastCommandReceivedAt; + + /** + * Timestamp of when this channel was connected/created, in + * {@link System#currentTimeMillis()} format. + */ + private final long createdAt = System.currentTimeMillis(); + /** * Total number of nanoseconds spent for remote class loading. *

@@ -266,19 +300,6 @@ public class Channel implements VirtualChannel, IChannel, Closeable { */ public final Capability remoteCapability; - /** - * When did we receive any data from this slave the last time? - * This can be used as a basis for detecting dead connections. - *

- * Note that this doesn't include our sender side of the operation, - * as successfully returning from {@link #send(Command)} doesn't mean - * anything in terms of whether the underlying network was able to send - * the data (for example, if the other end of a socket connection goes down - * without telling us anything, the {@link SocketOutputStream#write(int)} will - * return right away, and the socket only really times out after 10s of minutes. - */ - private volatile long lastHeard; - /** * Single-thread executor for running pipe I/O operations. * @@ -494,7 +515,8 @@ protected Channel(ChannelBuilder settings, CommandTransport transport) throws IO transport.setup(this, new CommandReceiver() { public void handle(Command cmd) { - updateLastHeard(); + commandsReceived++; + lastCommandReceivedAt = System.currentTimeMillis(); if (logger.isLoggable(Level.FINE)) logger.fine("Received " + cmd); try { @@ -509,6 +531,7 @@ public void terminate(IOException e) { Channel.this.terminate(e); } }); + ACTIVE_CHANNELS.put(this,ref()); } /** @@ -581,6 +604,7 @@ private ExecutorService createPipeWriterExecutor() { transport.write(cmd, cmd instanceof CloseCommand); commandsSent++; + lastCommandSentAt = System.currentTimeMillis(); } /** @@ -1148,6 +1172,73 @@ public void dumpPerformanceCounters(PrintWriter w) throws IOException { w.printf(Locale.ENGLISH, "Resource loading time=%,dms%n", resourceLoadingTime.get() / (1000 * 1000)); } + /** + * Print the diagnostic information. + * + *

+ * Here's how you interpret these metrics: + * + *

+ *
Created + *
+ * When the channel was created, which is more or less the same thing as when the channel is connected. + * + *
Commands sent + *
+ * Number of {@link Command} objects sent to the other side. More specifically, number of commands + * successfully passed to {@link #transport}, which means data was written to socket with + * {@link ClassicCommandTransport} but just buffered for write with {@link NioChannelHub}. + * + * If you have access to the remoting diagnostics + * of the other side of the channel, then you can compare this with "commandsReceived" metrics of the other + * side to see how many commands are in transit in transport. If {@code commandsSent==commandsReceived}, + * then no command is in transit. + * + *
Commands received + *
+ * Number of {@link Command} objects received from the other side. More precisely, number + * of commands reported from {@link #transport}. So for example, if data is still buffered somewhere + * in the networking stack, it won't be counted here. + * + *
Last command sent + *
+ * The timestamp in which the last command was sent to the other side. The timestamp in which + * {@link #lastCommandSentAt} was updated. + * + *
Last command received + *
+ * The timestamp in which the last command was sent to the other side. The timestamp in which + * {@link #lastCommandReceivedAt} was updated. + * + *
Pending outgoing calls + *
+ * Number of RPC calls (e.g., method call through a {@linkplain RemoteInvocationHandler proxy}) + * that are made but not returned yet. If you have the remoting diagnostics of the other side, you + * can compare this number with "pending incoming calls" on the other side to see how many RPC + * calls are executing vs in flight. "one side's incoming calls" < "the other side's outgoing calls" + * indicates some RPC requests or responses are passing through the network layer, and mismatch + * between "# of commands sent" vs "# of commands received" can give some indication of whether + * it is request or response that's in flight. + * + *
Pending incoming calls + *
+ * The reverse of "pending outgoing calls". + * Number of RPC calls (e.g., method call through a {@linkplain RemoteInvocationHandler proxy}) + * that the other side has made to us but not yet returned yet. + * + * @since 2.26.3 + */ + public void dumpDiagnostics(PrintWriter w) throws IOException { + w.printf("Channel %s%n",name); + w.printf(" Created=%s%n", new Date(createdAt)); + w.printf(" Commands sent=%d%n", commandsSent); + w.printf(" Commands received=%d%n", commandsReceived); + w.printf(" Last command sent=%s%n", new Date(lastCommandSentAt)); + w.printf(" Last command received=%s%n", new Date(lastCommandReceivedAt)); + w.printf(" Pending outgoing calls=%d%n", pendingCalls.size()); + w.printf(" Pending incoming calls=%d%n", pendingCalls.size()); + } + /** * {@inheritDoc} */ @@ -1468,12 +1559,7 @@ public ExportTable.ExportList startExportRecording() { */ public long getLastHeard() { // TODO - this is not safe against clock skew and is called from jenkins core (and potentially plugins) - return lastHeard; - } - - private void updateLastHeard() { - // TODO - this is not safe against clock skew and is called from jenkins core (and potentially plugins) - lastHeard = System.currentTimeMillis(); + return lastCommandReceivedAt; } /*package*/ static Channel setCurrent(Channel channel) { @@ -1498,6 +1584,20 @@ public static Channel current() { return CURRENT.get(); } + /** + * Calls {@link #dumpDiagnostics(PrintWriter)} across all the active channels in this system. + * Used for diagnostics. + * + * @since 2.26.3 + */ + public static void dumpDiagnosticsForAll(PrintWriter w) throws IOException { + for (Ref ref : ACTIVE_CHANNELS.values().toArray(new Ref[0])) { + Channel ch = ref.channel(); + if (ch!=null) + ch.dumpDiagnostics(w); + } + } + /** * Remembers the current "channel" associated for this thread. */ @@ -1522,6 +1622,11 @@ public static Channel current() { */ public static final int PIPE_WINDOW_SIZE = Integer.getInteger(Channel.class.getName()+".pipeWindowSize",1024*1024); + /** + * Keep track of active channels in the system for diagnostics purposes. + */ + private static final Map ACTIVE_CHANNELS = Collections.synchronizedMap(new WeakHashMap()); + static final Class jarLoaderProxy; static { diff --git a/src/main/java/hudson/remoting/Command.java b/src/main/java/hudson/remoting/Command.java index 87d60b697..cce690952 100644 --- a/src/main/java/hudson/remoting/Command.java +++ b/src/main/java/hudson/remoting/Command.java @@ -31,6 +31,8 @@ /** * One-way command to be sent over to the remote system and executed there. + * This is the smallest unit of message in remoting from one side to another, + * such as "please execute this method" or "here's the return value from an earlier method call". * This is layer 0, the lower most layer. * *

diff --git a/src/test/java/hudson/remoting/ChannelTest.java b/src/test/java/hudson/remoting/ChannelTest.java index b4581eeb9..bbd1e8819 100644 --- a/src/test/java/hudson/remoting/ChannelTest.java +++ b/src/test/java/hudson/remoting/ChannelTest.java @@ -5,7 +5,9 @@ import java.io.IOException; import java.io.ObjectInputStream; +import java.io.PrintWriter; import java.io.Serializable; +import java.io.StringWriter; import java.net.URL; import java.net.URLClassLoader; import java.util.concurrent.TimeUnit; @@ -163,4 +165,15 @@ public T call() throws RuntimeException { return t; } } + + @Bug(39150) + public void testDiagnostics() throws Exception { + StringWriter sw = new StringWriter(); + Channel.dumpDiagnosticsForAll(new PrintWriter(sw)); + System.out.println(sw); + assertTrue(sw.toString().contains("Channel north")); + assertTrue(sw.toString().contains("Channel south")); + assertTrue(sw.toString().contains("Commands sent=0")); + assertTrue(sw.toString().contains("Commands received=0")); + } }