From e48ee019dd9e33f6f4cf4c1bae04443e01cfee92 Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 5 Nov 2021 21:39:48 +0000 Subject: [PATCH 1/7] Track histogram of transport handling times Adds to the transport node stats a record of the distribution of the times for which a transport thread was handling a message, represented as a histogram. Closes #80428 --- docs/reference/cluster/nodes-stats.asciidoc | 24 ++++++ .../test/nodes.stats/60_transport_stats.yml | 30 ++++++++ .../common/network/HandlingTimeTracker.java | 62 +++++++++++++++ .../common/network/NetworkService.java | 5 ++ .../http/AbstractHttpServerTransport.java | 1 + .../transport/InboundHandler.java | 7 +- .../transport/OutboundHandler.java | 13 +++- .../elasticsearch/transport/TcpTransport.java | 17 +++- .../transport/TransportStats.java | 73 +++++++++++++++++- .../cluster/node/stats/NodeStatsTests.java | 14 +++- .../network/HandlingTimeTrackerTests.java | 77 +++++++++++++++++++ .../transport/InboundHandlerTests.java | 7 +- .../transport/OutboundHandlerTests.java | 3 +- .../transport/TcpTransportTests.java | 4 +- .../transport/TestTransportChannels.java | 3 +- 15 files changed, 327 insertions(+), 13 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/common/network/HandlingTimeTracker.java create mode 100644 server/src/test/java/org/elasticsearch/common/network/HandlingTimeTrackerTests.java diff --git a/docs/reference/cluster/nodes-stats.asciidoc b/docs/reference/cluster/nodes-stats.asciidoc index 253890cd2a175..bd2b8ffad7590 100644 --- a/docs/reference/cluster/nodes-stats.asciidoc +++ b/docs/reference/cluster/nodes-stats.asciidoc @@ -1899,6 +1899,30 @@ Size of TX packets sent by the node during internal cluster communication. (integer) Size, in bytes, of TX packets sent by the node during internal cluster communication. + +`handling_time_histogram`:: +(array) +The distribution of the time spent handling a message on a transport thread, +represented as a histogram. ++ +.Properties of `handling_time_histogram` +[%collapsible] +======= +`ge_millis`:: +(integer) +The inclusive lower bound of the bucket in milliseconds. Omitted on the first +bucket since this bucket has no lower bound. + +`lt_millis`:: +(integer) +The exclusive upper bound of the bucket in milliseconds. Omitted on the last +bucket since this bucket has no upper bound. + +`count`:: +(integer) +The number of times a transport thread took a period of time within the bounds +of this bucket to handle a message. +======= ====== [[cluster-nodes-stats-api-response-body-http]] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/60_transport_stats.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/60_transport_stats.yml index 4f4b97bbcd521..bbfad5e60cec6 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/60_transport_stats.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/60_transport_stats.yml @@ -20,3 +20,33 @@ - gte: { nodes.$node_id.transport.tx_count: 0 } - gte: { nodes.$node_id.transport.rx_size_in_bytes: 0 } - gte: { nodes.$node_id.transport.tx_size_in_bytes: 0 } + +--- +"Transport handling time histogram": + - skip: + version: " - 8.0.99" + reason: "handling_time_histogram was added in 8.1" + features: [arbitrary_key] + + - do: + nodes.info: {} + - set: + nodes._arbitrary_key_: node_id + + - do: + nodes.stats: + metric: [ transport ] + + - length: { nodes.$node_id.transport.handling_time_histogram: 11 } + + - gte: { nodes.$node_id.transport.handling_time_histogram.0.count: 0 } + - is_false: nodes.$node_id.transport.handling_time_histogram.0.ge_millis + - match: { nodes.$node_id.transport.handling_time_histogram.0.lt_millis: 100 } + + - gte: { nodes.$node_id.transport.handling_time_histogram.1.count: 0 } + - match: { nodes.$node_id.transport.handling_time_histogram.1.ge_millis: 100 } + - match: { nodes.$node_id.transport.handling_time_histogram.1.lt_millis: 300 } + + - gte: { nodes.$node_id.transport.handling_time_histogram.10.count: 0 } + - match: { nodes.$node_id.transport.handling_time_histogram.10.ge_millis: 65536 } + - is_false: nodes.$node_id.transport.handling_time_histogram.10.lt_millis diff --git a/server/src/main/java/org/elasticsearch/common/network/HandlingTimeTracker.java b/server/src/main/java/org/elasticsearch/common/network/HandlingTimeTracker.java new file mode 100644 index 0000000000000..96050b31a967a --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/network/HandlingTimeTracker.java @@ -0,0 +1,62 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.common.network; + +import java.util.concurrent.atomic.LongAdder; + +/** + * Tracks how long message handling takes on a transport thread as a histogram with fixed buckets. + */ +public class HandlingTimeTracker { + + public static int[] getBucketUpperBounds() { + // Default clock resolution is 200ms so we have buckets for the 0-tick and 1-tick cases, then go up in powers of two + return new int[] { 100, 300, 1 << 9, 1 << 10, 1 << 11, 1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16 }; + } + + private static int getBucket(long handlingTimeMillis) { + if (handlingTimeMillis < 100L) { + return 0; + } else if (handlingTimeMillis < 300L) { + return 1; + } else if (1L << 16 <= handlingTimeMillis) { + return BUCKET_COUNT - 1; + } else { + return Long.SIZE - 7 - Long.numberOfLeadingZeros(handlingTimeMillis); + } + } + + public static final int BUCKET_COUNT = getBucketUpperBounds().length + 1; + + private final LongAdder[] buckets; + + public HandlingTimeTracker() { + buckets = new LongAdder[BUCKET_COUNT]; + for (int i = 0; i < BUCKET_COUNT; i++) { + buckets[i] = new LongAdder(); + } + } + + public void addHandlingTime(long handlingTimeMillis) { + buckets[getBucket(handlingTimeMillis)].increment(); + } + + /** + * @return An array of frequencies of handling times in buckets with upper bounds as returned by {@link #getBucketUpperBounds()}, plus + * an extra bucket for handling times longer than the longest upper bound. + */ + public long[] getHistogram() { + final long[] histogram = new long[BUCKET_COUNT]; + for (int i = 0; i < BUCKET_COUNT; i++) { + histogram[i] = buckets[i].longValue(); + } + return histogram; + } + +} diff --git a/server/src/main/java/org/elasticsearch/common/network/NetworkService.java b/server/src/main/java/org/elasticsearch/common/network/NetworkService.java index f816d9446ae9b..25c6aeea4e2db 100644 --- a/server/src/main/java/org/elasticsearch/common/network/NetworkService.java +++ b/server/src/main/java/org/elasticsearch/common/network/NetworkService.java @@ -90,11 +90,16 @@ public interface CustomNameResolver { } private final List customNameResolvers; + private final HandlingTimeTracker handlingTimeTracker = new HandlingTimeTracker(); public NetworkService(List customNameResolvers) { this.customNameResolvers = Objects.requireNonNull(customNameResolvers, "customNameResolvers must be non null"); } + public HandlingTimeTracker getHandlingTimeTracker() { + return handlingTimeTracker; + } + /** * Resolves {@code bindHosts} to a list of internet addresses. The list will * not contain duplicate addresses. diff --git a/server/src/main/java/org/elasticsearch/http/AbstractHttpServerTransport.java b/server/src/main/java/org/elasticsearch/http/AbstractHttpServerTransport.java index 5db7dbdf4f34c..43a167258c792 100644 --- a/server/src/main/java/org/elasticsearch/http/AbstractHttpServerTransport.java +++ b/server/src/main/java/org/elasticsearch/http/AbstractHttpServerTransport.java @@ -360,6 +360,7 @@ public void incomingRequest(final HttpRequest httpRequest, final HttpChannel htt handleIncomingRequest(httpRequest, httpChannel, httpRequest.getInboundException()); } finally { final long took = threadPool.relativeTimeInMillis() - startTime; + networkService.getHandlingTimeTracker().addHandlingTime(took); final long logThreshold = slowLogThresholdMs; if (logThreshold > 0 && took > logThreshold) { logger.warn( diff --git a/server/src/main/java/org/elasticsearch/transport/InboundHandler.java b/server/src/main/java/org/elasticsearch/transport/InboundHandler.java index e5e27b059426f..9f8e88970d24a 100644 --- a/server/src/main/java/org/elasticsearch/transport/InboundHandler.java +++ b/server/src/main/java/org/elasticsearch/transport/InboundHandler.java @@ -17,6 +17,7 @@ import org.elasticsearch.common.io.stream.NamedWriteableAwareStreamInput; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.network.HandlingTimeTracker; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.util.concurrent.AbstractRunnable; import org.elasticsearch.common.util.concurrent.ThreadContext; @@ -41,6 +42,7 @@ public class InboundHandler { private final TransportHandshaker handshaker; private final TransportKeepAlive keepAlive; private final Transport.ResponseHandlers responseHandlers; + private final HandlingTimeTracker handlingTimeTracker; private final Transport.RequestHandlers requestHandlers; private volatile TransportMessageListener messageListener = TransportMessageListener.NOOP_LISTENER; @@ -54,7 +56,8 @@ public class InboundHandler { TransportHandshaker handshaker, TransportKeepAlive keepAlive, Transport.RequestHandlers requestHandlers, - Transport.ResponseHandlers responseHandlers + Transport.ResponseHandlers responseHandlers, + HandlingTimeTracker handlingTimeTracker ) { this.threadPool = threadPool; this.outboundHandler = outboundHandler; @@ -63,6 +66,7 @@ public class InboundHandler { this.keepAlive = keepAlive; this.requestHandlers = requestHandlers; this.responseHandlers = responseHandlers; + this.handlingTimeTracker = handlingTimeTracker; } void setMessageListener(TransportMessageListener listener) { @@ -157,6 +161,7 @@ private void messageReceived(TcpChannel channel, InboundMessage message, long st } } finally { final long took = threadPool.relativeTimeInMillis() - startTime; + handlingTimeTracker.addHandlingTime(took); final long logThreshold = slowLogThresholdMs; if (logThreshold > 0 && took > logThreshold) { if (isRequest) { diff --git a/server/src/main/java/org/elasticsearch/transport/OutboundHandler.java b/server/src/main/java/org/elasticsearch/transport/OutboundHandler.java index 6597a8462e57c..1e390310917a0 100644 --- a/server/src/main/java/org/elasticsearch/transport/OutboundHandler.java +++ b/server/src/main/java/org/elasticsearch/transport/OutboundHandler.java @@ -19,6 +19,7 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.RecyclerBytesStreamOutput; import org.elasticsearch.common.network.CloseableChannel; +import org.elasticsearch.common.network.HandlingTimeTracker; import org.elasticsearch.common.recycler.Recycler; import org.elasticsearch.common.transport.NetworkExceptionHelper; import org.elasticsearch.common.transport.TransportAddress; @@ -38,17 +39,26 @@ final class OutboundHandler { private final StatsTracker statsTracker; private final ThreadPool threadPool; private final Recycler recycler; + private final HandlingTimeTracker handlingTimeTracker; private volatile long slowLogThresholdMs = Long.MAX_VALUE; private volatile TransportMessageListener messageListener = TransportMessageListener.NOOP_LISTENER; - OutboundHandler(String nodeName, Version version, StatsTracker statsTracker, ThreadPool threadPool, Recycler recycler) { + OutboundHandler( + String nodeName, + Version version, + StatsTracker statsTracker, + ThreadPool threadPool, + Recycler recycler, + HandlingTimeTracker handlingTimeTracker + ) { this.nodeName = nodeName; this.version = version; this.statsTracker = statsTracker; this.threadPool = threadPool; this.recycler = recycler; + this.handlingTimeTracker = handlingTimeTracker; } void setSlowLogThreshold(TimeValue slowLogThreshold) { @@ -199,6 +209,7 @@ private void maybeLogSlowMessage(boolean success) { final long logThreshold = slowLogThresholdMs; if (logThreshold > 0) { final long took = threadPool.relativeTimeInMillis() - startTime; + handlingTimeTracker.addHandlingTime(took); if (took > logThreshold) { logger.warn( "sending transport message [{}] of size [{}] on [{}] took [{}ms] which is above the warn " diff --git a/server/src/main/java/org/elasticsearch/transport/TcpTransport.java b/server/src/main/java/org/elasticsearch/transport/TcpTransport.java index 66a631604d8df..d3366213d5962 100644 --- a/server/src/main/java/org/elasticsearch/transport/TcpTransport.java +++ b/server/src/main/java/org/elasticsearch/transport/TcpTransport.java @@ -28,6 +28,7 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.network.CloseableChannel; +import org.elasticsearch.common.network.HandlingTimeTracker; import org.elasticsearch.common.network.NetworkAddress; import org.elasticsearch.common.network.NetworkService; import org.elasticsearch.common.network.NetworkUtils; @@ -140,7 +141,14 @@ public TcpTransport( String nodeName = Node.NODE_NAME_SETTING.get(settings); this.recycler = createRecycler(settings, pageCacheRecycler); - this.outboundHandler = new OutboundHandler(nodeName, version, statsTracker, threadPool, recycler); + this.outboundHandler = new OutboundHandler( + nodeName, + version, + statsTracker, + threadPool, + recycler, + networkService.getHandlingTimeTracker() + ); this.handshaker = new TransportHandshaker( version, threadPool, @@ -164,7 +172,8 @@ public TcpTransport( handshaker, keepAlive, requestHandlers, - responseHandlers + responseHandlers, + networkService.getHandlingTimeTracker() ); } @@ -912,7 +921,9 @@ public final TransportStats getStats() { messagesReceived, bytesRead, messagesSent, - bytesWritten + bytesWritten, + networkService.getHandlingTimeTracker().getHistogram(), + HandlingTimeTracker.getBucketUpperBounds() ); } diff --git a/server/src/main/java/org/elasticsearch/transport/TransportStats.java b/server/src/main/java/org/elasticsearch/transport/TransportStats.java index 7caf3c241615c..b00acf1f40e90 100644 --- a/server/src/main/java/org/elasticsearch/transport/TransportStats.java +++ b/server/src/main/java/org/elasticsearch/transport/TransportStats.java @@ -8,6 +8,7 @@ package org.elasticsearch.transport; +import org.elasticsearch.Version; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; @@ -25,14 +26,28 @@ public class TransportStats implements Writeable, ToXContentFragment { private final long rxSize; private final long txCount; private final long txSize; - - public TransportStats(long serverOpen, long totalOutboundConnections, long rxCount, long rxSize, long txCount, long txSize) { + private final long[] handlingTimeBucketFrequencies; + private final int[] handlingTimeBucketBounds; + + public TransportStats( + long serverOpen, + long totalOutboundConnections, + long rxCount, + long rxSize, + long txCount, + long txSize, + long[] handlingTimeBucketFrequencies, + int[] handlingTimeBucketBounds + ) { this.serverOpen = serverOpen; this.totalOutboundConnections = totalOutboundConnections; this.rxCount = rxCount; this.rxSize = rxSize; this.txCount = txCount; this.txSize = txSize; + this.handlingTimeBucketFrequencies = handlingTimeBucketFrequencies; + this.handlingTimeBucketBounds = handlingTimeBucketBounds; + assert assertHistogramConsistent(); } public TransportStats(StreamInput in) throws IOException { @@ -42,6 +57,14 @@ public TransportStats(StreamInput in) throws IOException { rxSize = in.readVLong(); txCount = in.readVLong(); txSize = in.readVLong(); + if (in.getVersion().onOrAfter(Version.V_8_1_0)) { + handlingTimeBucketFrequencies = in.readLongArray(); + handlingTimeBucketBounds = in.readIntArray(); + } else { + handlingTimeBucketFrequencies = new long[0]; + handlingTimeBucketBounds = new int[0]; + } + assert assertHistogramConsistent(); } @Override @@ -52,6 +75,10 @@ public void writeTo(StreamOutput out) throws IOException { out.writeVLong(rxSize); out.writeVLong(txCount); out.writeVLong(txSize); + if (out.getVersion().onOrAfter(Version.V_8_1_0)) { + out.writeLongArray(handlingTimeBucketFrequencies); + out.writeIntArray(handlingTimeBucketBounds); + } } public long serverOpen() { @@ -94,6 +121,29 @@ public ByteSizeValue getTxSize() { return txSize(); } + public long[] getHandlingTimeBucketFrequencies() { + final long[] histogram = new long[handlingTimeBucketFrequencies.length]; + System.arraycopy(handlingTimeBucketFrequencies, 0, histogram, 0, handlingTimeBucketFrequencies.length); + return histogram; + } + + public int[] getHandlingTimeBucketBounds() { + final int[] bounds = new int[handlingTimeBucketBounds.length]; + System.arraycopy(handlingTimeBucketBounds, 0, bounds, 0, handlingTimeBucketBounds.length); + return bounds; + } + + private boolean assertHistogramConsistent() { + if (handlingTimeBucketFrequencies.length == 0) { + // Stats came from before v8.1 + assert Version.CURRENT.major == Version.V_8_0_0.major; + assert handlingTimeBucketBounds.length == 0; + } else { + assert handlingTimeBucketFrequencies.length == handlingTimeBucketBounds.length + 1; + } + return true; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(Fields.TRANSPORT); @@ -103,6 +153,24 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.humanReadableField(Fields.RX_SIZE_IN_BYTES, Fields.RX_SIZE, new ByteSizeValue(rxSize)); builder.field(Fields.TX_COUNT, txCount); builder.humanReadableField(Fields.TX_SIZE_IN_BYTES, Fields.TX_SIZE, new ByteSizeValue(txSize)); + if (handlingTimeBucketFrequencies.length > 0) { + builder.startArray(Fields.HANDLING_TIME_HISTOGRAM); + for (int i = 0; i < handlingTimeBucketFrequencies.length; i++) { + builder.startObject(); + if (i > 0 && i <= handlingTimeBucketBounds.length) { + builder.field("ge_millis", handlingTimeBucketBounds[i - 1]); + } + if (i < handlingTimeBucketBounds.length) { + builder.field("lt_millis", handlingTimeBucketBounds[i]); + } + builder.field("count", handlingTimeBucketFrequencies[i]); + builder.endObject(); + } + builder.endArray(); + } else { + // Stats came from before v8.1 + assert Version.CURRENT.major == Version.V_8_0_0.major; + } builder.endObject(); return builder; } @@ -117,5 +185,6 @@ static final class Fields { static final String TX_COUNT = "tx_count"; static final String TX_SIZE = "tx_size"; static final String TX_SIZE_IN_BYTES = "tx_size_in_bytes"; + static final String HANDLING_TIME_HISTOGRAM = "handling_time_histogram"; } } diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java index 144c9f0843441..6889117c92cd5 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.cluster.service.ClusterStateUpdateStats; import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.network.HandlingTimeTracker; import org.elasticsearch.core.Tuple; import org.elasticsearch.discovery.DiscoveryStats; import org.elasticsearch.http.HttpStats; @@ -47,6 +48,7 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import java.util.stream.IntStream; import static java.util.Collections.emptyMap; import static java.util.Collections.emptySet; @@ -238,6 +240,14 @@ public void testSerialization() throws IOException { assertEquals(nodeStats.getTransport().getServerOpen(), deserializedNodeStats.getTransport().getServerOpen()); assertEquals(nodeStats.getTransport().getTxCount(), deserializedNodeStats.getTransport().getTxCount()); assertEquals(nodeStats.getTransport().getTxSize(), deserializedNodeStats.getTransport().getTxSize()); + assertArrayEquals( + nodeStats.getTransport().getHandlingTimeBucketFrequencies(), + deserializedNodeStats.getTransport().getHandlingTimeBucketFrequencies() + ); + assertArrayEquals( + nodeStats.getTransport().getHandlingTimeBucketBounds(), + deserializedNodeStats.getTransport().getHandlingTimeBucketBounds() + ); } if (nodeStats.getHttp() == null) { assertNull(deserializedNodeStats.getHttp()); @@ -672,7 +682,9 @@ public static NodeStats createNodeStats() { randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), - randomNonNegativeLong() + randomNonNegativeLong(), + IntStream.range(0, HandlingTimeTracker.BUCKET_COUNT).mapToLong(i -> randomNonNegativeLong()).toArray(), + IntStream.range(0, HandlingTimeTracker.BUCKET_COUNT - 1).map(i -> between(0, Integer.MAX_VALUE)).toArray() ) : null; HttpStats httpStats = null; diff --git a/server/src/test/java/org/elasticsearch/common/network/HandlingTimeTrackerTests.java b/server/src/test/java/org/elasticsearch/common/network/HandlingTimeTrackerTests.java new file mode 100644 index 0000000000000..d0f59912adc9e --- /dev/null +++ b/server/src/test/java/org/elasticsearch/common/network/HandlingTimeTrackerTests.java @@ -0,0 +1,77 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.common.network; + +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.greaterThan; + +public class HandlingTimeTrackerTests extends ESTestCase { + + public void testHistogram() { + final HandlingTimeTracker handlingTimeTracker = new HandlingTimeTracker(); + + assertArrayEquals(new long[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); + + handlingTimeTracker.addHandlingTime(0L); + assertArrayEquals(new long[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); + + handlingTimeTracker.addHandlingTime(randomLongBetween(0L, 99L)); + assertArrayEquals(new long[] { 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); + + handlingTimeTracker.addHandlingTime(100L); + assertArrayEquals(new long[] { 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); + + handlingTimeTracker.addHandlingTime(299L); + assertArrayEquals(new long[] { 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); + + handlingTimeTracker.addHandlingTime(300L); + assertArrayEquals(new long[] { 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); + + handlingTimeTracker.addHandlingTime(65535L); + assertArrayEquals(new long[] { 2, 2, 1, 0, 0, 0, 0, 0, 0, 1, 0 }, handlingTimeTracker.getHistogram()); + + handlingTimeTracker.addHandlingTime(65536L); + assertArrayEquals(new long[] { 2, 2, 1, 0, 0, 0, 0, 0, 0, 1, 1 }, handlingTimeTracker.getHistogram()); + + handlingTimeTracker.addHandlingTime(Long.MAX_VALUE); + assertArrayEquals(new long[] { 2, 2, 1, 0, 0, 0, 0, 0, 0, 1, 2 }, handlingTimeTracker.getHistogram()); + + handlingTimeTracker.addHandlingTime(randomLongBetween(65536L, Long.MAX_VALUE)); + assertArrayEquals(new long[] { 2, 2, 1, 0, 0, 0, 0, 0, 0, 1, 3 }, handlingTimeTracker.getHistogram()); + + handlingTimeTracker.addHandlingTime(randomLongBetween(Long.MIN_VALUE, 99L)); + assertArrayEquals(new long[] { 3, 2, 1, 0, 0, 0, 0, 0, 0, 1, 3 }, handlingTimeTracker.getHistogram()); + } + + public void testHistogramRandom() { + final int[] upperBounds = HandlingTimeTracker.getBucketUpperBounds(); + final long[] expectedCounts = new long[upperBounds.length + 1]; + final HandlingTimeTracker handlingTimeTracker = new HandlingTimeTracker(); + for (int i = between(0, 1000); i > 0; i--) { + final int bucket = between(0, expectedCounts.length - 1); + expectedCounts[bucket] += 1; + + final int lowerBound = bucket == 0 ? 0 : upperBounds[bucket - 1]; + final int upperBound = bucket == upperBounds.length ? randomBoolean() ? 100000 : Integer.MAX_VALUE : upperBounds[bucket] - 1; + handlingTimeTracker.addHandlingTime(between(lowerBound, upperBound)); + } + + assertArrayEquals(expectedCounts, handlingTimeTracker.getHistogram()); + } + + public void testBoundsConsistency() { + final int[] upperBounds = HandlingTimeTracker.getBucketUpperBounds(); + assertThat(upperBounds[0], greaterThan(0)); + for (int i = 1; i < upperBounds.length; i++) { + assertThat(upperBounds[i], greaterThan(upperBounds[i - 1])); + } + } + +} diff --git a/server/src/test/java/org/elasticsearch/transport/InboundHandlerTests.java b/server/src/test/java/org/elasticsearch/transport/InboundHandlerTests.java index a482a6bd713eb..fda0090125ab8 100644 --- a/server/src/test/java/org/elasticsearch/transport/InboundHandlerTests.java +++ b/server/src/test/java/org/elasticsearch/transport/InboundHandlerTests.java @@ -22,6 +22,7 @@ import org.elasticsearch.common.io.stream.RecyclerBytesStreamOutput; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.logging.Loggers; +import org.elasticsearch.common.network.HandlingTimeTracker; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.PageCacheRecycler; import org.elasticsearch.core.TimeValue; @@ -69,7 +70,8 @@ public void setUp() throws Exception { version, new StatsTracker(), threadPool, - new BytesRefRecycler(PageCacheRecycler.NON_RECYCLING_INSTANCE) + new BytesRefRecycler(PageCacheRecycler.NON_RECYCLING_INSTANCE), + new HandlingTimeTracker() ); requestHandlers = new Transport.RequestHandlers(); responseHandlers = new Transport.ResponseHandlers(); @@ -80,7 +82,8 @@ public void setUp() throws Exception { handshaker, keepAlive, requestHandlers, - responseHandlers + responseHandlers, + new HandlingTimeTracker() ); } diff --git a/server/src/test/java/org/elasticsearch/transport/OutboundHandlerTests.java b/server/src/test/java/org/elasticsearch/transport/OutboundHandlerTests.java index 0100af921a247..183379cc50c8d 100644 --- a/server/src/test/java/org/elasticsearch/transport/OutboundHandlerTests.java +++ b/server/src/test/java/org/elasticsearch/transport/OutboundHandlerTests.java @@ -23,6 +23,7 @@ import org.elasticsearch.common.bytes.ReleasableBytesReference; import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.logging.Loggers; +import org.elasticsearch.common.network.HandlingTimeTracker; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.util.PageCacheRecycler; import org.elasticsearch.common.util.concurrent.ThreadContext; @@ -68,7 +69,7 @@ public void setUp() throws Exception { node = new DiscoveryNode("", transportAddress, Version.CURRENT); StatsTracker statsTracker = new StatsTracker(); compressionScheme = randomFrom(Compression.Scheme.DEFLATE, Compression.Scheme.LZ4); - handler = new OutboundHandler("node", Version.CURRENT, statsTracker, threadPool, recycler); + handler = new OutboundHandler("node", Version.CURRENT, statsTracker, threadPool, recycler, new HandlingTimeTracker()); final LongSupplier millisSupplier = () -> TimeValue.nsecToMSec(System.nanoTime()); final InboundDecoder decoder = new InboundDecoder(Version.CURRENT, this.recycler); diff --git a/server/src/test/java/org/elasticsearch/transport/TcpTransportTests.java b/server/src/test/java/org/elasticsearch/transport/TcpTransportTests.java index 47252ceb8a124..814afba514217 100644 --- a/server/src/test/java/org/elasticsearch/transport/TcpTransportTests.java +++ b/server/src/test/java/org/elasticsearch/transport/TcpTransportTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.common.component.Lifecycle; import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.logging.Loggers; +import org.elasticsearch.common.network.HandlingTimeTracker; import org.elasticsearch.common.network.NetworkService; import org.elasticsearch.common.network.NetworkUtils; import org.elasticsearch.common.settings.Settings; @@ -541,7 +542,8 @@ private void testExceptionHandling( Version.CURRENT, new StatsTracker(), testThreadPool, - new BytesRefRecycler(new MockPageCacheRecycler(Settings.EMPTY)) + new BytesRefRecycler(new MockPageCacheRecycler(Settings.EMPTY)), + new HandlingTimeTracker() ) ); diff --git a/test/framework/src/main/java/org/elasticsearch/transport/TestTransportChannels.java b/test/framework/src/main/java/org/elasticsearch/transport/TestTransportChannels.java index 85a45cd7a691c..15b24b0a77e6f 100644 --- a/test/framework/src/main/java/org/elasticsearch/transport/TestTransportChannels.java +++ b/test/framework/src/main/java/org/elasticsearch/transport/TestTransportChannels.java @@ -9,6 +9,7 @@ package org.elasticsearch.transport; import org.elasticsearch.Version; +import org.elasticsearch.common.network.HandlingTimeTracker; import org.elasticsearch.common.util.PageCacheRecycler; import org.elasticsearch.threadpool.ThreadPool; @@ -24,7 +25,7 @@ public static TcpTransportChannel newFakeTcpTransportChannel( ) { BytesRefRecycler recycler = new BytesRefRecycler(PageCacheRecycler.NON_RECYCLING_INSTANCE); return new TcpTransportChannel( - new OutboundHandler(nodeName, version, new StatsTracker(), threadPool, recycler), + new OutboundHandler(nodeName, version, new StatsTracker(), threadPool, recycler, new HandlingTimeTracker()), channel, action, requestId, From 0d29128aa1d58d3ba0e5655437de02afeee60369 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 25 Nov 2021 15:04:58 +0000 Subject: [PATCH 2/7] Use raw times for better granularity --- .../org/elasticsearch/http/AbstractHttpServerTransport.java | 4 ++-- .../main/java/org/elasticsearch/transport/InboundHandler.java | 4 ++-- .../java/org/elasticsearch/transport/OutboundHandler.java | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/http/AbstractHttpServerTransport.java b/server/src/main/java/org/elasticsearch/http/AbstractHttpServerTransport.java index 5ff1c666383db..125feb2c9fc77 100644 --- a/server/src/main/java/org/elasticsearch/http/AbstractHttpServerTransport.java +++ b/server/src/main/java/org/elasticsearch/http/AbstractHttpServerTransport.java @@ -355,11 +355,11 @@ protected void serverAcceptedChannel(HttpChannel httpChannel) { */ public void incomingRequest(final HttpRequest httpRequest, final HttpChannel httpChannel) { httpClientStatsTracker.updateClientStats(httpRequest, httpChannel); - final long startTime = threadPool.relativeTimeInMillis(); + final long startTime = threadPool.rawRelativeTimeInMillis(); try { handleIncomingRequest(httpRequest, httpChannel, httpRequest.getInboundException()); } finally { - final long took = threadPool.relativeTimeInMillis() - startTime; + final long took = threadPool.rawRelativeTimeInMillis() - startTime; networkService.getHandlingTimeTracker().addHandlingTime(took); final long logThreshold = slowLogThresholdMs; if (logThreshold > 0 && took > logThreshold) { diff --git a/server/src/main/java/org/elasticsearch/transport/InboundHandler.java b/server/src/main/java/org/elasticsearch/transport/InboundHandler.java index acf90143eeb1f..e6a9418a39899 100644 --- a/server/src/main/java/org/elasticsearch/transport/InboundHandler.java +++ b/server/src/main/java/org/elasticsearch/transport/InboundHandler.java @@ -83,7 +83,7 @@ void setSlowLogThreshold(TimeValue slowLogThreshold) { } void inboundMessage(TcpChannel channel, InboundMessage message) throws Exception { - final long startTime = threadPool.relativeTimeInMillis(); + final long startTime = threadPool.rawRelativeTimeInMillis(); channel.getChannelStats().markAccessed(startTime); TransportLogger.logInboundMessage(channel, message); @@ -161,7 +161,7 @@ private void messageReceived(TcpChannel channel, InboundMessage message, long st } } } finally { - final long took = threadPool.relativeTimeInMillis() - startTime; + final long took = threadPool.rawRelativeTimeInMillis() - startTime; handlingTimeTracker.addHandlingTime(took); final long logThreshold = slowLogThresholdMs; if (logThreshold > 0 && took > logThreshold) { diff --git a/server/src/main/java/org/elasticsearch/transport/OutboundHandler.java b/server/src/main/java/org/elasticsearch/transport/OutboundHandler.java index e04bb177f8754..18aeb12b81645 100644 --- a/server/src/main/java/org/elasticsearch/transport/OutboundHandler.java +++ b/server/src/main/java/org/elasticsearch/transport/OutboundHandler.java @@ -178,7 +178,7 @@ private void internalSend( @Nullable OutboundMessage message, ActionListener listener ) { - final long startTime = threadPool.relativeTimeInMillis(); + final long startTime = threadPool.rawRelativeTimeInMillis(); channel.getChannelStats().markAccessed(startTime); final long messageSize = reference.length(); TransportLogger.logOutboundMessage(channel, reference); @@ -206,7 +206,7 @@ public void onFailure(Exception e) { private void maybeLogSlowMessage(boolean success) { final long logThreshold = slowLogThresholdMs; if (logThreshold > 0) { - final long took = threadPool.relativeTimeInMillis() - startTime; + final long took = threadPool.rawRelativeTimeInMillis() - startTime; handlingTimeTracker.addHandlingTime(took); if (took > logThreshold) { logger.warn( From a1ce07379aef5a64465c8ce879dd4084fa8d727d Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 25 Nov 2021 14:55:02 +0000 Subject: [PATCH 3/7] Finer-grained buckets --- .../test/nodes.stats/60_transport_stats.yml | 14 +++---- .../common/network/HandlingTimeTracker.java | 13 ++++--- .../transport/InboundHandler.java | 2 - .../network/HandlingTimeTrackerTests.java | 38 +++++++++++-------- .../transport/OutboundHandlerTests.java | 3 +- 5 files changed, 37 insertions(+), 33 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/60_transport_stats.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/60_transport_stats.yml index bbfad5e60cec6..55ac31f8df47d 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/60_transport_stats.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/60_transport_stats.yml @@ -37,16 +37,16 @@ nodes.stats: metric: [ transport ] - - length: { nodes.$node_id.transport.handling_time_histogram: 11 } + - length: { nodes.$node_id.transport.handling_time_histogram: 17 } - gte: { nodes.$node_id.transport.handling_time_histogram.0.count: 0 } - is_false: nodes.$node_id.transport.handling_time_histogram.0.ge_millis - - match: { nodes.$node_id.transport.handling_time_histogram.0.lt_millis: 100 } + - match: { nodes.$node_id.transport.handling_time_histogram.0.lt_millis: 1 } - gte: { nodes.$node_id.transport.handling_time_histogram.1.count: 0 } - - match: { nodes.$node_id.transport.handling_time_histogram.1.ge_millis: 100 } - - match: { nodes.$node_id.transport.handling_time_histogram.1.lt_millis: 300 } + - match: { nodes.$node_id.transport.handling_time_histogram.1.ge_millis: 1 } + - match: { nodes.$node_id.transport.handling_time_histogram.1.lt_millis: 2 } - - gte: { nodes.$node_id.transport.handling_time_histogram.10.count: 0 } - - match: { nodes.$node_id.transport.handling_time_histogram.10.ge_millis: 65536 } - - is_false: nodes.$node_id.transport.handling_time_histogram.10.lt_millis + - gte: { nodes.$node_id.transport.handling_time_histogram.16.count: 0 } + - match: { nodes.$node_id.transport.handling_time_histogram.16.ge_millis: 65536 } + - is_false: nodes.$node_id.transport.handling_time_histogram.16.lt_millis diff --git a/server/src/main/java/org/elasticsearch/common/network/HandlingTimeTracker.java b/server/src/main/java/org/elasticsearch/common/network/HandlingTimeTracker.java index 96050b31a967a..831df6e7c1075 100644 --- a/server/src/main/java/org/elasticsearch/common/network/HandlingTimeTracker.java +++ b/server/src/main/java/org/elasticsearch/common/network/HandlingTimeTracker.java @@ -16,19 +16,20 @@ public class HandlingTimeTracker { public static int[] getBucketUpperBounds() { - // Default clock resolution is 200ms so we have buckets for the 0-tick and 1-tick cases, then go up in powers of two - return new int[] { 100, 300, 1 << 9, 1 << 10, 1 << 11, 1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16 }; + int[] bounds = new int[17]; + for (int i = 0; i < bounds.length; i++) { + bounds[i] = 1 << i; + } + return bounds; } private static int getBucket(long handlingTimeMillis) { - if (handlingTimeMillis < 100L) { + if (handlingTimeMillis <= 0) { return 0; - } else if (handlingTimeMillis < 300L) { - return 1; } else if (1L << 16 <= handlingTimeMillis) { return BUCKET_COUNT - 1; } else { - return Long.SIZE - 7 - Long.numberOfLeadingZeros(handlingTimeMillis); + return Long.SIZE - Long.numberOfLeadingZeros(handlingTimeMillis); } } diff --git a/server/src/main/java/org/elasticsearch/transport/InboundHandler.java b/server/src/main/java/org/elasticsearch/transport/InboundHandler.java index e6a9418a39899..ad2e3a9e38a3c 100644 --- a/server/src/main/java/org/elasticsearch/transport/InboundHandler.java +++ b/server/src/main/java/org/elasticsearch/transport/InboundHandler.java @@ -18,8 +18,6 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.network.HandlingTimeTracker; -import org.elasticsearch.common.transport.TransportAddress; -import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.util.concurrent.AbstractRunnable; import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.core.TimeValue; diff --git a/server/src/test/java/org/elasticsearch/common/network/HandlingTimeTrackerTests.java b/server/src/test/java/org/elasticsearch/common/network/HandlingTimeTrackerTests.java index d0f59912adc9e..b999cf8ff4875 100644 --- a/server/src/test/java/org/elasticsearch/common/network/HandlingTimeTrackerTests.java +++ b/server/src/test/java/org/elasticsearch/common/network/HandlingTimeTrackerTests.java @@ -17,37 +17,43 @@ public class HandlingTimeTrackerTests extends ESTestCase { public void testHistogram() { final HandlingTimeTracker handlingTimeTracker = new HandlingTimeTracker(); - assertArrayEquals(new long[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); + assertArrayEquals(new long[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); handlingTimeTracker.addHandlingTime(0L); - assertArrayEquals(new long[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); + assertArrayEquals(new long[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); - handlingTimeTracker.addHandlingTime(randomLongBetween(0L, 99L)); - assertArrayEquals(new long[] { 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); + handlingTimeTracker.addHandlingTime(1L); + assertArrayEquals(new long[] { 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); - handlingTimeTracker.addHandlingTime(100L); - assertArrayEquals(new long[] { 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); + handlingTimeTracker.addHandlingTime(2L); + assertArrayEquals(new long[] { 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); - handlingTimeTracker.addHandlingTime(299L); - assertArrayEquals(new long[] { 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); + handlingTimeTracker.addHandlingTime(3L); + assertArrayEquals(new long[] { 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); - handlingTimeTracker.addHandlingTime(300L); - assertArrayEquals(new long[] { 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); + handlingTimeTracker.addHandlingTime(4L); + assertArrayEquals(new long[] { 1, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); + + handlingTimeTracker.addHandlingTime(127L); + assertArrayEquals(new long[] { 1, 1, 2, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); + + handlingTimeTracker.addHandlingTime(128L); + assertArrayEquals(new long[] { 1, 1, 2, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, handlingTimeTracker.getHistogram()); handlingTimeTracker.addHandlingTime(65535L); - assertArrayEquals(new long[] { 2, 2, 1, 0, 0, 0, 0, 0, 0, 1, 0 }, handlingTimeTracker.getHistogram()); + assertArrayEquals(new long[] { 1, 1, 2, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0 }, handlingTimeTracker.getHistogram()); handlingTimeTracker.addHandlingTime(65536L); - assertArrayEquals(new long[] { 2, 2, 1, 0, 0, 0, 0, 0, 0, 1, 1 }, handlingTimeTracker.getHistogram()); + assertArrayEquals(new long[] { 1, 1, 2, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1 }, handlingTimeTracker.getHistogram()); handlingTimeTracker.addHandlingTime(Long.MAX_VALUE); - assertArrayEquals(new long[] { 2, 2, 1, 0, 0, 0, 0, 0, 0, 1, 2 }, handlingTimeTracker.getHistogram()); + assertArrayEquals(new long[] { 1, 1, 2, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2 }, handlingTimeTracker.getHistogram()); handlingTimeTracker.addHandlingTime(randomLongBetween(65536L, Long.MAX_VALUE)); - assertArrayEquals(new long[] { 2, 2, 1, 0, 0, 0, 0, 0, 0, 1, 3 }, handlingTimeTracker.getHistogram()); + assertArrayEquals(new long[] { 1, 1, 2, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 3 }, handlingTimeTracker.getHistogram()); - handlingTimeTracker.addHandlingTime(randomLongBetween(Long.MIN_VALUE, 99L)); - assertArrayEquals(new long[] { 3, 2, 1, 0, 0, 0, 0, 0, 0, 1, 3 }, handlingTimeTracker.getHistogram()); + handlingTimeTracker.addHandlingTime(randomLongBetween(Long.MIN_VALUE, 0L)); + assertArrayEquals(new long[] { 2, 1, 2, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 3 }, handlingTimeTracker.getHistogram()); } public void testHistogramRandom() { diff --git a/server/src/test/java/org/elasticsearch/transport/OutboundHandlerTests.java b/server/src/test/java/org/elasticsearch/transport/OutboundHandlerTests.java index 5d44452e59988..4a85ab868d890 100644 --- a/server/src/test/java/org/elasticsearch/transport/OutboundHandlerTests.java +++ b/server/src/test/java/org/elasticsearch/transport/OutboundHandlerTests.java @@ -23,9 +23,8 @@ import org.elasticsearch.common.bytes.ReleasableBytesReference; import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.logging.Loggers; -import org.elasticsearch.common.network.NetworkAddress; -import org.elasticsearch.common.network.HandlingTimeTracker; import org.elasticsearch.common.network.HandlingTimeTracker; +import org.elasticsearch.common.network.NetworkAddress; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.util.PageCacheRecycler; import org.elasticsearch.common.util.concurrent.ThreadContext; From b527e8222d75891cbae412015f911e442f4e2719 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 25 Nov 2021 15:02:58 +0000 Subject: [PATCH 4/7] Bucket count & bounds are known, no need to send over the wire --- .../elasticsearch/transport/TcpTransport.java | 4 +- .../transport/TransportStats.java | 37 ++++++++----------- .../cluster/node/stats/NodeStatsTests.java | 7 +--- 3 files changed, 18 insertions(+), 30 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/transport/TcpTransport.java b/server/src/main/java/org/elasticsearch/transport/TcpTransport.java index 4817f84905ca2..e704b27b65f7f 100644 --- a/server/src/main/java/org/elasticsearch/transport/TcpTransport.java +++ b/server/src/main/java/org/elasticsearch/transport/TcpTransport.java @@ -29,7 +29,6 @@ import org.elasticsearch.common.io.stream.RecyclerBytesStreamOutput; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.network.CloseableChannel; -import org.elasticsearch.common.network.HandlingTimeTracker; import org.elasticsearch.common.network.NetworkAddress; import org.elasticsearch.common.network.NetworkService; import org.elasticsearch.common.network.NetworkUtils; @@ -928,8 +927,7 @@ public final TransportStats getStats() { bytesRead, messagesSent, bytesWritten, - networkService.getHandlingTimeTracker().getHistogram(), - HandlingTimeTracker.getBucketUpperBounds() + networkService.getHandlingTimeTracker().getHistogram() ); } diff --git a/server/src/main/java/org/elasticsearch/transport/TransportStats.java b/server/src/main/java/org/elasticsearch/transport/TransportStats.java index b00acf1f40e90..2d7e44b652ff0 100644 --- a/server/src/main/java/org/elasticsearch/transport/TransportStats.java +++ b/server/src/main/java/org/elasticsearch/transport/TransportStats.java @@ -12,11 +12,13 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.network.HandlingTimeTracker; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.xcontent.ToXContentFragment; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; +import java.util.Arrays; public class TransportStats implements Writeable, ToXContentFragment { @@ -27,7 +29,6 @@ public class TransportStats implements Writeable, ToXContentFragment { private final long txCount; private final long txSize; private final long[] handlingTimeBucketFrequencies; - private final int[] handlingTimeBucketBounds; public TransportStats( long serverOpen, @@ -36,8 +37,7 @@ public TransportStats( long rxSize, long txCount, long txSize, - long[] handlingTimeBucketFrequencies, - int[] handlingTimeBucketBounds + long[] handlingTimeBucketFrequencies ) { this.serverOpen = serverOpen; this.totalOutboundConnections = totalOutboundConnections; @@ -46,7 +46,6 @@ public TransportStats( this.txCount = txCount; this.txSize = txSize; this.handlingTimeBucketFrequencies = handlingTimeBucketFrequencies; - this.handlingTimeBucketBounds = handlingTimeBucketBounds; assert assertHistogramConsistent(); } @@ -57,12 +56,13 @@ public TransportStats(StreamInput in) throws IOException { rxSize = in.readVLong(); txCount = in.readVLong(); txSize = in.readVLong(); - if (in.getVersion().onOrAfter(Version.V_8_1_0)) { - handlingTimeBucketFrequencies = in.readLongArray(); - handlingTimeBucketBounds = in.readIntArray(); + if (in.getVersion().onOrAfter(Version.V_8_1_0) && in.readBoolean()) { + handlingTimeBucketFrequencies = new long[HandlingTimeTracker.BUCKET_COUNT]; + for (int i = 0; i < handlingTimeBucketFrequencies.length; i++) { + handlingTimeBucketFrequencies[i] = in.readVLong(); + } } else { handlingTimeBucketFrequencies = new long[0]; - handlingTimeBucketBounds = new int[0]; } assert assertHistogramConsistent(); } @@ -76,8 +76,10 @@ public void writeTo(StreamOutput out) throws IOException { out.writeVLong(txCount); out.writeVLong(txSize); if (out.getVersion().onOrAfter(Version.V_8_1_0)) { - out.writeLongArray(handlingTimeBucketFrequencies); - out.writeIntArray(handlingTimeBucketBounds); + out.writeBoolean(handlingTimeBucketFrequencies.length > 0); + for (long handlingTimeBucketFrequency : handlingTimeBucketFrequencies) { + out.writeVLong(handlingTimeBucketFrequency); + } } } @@ -122,24 +124,15 @@ public ByteSizeValue getTxSize() { } public long[] getHandlingTimeBucketFrequencies() { - final long[] histogram = new long[handlingTimeBucketFrequencies.length]; - System.arraycopy(handlingTimeBucketFrequencies, 0, histogram, 0, handlingTimeBucketFrequencies.length); - return histogram; - } - - public int[] getHandlingTimeBucketBounds() { - final int[] bounds = new int[handlingTimeBucketBounds.length]; - System.arraycopy(handlingTimeBucketBounds, 0, bounds, 0, handlingTimeBucketBounds.length); - return bounds; + return Arrays.copyOf(handlingTimeBucketFrequencies, handlingTimeBucketFrequencies.length); } private boolean assertHistogramConsistent() { if (handlingTimeBucketFrequencies.length == 0) { // Stats came from before v8.1 assert Version.CURRENT.major == Version.V_8_0_0.major; - assert handlingTimeBucketBounds.length == 0; } else { - assert handlingTimeBucketFrequencies.length == handlingTimeBucketBounds.length + 1; + assert handlingTimeBucketFrequencies.length == HandlingTimeTracker.BUCKET_COUNT; } return true; } @@ -154,6 +147,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(Fields.TX_COUNT, txCount); builder.humanReadableField(Fields.TX_SIZE_IN_BYTES, Fields.TX_SIZE, new ByteSizeValue(txSize)); if (handlingTimeBucketFrequencies.length > 0) { + final int[] handlingTimeBucketBounds = HandlingTimeTracker.getBucketUpperBounds(); + assert handlingTimeBucketFrequencies.length == handlingTimeBucketBounds.length + 1; builder.startArray(Fields.HANDLING_TIME_HISTOGRAM); for (int i = 0; i < handlingTimeBucketFrequencies.length; i++) { builder.startObject(); diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java index 6889117c92cd5..732ca557fbaf6 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -244,10 +244,6 @@ public void testSerialization() throws IOException { nodeStats.getTransport().getHandlingTimeBucketFrequencies(), deserializedNodeStats.getTransport().getHandlingTimeBucketFrequencies() ); - assertArrayEquals( - nodeStats.getTransport().getHandlingTimeBucketBounds(), - deserializedNodeStats.getTransport().getHandlingTimeBucketBounds() - ); } if (nodeStats.getHttp() == null) { assertNull(deserializedNodeStats.getHttp()); @@ -683,8 +679,7 @@ public static NodeStats createNodeStats() { randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), - IntStream.range(0, HandlingTimeTracker.BUCKET_COUNT).mapToLong(i -> randomNonNegativeLong()).toArray(), - IntStream.range(0, HandlingTimeTracker.BUCKET_COUNT - 1).map(i -> between(0, Integer.MAX_VALUE)).toArray() + IntStream.range(0, HandlingTimeTracker.BUCKET_COUNT).mapToLong(i -> randomNonNegativeLong()).toArray() ) : null; HttpStats httpStats = null; From c775e4efec5811ef3a946ac403ef3065a860a1ca Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 25 Nov 2021 15:47:06 +0000 Subject: [PATCH 5/7] Separate inbound & outbound histograms --- .../test/nodes.stats/60_transport_stats.yml | 37 +++++--- .../elasticsearch/transport/TcpTransport.java | 14 ++- .../transport/TransportStats.java | 89 ++++++++++++------- .../cluster/node/stats/NodeStatsTests.java | 9 +- 4 files changed, 94 insertions(+), 55 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/60_transport_stats.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/60_transport_stats.yml index 55ac31f8df47d..3c3b4e6dacdf5 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/60_transport_stats.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/60_transport_stats.yml @@ -25,7 +25,7 @@ "Transport handling time histogram": - skip: version: " - 8.0.99" - reason: "handling_time_histogram was added in 8.1" + reason: "handling_time_histograms were added in 8.1" features: [arbitrary_key] - do: @@ -37,16 +37,31 @@ nodes.stats: metric: [ transport ] - - length: { nodes.$node_id.transport.handling_time_histogram: 17 } + - length: { nodes.$node_id.transport.inbound_handling_time_histogram: 18 } - - gte: { nodes.$node_id.transport.handling_time_histogram.0.count: 0 } - - is_false: nodes.$node_id.transport.handling_time_histogram.0.ge_millis - - match: { nodes.$node_id.transport.handling_time_histogram.0.lt_millis: 1 } + - gte: { nodes.$node_id.transport.inbound_handling_time_histogram.0.count: 0 } + - is_false: nodes.$node_id.transport.inbound_handling_time_histogram.0.ge_millis + - match: { nodes.$node_id.transport.inbound_handling_time_histogram.0.lt_millis: 1 } - - gte: { nodes.$node_id.transport.handling_time_histogram.1.count: 0 } - - match: { nodes.$node_id.transport.handling_time_histogram.1.ge_millis: 1 } - - match: { nodes.$node_id.transport.handling_time_histogram.1.lt_millis: 2 } + - gte: { nodes.$node_id.transport.inbound_handling_time_histogram.1.count: 0 } + - match: { nodes.$node_id.transport.inbound_handling_time_histogram.1.ge_millis: 1 } + - match: { nodes.$node_id.transport.inbound_handling_time_histogram.1.lt_millis: 2 } - - gte: { nodes.$node_id.transport.handling_time_histogram.16.count: 0 } - - match: { nodes.$node_id.transport.handling_time_histogram.16.ge_millis: 65536 } - - is_false: nodes.$node_id.transport.handling_time_histogram.16.lt_millis + - gte: { nodes.$node_id.transport.inbound_handling_time_histogram.17.count: 0 } + - match: { nodes.$node_id.transport.inbound_handling_time_histogram.17.ge_millis: 65536 } + - is_false: nodes.$node_id.transport.inbound_handling_time_histogram.17.lt_millis + + + - length: { nodes.$node_id.transport.outbound_handling_time_histogram: 18 } + + - gte: { nodes.$node_id.transport.outbound_handling_time_histogram.0.count: 0 } + - is_false: nodes.$node_id.transport.outbound_handling_time_histogram.0.ge_millis + - match: { nodes.$node_id.transport.outbound_handling_time_histogram.0.lt_millis: 1 } + + - gte: { nodes.$node_id.transport.outbound_handling_time_histogram.1.count: 0 } + - match: { nodes.$node_id.transport.outbound_handling_time_histogram.1.ge_millis: 1 } + - match: { nodes.$node_id.transport.outbound_handling_time_histogram.1.lt_millis: 2 } + + - gte: { nodes.$node_id.transport.outbound_handling_time_histogram.17.count: 0 } + - match: { nodes.$node_id.transport.outbound_handling_time_histogram.17.ge_millis: 65536 } + - is_false: nodes.$node_id.transport.outbound_handling_time_histogram.17.lt_millis diff --git a/server/src/main/java/org/elasticsearch/transport/TcpTransport.java b/server/src/main/java/org/elasticsearch/transport/TcpTransport.java index e704b27b65f7f..6462701265383 100644 --- a/server/src/main/java/org/elasticsearch/transport/TcpTransport.java +++ b/server/src/main/java/org/elasticsearch/transport/TcpTransport.java @@ -29,6 +29,7 @@ import org.elasticsearch.common.io.stream.RecyclerBytesStreamOutput; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.network.CloseableChannel; +import org.elasticsearch.common.network.HandlingTimeTracker; import org.elasticsearch.common.network.NetworkAddress; import org.elasticsearch.common.network.NetworkService; import org.elasticsearch.common.network.NetworkUtils; @@ -116,6 +117,7 @@ public abstract class TcpTransport extends AbstractLifecycleComponent implements private final TransportHandshaker handshaker; private final TransportKeepAlive keepAlive; + private final HandlingTimeTracker outboundHandlingTimeTracker = new HandlingTimeTracker(); private final OutboundHandler outboundHandler; private final InboundHandler inboundHandler; private final ResponseHandlers responseHandlers = new ResponseHandlers(); @@ -141,14 +143,7 @@ public TcpTransport( String nodeName = Node.NODE_NAME_SETTING.get(settings); this.recycler = createRecycler(settings, pageCacheRecycler); - this.outboundHandler = new OutboundHandler( - nodeName, - version, - statsTracker, - threadPool, - recycler, - networkService.getHandlingTimeTracker() - ); + this.outboundHandler = new OutboundHandler(nodeName, version, statsTracker, threadPool, recycler, outboundHandlingTimeTracker); this.handshaker = new TransportHandshaker( version, threadPool, @@ -927,7 +922,8 @@ public final TransportStats getStats() { bytesRead, messagesSent, bytesWritten, - networkService.getHandlingTimeTracker().getHistogram() + networkService.getHandlingTimeTracker().getHistogram(), + outboundHandlingTimeTracker.getHistogram() ); } diff --git a/server/src/main/java/org/elasticsearch/transport/TransportStats.java b/server/src/main/java/org/elasticsearch/transport/TransportStats.java index 2d7e44b652ff0..d578c8437da97 100644 --- a/server/src/main/java/org/elasticsearch/transport/TransportStats.java +++ b/server/src/main/java/org/elasticsearch/transport/TransportStats.java @@ -28,7 +28,8 @@ public class TransportStats implements Writeable, ToXContentFragment { private final long rxSize; private final long txCount; private final long txSize; - private final long[] handlingTimeBucketFrequencies; + private final long[] inboundHandlingTimeBucketFrequencies; + private final long[] outboundHandlingTimeBucketFrequencies; public TransportStats( long serverOpen, @@ -37,7 +38,8 @@ public TransportStats( long rxSize, long txCount, long txSize, - long[] handlingTimeBucketFrequencies + long[] inboundHandlingTimeBucketFrequencies, + long[] outboundHandlingTimeBucketFrequencies ) { this.serverOpen = serverOpen; this.totalOutboundConnections = totalOutboundConnections; @@ -45,8 +47,9 @@ public TransportStats( this.rxSize = rxSize; this.txCount = txCount; this.txSize = txSize; - this.handlingTimeBucketFrequencies = handlingTimeBucketFrequencies; - assert assertHistogramConsistent(); + this.inboundHandlingTimeBucketFrequencies = inboundHandlingTimeBucketFrequencies; + this.outboundHandlingTimeBucketFrequencies = outboundHandlingTimeBucketFrequencies; + assert assertHistogramsConsistent(); } public TransportStats(StreamInput in) throws IOException { @@ -57,14 +60,19 @@ public TransportStats(StreamInput in) throws IOException { txCount = in.readVLong(); txSize = in.readVLong(); if (in.getVersion().onOrAfter(Version.V_8_1_0) && in.readBoolean()) { - handlingTimeBucketFrequencies = new long[HandlingTimeTracker.BUCKET_COUNT]; - for (int i = 0; i < handlingTimeBucketFrequencies.length; i++) { - handlingTimeBucketFrequencies[i] = in.readVLong(); + inboundHandlingTimeBucketFrequencies = new long[HandlingTimeTracker.BUCKET_COUNT]; + for (int i = 0; i < inboundHandlingTimeBucketFrequencies.length; i++) { + inboundHandlingTimeBucketFrequencies[i] = in.readVLong(); + } + outboundHandlingTimeBucketFrequencies = new long[HandlingTimeTracker.BUCKET_COUNT]; + for (int i = 0; i < inboundHandlingTimeBucketFrequencies.length; i++) { + outboundHandlingTimeBucketFrequencies[i] = in.readVLong(); } } else { - handlingTimeBucketFrequencies = new long[0]; + inboundHandlingTimeBucketFrequencies = new long[0]; + outboundHandlingTimeBucketFrequencies = new long[0]; } - assert assertHistogramConsistent(); + assert assertHistogramsConsistent(); } @Override @@ -76,8 +84,12 @@ public void writeTo(StreamOutput out) throws IOException { out.writeVLong(txCount); out.writeVLong(txSize); if (out.getVersion().onOrAfter(Version.V_8_1_0)) { - out.writeBoolean(handlingTimeBucketFrequencies.length > 0); - for (long handlingTimeBucketFrequency : handlingTimeBucketFrequencies) { + assert (inboundHandlingTimeBucketFrequencies.length > 0) == (outboundHandlingTimeBucketFrequencies.length > 0); + out.writeBoolean(inboundHandlingTimeBucketFrequencies.length > 0); + for (long handlingTimeBucketFrequency : inboundHandlingTimeBucketFrequencies) { + out.writeVLong(handlingTimeBucketFrequency); + } + for (long handlingTimeBucketFrequency : outboundHandlingTimeBucketFrequencies) { out.writeVLong(handlingTimeBucketFrequency); } } @@ -123,16 +135,21 @@ public ByteSizeValue getTxSize() { return txSize(); } - public long[] getHandlingTimeBucketFrequencies() { - return Arrays.copyOf(handlingTimeBucketFrequencies, handlingTimeBucketFrequencies.length); + public long[] getInboundHandlingTimeBucketFrequencies() { + return Arrays.copyOf(inboundHandlingTimeBucketFrequencies, inboundHandlingTimeBucketFrequencies.length); } - private boolean assertHistogramConsistent() { - if (handlingTimeBucketFrequencies.length == 0) { + public long[] getOutboundHandlingTimeBucketFrequencies() { + return Arrays.copyOf(outboundHandlingTimeBucketFrequencies, outboundHandlingTimeBucketFrequencies.length); + } + + private boolean assertHistogramsConsistent() { + assert inboundHandlingTimeBucketFrequencies.length == outboundHandlingTimeBucketFrequencies.length; + if (inboundHandlingTimeBucketFrequencies.length == 0) { // Stats came from before v8.1 assert Version.CURRENT.major == Version.V_8_0_0.major; } else { - assert handlingTimeBucketFrequencies.length == HandlingTimeTracker.BUCKET_COUNT; + assert inboundHandlingTimeBucketFrequencies.length == HandlingTimeTracker.BUCKET_COUNT; } return true; } @@ -146,22 +163,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.humanReadableField(Fields.RX_SIZE_IN_BYTES, Fields.RX_SIZE, new ByteSizeValue(rxSize)); builder.field(Fields.TX_COUNT, txCount); builder.humanReadableField(Fields.TX_SIZE_IN_BYTES, Fields.TX_SIZE, new ByteSizeValue(txSize)); - if (handlingTimeBucketFrequencies.length > 0) { - final int[] handlingTimeBucketBounds = HandlingTimeTracker.getBucketUpperBounds(); - assert handlingTimeBucketFrequencies.length == handlingTimeBucketBounds.length + 1; - builder.startArray(Fields.HANDLING_TIME_HISTOGRAM); - for (int i = 0; i < handlingTimeBucketFrequencies.length; i++) { - builder.startObject(); - if (i > 0 && i <= handlingTimeBucketBounds.length) { - builder.field("ge_millis", handlingTimeBucketBounds[i - 1]); - } - if (i < handlingTimeBucketBounds.length) { - builder.field("lt_millis", handlingTimeBucketBounds[i]); - } - builder.field("count", handlingTimeBucketFrequencies[i]); - builder.endObject(); - } - builder.endArray(); + if (inboundHandlingTimeBucketFrequencies.length > 0) { + histogramToXContent(builder, inboundHandlingTimeBucketFrequencies, Fields.INBOUND_HANDLING_TIME_HISTOGRAM); + histogramToXContent(builder, outboundHandlingTimeBucketFrequencies, Fields.OUTBOUND_HANDLING_TIME_HISTOGRAM); } else { // Stats came from before v8.1 assert Version.CURRENT.major == Version.V_8_0_0.major; @@ -170,6 +174,24 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return builder; } + private void histogramToXContent(XContentBuilder builder, long[] bucketFrequencies, String fieldName) throws IOException { + final int[] bucketBounds = HandlingTimeTracker.getBucketUpperBounds(); + assert bucketFrequencies.length == bucketBounds.length + 1; + builder.startArray(fieldName); + for (int i = 0; i < bucketFrequencies.length; i++) { + builder.startObject(); + if (i > 0 && i <= bucketBounds.length) { + builder.field("ge_millis", bucketBounds[i - 1]); + } + if (i < bucketBounds.length) { + builder.field("lt_millis", bucketBounds[i]); + } + builder.field("count", bucketFrequencies[i]); + builder.endObject(); + } + builder.endArray(); + } + static final class Fields { static final String TRANSPORT = "transport"; static final String SERVER_OPEN = "server_open"; @@ -180,6 +202,7 @@ static final class Fields { static final String TX_COUNT = "tx_count"; static final String TX_SIZE = "tx_size"; static final String TX_SIZE_IN_BYTES = "tx_size_in_bytes"; - static final String HANDLING_TIME_HISTOGRAM = "handling_time_histogram"; + static final String INBOUND_HANDLING_TIME_HISTOGRAM = "inbound_handling_time_histogram"; + static final String OUTBOUND_HANDLING_TIME_HISTOGRAM = "outbound_handling_time_histogram"; } } diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java index 732ca557fbaf6..2467aded6292f 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -241,8 +241,12 @@ public void testSerialization() throws IOException { assertEquals(nodeStats.getTransport().getTxCount(), deserializedNodeStats.getTransport().getTxCount()); assertEquals(nodeStats.getTransport().getTxSize(), deserializedNodeStats.getTransport().getTxSize()); assertArrayEquals( - nodeStats.getTransport().getHandlingTimeBucketFrequencies(), - deserializedNodeStats.getTransport().getHandlingTimeBucketFrequencies() + nodeStats.getTransport().getInboundHandlingTimeBucketFrequencies(), + deserializedNodeStats.getTransport().getInboundHandlingTimeBucketFrequencies() + ); + assertArrayEquals( + nodeStats.getTransport().getOutboundHandlingTimeBucketFrequencies(), + deserializedNodeStats.getTransport().getOutboundHandlingTimeBucketFrequencies() ); } if (nodeStats.getHttp() == null) { @@ -679,6 +683,7 @@ public static NodeStats createNodeStats() { randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), + IntStream.range(0, HandlingTimeTracker.BUCKET_COUNT).mapToLong(i -> randomNonNegativeLong()).toArray(), IntStream.range(0, HandlingTimeTracker.BUCKET_COUNT).mapToLong(i -> randomNonNegativeLong()).toArray() ) : null; From 6a967b98478b11addfba1f0651bb4aff40fb6562 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 25 Nov 2021 15:51:15 +0000 Subject: [PATCH 6/7] Fix docs --- docs/reference/cluster/nodes-stats.asciidoc | 34 ++++++++++++++++++--- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/docs/reference/cluster/nodes-stats.asciidoc b/docs/reference/cluster/nodes-stats.asciidoc index bd2b8ffad7590..a909335fd30ee 100644 --- a/docs/reference/cluster/nodes-stats.asciidoc +++ b/docs/reference/cluster/nodes-stats.asciidoc @@ -1900,12 +1900,12 @@ Size of TX packets sent by the node during internal cluster communication. Size, in bytes, of TX packets sent by the node during internal cluster communication. -`handling_time_histogram`:: +`inbound_handling_time_histogram`:: (array) -The distribution of the time spent handling a message on a transport thread, -represented as a histogram. +The distribution of the time spent handling each inbound message on a transport +thread, represented as a histogram. + -.Properties of `handling_time_histogram` +.Properties of `inbound_handling_time_histogram` [%collapsible] ======= `ge_millis`:: @@ -1921,7 +1921,31 @@ bucket since this bucket has no upper bound. `count`:: (integer) The number of times a transport thread took a period of time within the bounds -of this bucket to handle a message. +of this bucket to handle an inbound message. +======= + +`outbound_handling_time_histogram`:: +(array) +The distribution of the time spent sending each outbound transport message on a +transport thread, represented as a histogram. ++ +.Properties of `outbound_handling_time_histogram` +[%collapsible] +======= +`ge_millis`:: +(integer) +The inclusive lower bound of the bucket in milliseconds. Omitted on the first +bucket since this bucket has no lower bound. + +`lt_millis`:: +(integer) +The exclusive upper bound of the bucket in milliseconds. Omitted on the last +bucket since this bucket has no upper bound. + +`count`:: +(integer) +The number of times a transport thread took a period of time within the bounds +of this bucket to send a transport message. ======= ====== From 8419a2d66a6a1d6d0d82ef69c13ddc6de46ee9cb Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 26 Nov 2021 10:06:41 +0000 Subject: [PATCH 7/7] Less magic --- .../org/elasticsearch/common/network/HandlingTimeTracker.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/common/network/HandlingTimeTracker.java b/server/src/main/java/org/elasticsearch/common/network/HandlingTimeTracker.java index 831df6e7c1075..a2787cb2d5332 100644 --- a/server/src/main/java/org/elasticsearch/common/network/HandlingTimeTracker.java +++ b/server/src/main/java/org/elasticsearch/common/network/HandlingTimeTracker.java @@ -26,7 +26,7 @@ public static int[] getBucketUpperBounds() { private static int getBucket(long handlingTimeMillis) { if (handlingTimeMillis <= 0) { return 0; - } else if (1L << 16 <= handlingTimeMillis) { + } else if (LAST_BUCKET_LOWER_BOUND <= handlingTimeMillis) { return BUCKET_COUNT - 1; } else { return Long.SIZE - Long.numberOfLeadingZeros(handlingTimeMillis); @@ -35,6 +35,8 @@ private static int getBucket(long handlingTimeMillis) { public static final int BUCKET_COUNT = getBucketUpperBounds().length + 1; + private static final long LAST_BUCKET_LOWER_BOUND = getBucketUpperBounds()[BUCKET_COUNT - 2]; + private final LongAdder[] buckets; public HandlingTimeTracker() {