Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Record more detailed HTTP stats #99852

Merged
merged 25 commits into from
Oct 12, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
c22fe4a
WIP
ywangd Sep 25, 2023
56d857a
Update docs/changelog/99852.yaml
ywangd Sep 25, 2023
2a4f24a
Merge remote-tracking branch 'origin/main' into es-95739
ywangd Sep 26, 2023
32a9e3a
fix test
ywangd Sep 26, 2023
e669041
add response time
ywangd Sep 26, 2023
2873701
add merge and info stats support
ywangd Sep 26, 2023
4504e52
tests tweak
ywangd Sep 26, 2023
5c1fdb7
wrap chunked body and report size on close
ywangd Sep 27, 2023
4a4cd4a
use nanoTime
ywangd Sep 27, 2023
0d207b0
add tests for httpRouteStats
ywangd Sep 27, 2023
860341e
add tests for httpRouteStats
ywangd Sep 27, 2023
971f047
Merge remote-tracking branch 'origin/main' into es-95739
ywangd Sep 27, 2023
5849e3d
address feedback
ywangd Sep 27, 2023
4b2e084
Merge remote-tracking branch 'origin/main' into es-95739
ywangd Sep 27, 2023
49b95f8
upgrade encoded response length to long
ywangd Sep 27, 2023
9f84029
fix test
ywangd Sep 27, 2023
589b35d
assertion
ywangd Sep 27, 2023
159ff77
Merge remote-tracking branch 'origin/main' into es-95739
ywangd Oct 3, 2023
f04e5af
Merge branch 'main' into es-95739
elasticmachine Oct 3, 2023
3a62b0d
Merge remote-tracking branch 'origin/main' into es-95739
ywangd Oct 4, 2023
55fc8d3
Merge remote-tracking branch 'origin/main' into es-95739
ywangd Oct 11, 2023
345623a
Update qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/htt…
ywangd Oct 12, 2023
a4355c4
fix import
ywangd Oct 12, 2023
bfb00c8
Merge remote-tracking branch 'origin/main' into es-95739
ywangd Oct 12, 2023
bd4ef2c
tweak
ywangd Oct 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/99852.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 99852
summary: Record more detailed HTTP stats
area: Network
type: enhancement
issues: []
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# This test needs multiple nodes, because a single-node cluster does not send any transport actions so these stats are empty
---
"http stats":
- skip:
features: [arbitrary_key]

- do:
search:
index: "*"
body:
query:
match_all: {}

- do:
nodes.stats:
metric: [ http ]
human: true

- set:
nodes._arbitrary_key_: node_id

- is_true: "nodes.$node_id.http.routes./_cat/nodes"
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ static TransportVersion def(int id) {
public static final TransportVersion WAIT_FOR_CLUSTER_STATE_IN_RECOVERY_ADDED = def(8_502_00_0);
public static final TransportVersion RECOVERY_COMMIT_TOO_NEW_EXCEPTION_ADDED = def(8_503_00_0);
public static final TransportVersion NODE_INFO_COMPONENT_VERSIONS_ADDED = def(8_504_00_0);
public static final TransportVersion NODE_STATS_HTTP_ROUTE_STATS_ADDED = def(8_505_00_0);
/*
* STOP! READ THIS FIRST! No, really,
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

package org.elasticsearch.common.path;

import org.elasticsearch.common.collect.Iterators;

import java.util.EnumSet;
import java.util.HashMap;
import java.util.Iterator;
Expand Down Expand Up @@ -267,6 +269,15 @@ private void put(Map<String, String> params, TrieNode node, String value) {
}
}

Iterator<T> allNodeValues() {
final Iterator<T> childrenIterator = Iterators.flatMap(children.values().iterator(), TrieNode::allNodeValues);
if (value == null) {
return childrenIterator;
} else {
return Iterators.concat(Iterators.single(value), childrenIterator);
}
}

@Override
public String toString() {
return key;
Expand Down Expand Up @@ -366,4 +377,8 @@ public T next() {
}
};
}

public Iterator<T> allNodeValues() {
return Iterators.concat(Iterators.single(rootValue), root.allNodeValues());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,12 @@ public HttpInfo info() {

@Override
public HttpStats stats() {
return new HttpStats(httpChannels.size(), totalChannelsAccepted.get(), httpClientStatsTracker.getClientStats());
return new HttpStats(
httpChannels.size(),
totalChannelsAccepted.get(),
httpClientStatsTracker.getClientStats(),
dispatcher.getStats()
);
}

protected void bindServer() {
Expand Down
90 changes: 90 additions & 0 deletions server/src/main/java/org/elasticsearch/http/HttpRouteStats.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.http;

import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.xcontent.ToXContentObject;
import org.elasticsearch.xcontent.XContentBuilder;

import java.io.IOException;

public record HttpRouteStats(
long requestCount,
long totalRequestSize,
long[] requestSizeHistogram,
long responseCount,
long totalResponseSize,
long[] responseSizeHistogram
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not think this is mentioned in the original issue, but is it possible or beneficial to track response statuses counts as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is relatively easy to track the number of different response status. But we are probably more interested in their recent trends rather than the overall stats from last restart because we want to know whether the node is "currently" experiencing problem. This means we need to compute some moving averages instead of the overall average which is what we are doing here for request/response sizes. I have not yet found an existing example of computing moving averages for stats collection. It's definitely doable. But I also wonder whether it starts getting into the territory of APM and should be handled externally. This might be why we haven't done it? I'll dig a bit more. For the purpose of this PR, I think it's better to keep them separate.

) implements Writeable, ToXContentObject {

public HttpRouteStats(StreamInput in) throws IOException {
this(in.readVLong(), in.readVLong(), in.readVLongArray(), in.readVLong(), in.readVLong(), in.readVLongArray());
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();

builder.startObject("requests");
builder.field("count", requestCount);
builder.humanReadableField("total_size_in_bytes", "total_size", ByteSizeValue.ofBytes(totalRequestSize));
histogramToXContent(builder, requestSizeHistogram);
builder.endObject();

builder.startObject("responses");
builder.field("count", responseCount);
builder.humanReadableField("total_size_in_bytes", "total_size", ByteSizeValue.ofBytes(totalResponseSize));
histogramToXContent(builder, responseSizeHistogram);
builder.endObject();

return builder.endObject();
}

static void histogramToXContent(XContentBuilder builder, long[] sizeHistogram) throws IOException {
final int[] bucketBounds = HttpRouteStatsTracker.getBucketUpperBounds();
assert sizeHistogram.length == bucketBounds.length + 1;
builder.startArray("histogram");

int firstBucket = 0;
long remainingCount = 0L;
for (int i = 0; i < sizeHistogram.length; i++) {
if (remainingCount == 0) {
firstBucket = i;
}
remainingCount += sizeHistogram[i];
}

for (int i = firstBucket; i < sizeHistogram.length && 0 < remainingCount; i++) {
builder.startObject();
if (i > 0) {
builder.humanReadableField("ge_bytes", "ge", ByteSizeValue.ofBytes(bucketBounds[i - 1]));
}
if (i < bucketBounds.length) {
builder.humanReadableField("lt_bytes", "lt", ByteSizeValue.ofBytes(bucketBounds[i]));
}
builder.field("count", sizeHistogram[i]);
builder.endObject();
remainingCount -= sizeHistogram[i];
}
builder.endArray();
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeVLong(requestCount);
out.writeVLong(totalRequestSize);
out.writeVLongArray(requestSizeHistogram);
out.writeVLong(responseCount);
out.writeVLong(totalResponseSize);
out.writeVLongArray(responseSizeHistogram);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.http;

import java.util.concurrent.atomic.AtomicLongArray;
import java.util.concurrent.atomic.LongAdder;

public class HttpRouteStatsTracker {

/*
* default http.max_content_length is 100 MB so that the last histogram bucket is > 64MB (2^26)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's the maximum request size but for responses we can return much more (maybe even GiBs) - suggest adding another 4 or 5 buckets at least.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call. I added 4 more buckets so that the last bucket is for > 1.0GB.

*/

public static int[] getBucketUpperBounds() {
var bounds = new int[27];
for (int i = 0; i < bounds.length; i++) {
bounds[i] = 1 << i;
}
return bounds;
}

private static final int BUCKET_COUNT = getBucketUpperBounds().length + 1;

private static final long LAST_BUCKET_LOWER_BOUND = getBucketUpperBounds()[BUCKET_COUNT - 2];

private record StatsTracker(LongAdder count, LongAdder totalSize, AtomicLongArray histogram) {
StatsTracker {
assert count.longValue() == 0L;
assert totalSize.longValue() == 0L;
assert histogram.length() == BUCKET_COUNT;
}

StatsTracker() {
this(new LongAdder(), new LongAdder(), new AtomicLongArray(BUCKET_COUNT));
}

void addStats(int contentLength) {
count().increment();
totalSize().add(contentLength);
histogram().incrementAndGet(bucket(contentLength));
}

long[] getHistogram() {
long[] histogramCopy = new long[BUCKET_COUNT];
for (int i = 0; i < BUCKET_COUNT; i++) {
histogramCopy[i] = histogram().get(i);
}
return histogramCopy;
}
}

private static int bucket(int contentLength) {
if (contentLength <= 0) {
return 0;
} else if (LAST_BUCKET_LOWER_BOUND <= contentLength) {
return BUCKET_COUNT - 1;
} else {
return Integer.SIZE - Integer.numberOfLeadingZeros(contentLength);
}
}

private final StatsTracker requestStats = new StatsTracker();
private final StatsTracker responseStats = new StatsTracker();

public void addRequestStats(int contentLength) {
requestStats.addStats(contentLength);
}

public void addResponseStats(int contentLength) {
responseStats.addStats(contentLength);
}

public HttpRouteStats getStats() {
return new HttpRouteStats(
requestStats.count().longValue(),
requestStats.totalSize().longValue(),
requestStats.getHistogram(),
responseStats.count().longValue(),
responseStats.totalSize().longValue(),
responseStats.getHistogram()
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import org.elasticsearch.rest.RestChannel;
import org.elasticsearch.rest.RestRequest;

import java.util.Map;

public interface HttpServerTransport extends LifecycleComponent, ReportingService<HttpInfo> {

String HTTP_PROFILE_NAME = ".http";
Expand Down Expand Up @@ -52,5 +54,8 @@ interface Dispatcher {
*/
void dispatchBadRequest(RestChannel channel, ThreadContext threadContext, Throwable cause);

default Map<String, HttpRouteStats> getStats() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NIT: I think it is worth renaming to routeStats or perRouteStats to be consistent with stats method. That one could also have some reasonable default now.

return Map.of();
}
}
}
45 changes: 39 additions & 6 deletions server/src/main/java/org/elasticsearch/http/HttpStats.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,39 @@
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;

public record HttpStats(long serverOpen, long totalOpen, List<ClientStats> clientStats) implements Writeable, ChunkedToXContent {
import static org.elasticsearch.TransportVersions.NODE_STATS_HTTP_ROUTE_STATS_ADDED;

public static final HttpStats IDENTITY = new HttpStats(0, 0, List.of());
public record HttpStats(long serverOpen, long totalOpen, List<ClientStats> clientStats, Map<String, HttpRouteStats> httpRouteStats)
implements
Writeable,
ChunkedToXContent {

public static final HttpStats IDENTITY = new HttpStats(0, 0, List.of(), Map.of());

public HttpStats(long serverOpen, long totalOpened) {
this(serverOpen, totalOpened, List.of());
this(serverOpen, totalOpened, List.of(), Map.of());
}

public HttpStats(StreamInput in) throws IOException {
this(in.readVLong(), in.readVLong(), in.readCollectionAsList(ClientStats::new));
this(
in.readVLong(),
in.readVLong(),
in.readCollectionAsList(ClientStats::new),
in.getTransportVersion().onOrAfter(NODE_STATS_HTTP_ROUTE_STATS_ADDED) ? in.readMap(HttpRouteStats::new) : Map.of()
);
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeVLong(serverOpen);
out.writeVLong(totalOpen);
out.writeCollection(clientStats);
if (out.getTransportVersion().onOrAfter(NODE_STATS_HTTP_ROUTE_STATS_ADDED)) {
out.writeMap(httpRouteStats, StreamOutput::writeWriteable);
}
}

public long getServerOpen() {
Expand All @@ -57,7 +71,8 @@ public static HttpStats merge(HttpStats first, HttpStats second) {
return new HttpStats(
first.serverOpen + second.serverOpen,
first.totalOpen + second.totalOpen,
Stream.concat(first.clientStats.stream(), second.clientStats.stream()).toList()
Stream.concat(first.clientStats.stream(), second.clientStats.stream()).toList(),
Map.of() // TODO: merge
);
}

Expand All @@ -78,6 +93,7 @@ static final class Fields {
static final String CLIENT_REQUEST_SIZE_BYTES = "request_size_bytes";
static final String CLIENT_FORWARDED_FOR = "x_forwarded_for";
static final String CLIENT_OPAQUE_ID = "x_opaque_id";
static final String ROUTES = "routes";
}

@Override
Expand All @@ -90,7 +106,24 @@ public Iterator<? extends ToXContent> toXContentChunked(ToXContent.Params outerP
.startArray(Fields.CLIENTS)
),
clientStats.iterator(),
Iterators.single((builder, params) -> builder.endArray().endObject())
Iterators.single((builder, params) -> {
builder.endArray();
if (httpRouteStats.isEmpty() == false) {
ywangd marked this conversation as resolved.
Show resolved Hide resolved
builder.startObject(Fields.ROUTES);
}
return builder;
}),
Iterators.map(httpRouteStats.entrySet().iterator(), entry -> (builder, params) -> {
builder.field(entry.getKey());
entry.getValue().toXContent(builder, params);
return builder;
}),
Iterators.single((builder, params) -> {
if (httpRouteStats.isEmpty() == false) {
builder.endObject();
}
return builder.endObject();
})
);
}

Expand Down
Loading