-
Notifications
You must be signed in to change notification settings - Fork 24.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Record more detailed HTTP stats #99852
Changes from 2 commits
c22fe4a
56d857a
2a4f24a
32a9e3a
e669041
2873701
4504e52
5c1fdb7
4a4cd4a
0d207b0
860341e
971f047
5849e3d
4b2e084
49b95f8
9f84029
589b35d
159ff77
f04e5af
3a62b0d
55fc8d3
345623a
a4355c4
bfb00c8
bd4ef2c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
pr: 99852
summary: Record more detailed HTTP stats
area: Network
type: enhancement
issues: []
Original file line number | Diff line number | Diff line change |
---|---|---|
# NOTE(review): this header comment appears to be copied from the transport stats
# test ("a single-node cluster does not send any transport actions") — this test
# asserts HTTP stats; confirm whether multiple nodes are actually required here.
---
"http stats":
  - skip:
      features: [arbitrary_key]

  - do:
      search:
        index: "*"
        body:
          query:
            match_all: {}

  - do:
      nodes.stats:
        metric: [ http ]
        human: true

  - set:
      nodes._arbitrary_key_: node_id

  - is_true: "nodes.$node_id.http.routes./_cat/nodes"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.http; | ||
|
||
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.xcontent.ToXContentObject;
import org.elasticsearch.xcontent.XContentBuilder;

import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
|
||
public record HttpRouteStats( | ||
long requestCount, | ||
long totalRequestSize, | ||
long[] requestSizeHistogram, | ||
long responseCount, | ||
long totalResponseSize, | ||
long[] responseSizeHistogram | ||
) implements Writeable, ToXContentObject { | ||
|
||
public HttpRouteStats(StreamInput in) throws IOException { | ||
this(in.readVLong(), in.readVLong(), in.readVLongArray(), in.readVLong(), in.readVLong(), in.readVLongArray()); | ||
} | ||
|
||
@Override | ||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { | ||
builder.startObject(); | ||
|
||
builder.startObject("requests"); | ||
builder.field("count", requestCount); | ||
builder.humanReadableField("total_size_in_bytes", "total_size", ByteSizeValue.ofBytes(totalRequestSize)); | ||
histogramToXContent(builder, requestSizeHistogram); | ||
builder.endObject(); | ||
|
||
builder.startObject("responses"); | ||
builder.field("count", responseCount); | ||
builder.humanReadableField("total_size_in_bytes", "total_size", ByteSizeValue.ofBytes(totalResponseSize)); | ||
histogramToXContent(builder, responseSizeHistogram); | ||
builder.endObject(); | ||
|
||
return builder.endObject(); | ||
} | ||
|
||
static void histogramToXContent(XContentBuilder builder, long[] sizeHistogram) throws IOException { | ||
final int[] bucketBounds = HttpRouteStatsTracker.getBucketUpperBounds(); | ||
assert sizeHistogram.length == bucketBounds.length + 1; | ||
builder.startArray("histogram"); | ||
|
||
int firstBucket = 0; | ||
long remainingCount = 0L; | ||
for (int i = 0; i < sizeHistogram.length; i++) { | ||
if (remainingCount == 0) { | ||
firstBucket = i; | ||
} | ||
remainingCount += sizeHistogram[i]; | ||
} | ||
|
||
for (int i = firstBucket; i < sizeHistogram.length && 0 < remainingCount; i++) { | ||
builder.startObject(); | ||
if (i > 0) { | ||
builder.humanReadableField("ge_bytes", "ge", ByteSizeValue.ofBytes(bucketBounds[i - 1])); | ||
} | ||
if (i < bucketBounds.length) { | ||
builder.humanReadableField("lt_bytes", "lt", ByteSizeValue.ofBytes(bucketBounds[i])); | ||
} | ||
builder.field("count", sizeHistogram[i]); | ||
builder.endObject(); | ||
remainingCount -= sizeHistogram[i]; | ||
} | ||
builder.endArray(); | ||
} | ||
|
||
@Override | ||
public void writeTo(StreamOutput out) throws IOException { | ||
out.writeVLong(requestCount); | ||
out.writeVLong(totalRequestSize); | ||
out.writeVLongArray(requestSizeHistogram); | ||
out.writeVLong(responseCount); | ||
out.writeVLong(totalResponseSize); | ||
out.writeVLongArray(responseSizeHistogram); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.http; | ||
|
||
import java.util.concurrent.atomic.AtomicLongArray; | ||
import java.util.concurrent.atomic.LongAdder; | ||
|
||
public class HttpRouteStatsTracker { | ||
|
||
/* | ||
* default http.max_content_length is 100 MB so that the last histogram bucket is > 64MB (2^26) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's the maximum request size but for responses we can return much more (maybe even GiBs) - suggest adding another 4 or 5 buckets at least. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good call. I added 4 more buckets so that the last bucket is for > 1.0GB. |
||
*/ | ||
|
||
public static int[] getBucketUpperBounds() { | ||
var bounds = new int[27]; | ||
for (int i = 0; i < bounds.length; i++) { | ||
bounds[i] = 1 << i; | ||
} | ||
return bounds; | ||
} | ||
|
||
private static final int BUCKET_COUNT = getBucketUpperBounds().length + 1; | ||
|
||
private static final long LAST_BUCKET_LOWER_BOUND = getBucketUpperBounds()[BUCKET_COUNT - 2]; | ||
|
||
private record StatsTracker(LongAdder count, LongAdder totalSize, AtomicLongArray histogram) { | ||
StatsTracker { | ||
assert count.longValue() == 0L; | ||
assert totalSize.longValue() == 0L; | ||
assert histogram.length() == BUCKET_COUNT; | ||
} | ||
|
||
StatsTracker() { | ||
this(new LongAdder(), new LongAdder(), new AtomicLongArray(BUCKET_COUNT)); | ||
} | ||
|
||
void addStats(int contentLength) { | ||
count().increment(); | ||
totalSize().add(contentLength); | ||
histogram().incrementAndGet(bucket(contentLength)); | ||
} | ||
|
||
long[] getHistogram() { | ||
long[] histogramCopy = new long[BUCKET_COUNT]; | ||
for (int i = 0; i < BUCKET_COUNT; i++) { | ||
histogramCopy[i] = histogram().get(i); | ||
} | ||
return histogramCopy; | ||
} | ||
} | ||
|
||
private static int bucket(int contentLength) { | ||
if (contentLength <= 0) { | ||
return 0; | ||
} else if (LAST_BUCKET_LOWER_BOUND <= contentLength) { | ||
return BUCKET_COUNT - 1; | ||
} else { | ||
return Integer.SIZE - Integer.numberOfLeadingZeros(contentLength); | ||
} | ||
} | ||
|
||
private final StatsTracker requestStats = new StatsTracker(); | ||
private final StatsTracker responseStats = new StatsTracker(); | ||
|
||
public void addRequestStats(int contentLength) { | ||
requestStats.addStats(contentLength); | ||
} | ||
|
||
public void addResponseStats(int contentLength) { | ||
responseStats.addStats(contentLength); | ||
} | ||
|
||
public HttpRouteStats getStats() { | ||
return new HttpRouteStats( | ||
requestStats.count().longValue(), | ||
requestStats.totalSize().longValue(), | ||
requestStats.getHistogram(), | ||
responseStats.count().longValue(), | ||
responseStats.totalSize().longValue(), | ||
responseStats.getHistogram() | ||
); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,8 @@ | |
import org.elasticsearch.rest.RestChannel; | ||
import org.elasticsearch.rest.RestRequest; | ||
|
||
import java.util.Map; | ||
|
||
public interface HttpServerTransport extends LifecycleComponent, ReportingService<HttpInfo> { | ||
|
||
String HTTP_PROFILE_NAME = ".http"; | ||
|
@@ -52,5 +54,8 @@ interface Dispatcher { | |
*/ | ||
void dispatchBadRequest(RestChannel channel, ThreadContext threadContext, Throwable cause); | ||
|
||
default Map<String, HttpRouteStats> getStats() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. NIT: I think it is worth renaming to |
||
return Map.of(); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do not think this is mentioned in the original issue, but is it possible or beneficial to track response statuses counts as well?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is relatively easy to track the number of different response status. But we are probably more interested in their recent trends rather than the overall stats from last restart because we want to know whether the node is "currently" experiencing problem. This means we need to compute some moving averages instead of the overall average which is what we are doing here for request/response sizes. I have not yet found an existing example of computing moving averages for stats collection. It's definitely doable. But I also wonder whether it starts getting into the territory of APM and should be handled externally. This might be why we haven't done it? I'll dig a bit more. For the purpose of this PR, I think it's better to keep them separate.