From 46eb23c2fd8c7606a891f2e065e384cb0a889974 Mon Sep 17 00:00:00 2001 From: Bhumika Saini Date: Sat, 14 Oct 2023 19:46:09 +0530 Subject: [PATCH] [Remote Store] Add Remote Store backpressure rejection stats to _nodes/stats (#10524) Signed-off-by: Bhumika Saini Signed-off-by: Siddhant Deshmukh --- CHANGELOG.md | 1 + .../index/remote/RemoteSegmentStats.java | 51 ++++++++++++++++--- .../remote/RemoteSegmentTransferTracker.java | 3 +- .../cluster/node/stats/NodeStatsTests.java | 1 + .../index/shard/IndexShardTests.java | 4 ++ 5 files changed, 52 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index db55c354bd9f0..e1f40b02bb74d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -91,6 +91,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Dependencies ### Changed +- [Remote Store] Add Remote Store backpressure rejection stats to `_nodes/stats` ([#10524](https://github.com/opensearch-project/OpenSearch/pull/10524)) ### Deprecated diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteSegmentStats.java b/server/src/main/java/org/opensearch/index/remote/RemoteSegmentStats.java index c7863536adf20..5992923a4157b 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteSegmentStats.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteSegmentStats.java @@ -8,6 +8,7 @@ package org.opensearch.index.remote; +import org.opensearch.Version; import org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStats; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.common.io.stream.StreamInput; @@ -75,6 +76,10 @@ public class RemoteSegmentStats implements Writeable, ToXContentFragment { * Total time spent in downloading segments from remote store */ private long totalDownloadTime; + /** + * Total rejections due to remote store upload backpressure + */ + private long totalRejections; public RemoteSegmentStats() {} @@ -90,6 +95,10 @@ public RemoteSegmentStats(StreamInput in) throws IOException { totalRefreshBytesLag = in.readLong(); totalUploadTime = in.readLong(); totalDownloadTime = in.readLong(); + // TODO: change to V_2_12_0 on main after backport to 2.x + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + totalRejections = in.readVLong(); + } } /** @@ -115,6 +124,7 @@ public RemoteSegmentStats(RemoteSegmentTransferTracker.Stats trackerStats) { this.totalRefreshBytesLag = trackerStats.bytesLag; this.totalUploadTime = trackerStats.totalUploadTimeInMs; this.totalDownloadTime = trackerStats.directoryFileTransferTrackerStats.totalTransferTimeInMs; + this.totalRejections = trackerStats.rejectionCount; } // Getter and setters. All are visible for testing @@ -207,6 +217,14 @@ public void addTotalDownloadTime(long totalDownloadTime) { this.totalDownloadTime += totalDownloadTime; } + public long getTotalRejections() { + return totalRejections; + } + + public void addTotalRejections(long totalRejections) { + this.totalRejections += totalRejections; + } + /** * Adds existing stats. Used for stats roll-ups at index or node level * @@ -225,6 +243,7 @@ public void add(RemoteSegmentStats existingStats) { this.totalRefreshBytesLag += existingStats.getTotalRefreshBytesLag(); this.totalUploadTime += existingStats.getTotalUploadTime(); this.totalDownloadTime += existingStats.getTotalDownloadTime(); + this.totalRejections += existingStats.totalRejections; } } @@ -241,18 +260,26 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(totalRefreshBytesLag); out.writeLong(totalUploadTime); out.writeLong(totalDownloadTime); + // TODO: change to V_2_12_0 on main after backport to 2.x + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeVLong(totalRejections); + } } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(Fields.REMOTE_STORE); + builder.startObject(Fields.UPLOAD); buildUploadStats(builder); - builder.endObject(); + builder.endObject(); // UPLOAD + builder.startObject(Fields.DOWNLOAD); buildDownloadStats(builder); - builder.endObject(); - builder.endObject(); + builder.endObject(); // DOWNLOAD + + builder.endObject(); // REMOTE_STORE + return builder; } @@ -261,13 +288,19 @@ private void buildUploadStats(XContentBuilder builder) throws IOException { builder.humanReadableField(Fields.STARTED_BYTES, Fields.STARTED, new ByteSizeValue(uploadBytesStarted)); builder.humanReadableField(Fields.SUCCEEDED_BYTES, Fields.SUCCEEDED, new ByteSizeValue(uploadBytesSucceeded)); builder.humanReadableField(Fields.FAILED_BYTES, Fields.FAILED, new ByteSizeValue(uploadBytesFailed)); - builder.endObject(); + builder.endObject(); // TOTAL_UPLOAD_SIZE + builder.startObject(Fields.REFRESH_SIZE_LAG); builder.humanReadableField(Fields.TOTAL_BYTES, Fields.TOTAL, new ByteSizeValue(totalRefreshBytesLag)); builder.humanReadableField(Fields.MAX_BYTES, Fields.MAX, new ByteSizeValue(maxRefreshBytesLag)); - builder.endObject(); + builder.endObject(); // REFRESH_SIZE_LAG + builder.humanReadableField(Fields.MAX_REFRESH_TIME_LAG_IN_MILLIS, Fields.MAX_REFRESH_TIME_LAG, new TimeValue(maxRefreshTimeLag)); builder.humanReadableField(Fields.TOTAL_TIME_SPENT_IN_MILLIS, Fields.TOTAL_TIME_SPENT, new TimeValue(totalUploadTime)); + + builder.startObject(Fields.PRESSURE); + builder.field(Fields.TOTAL_REJECTIONS, totalRejections); + builder.endObject(); // PRESSURE } private void buildDownloadStats(XContentBuilder builder) throws IOException { @@ -300,6 +333,8 @@ static final class Fields { static final String MAX_BYTES = "max_bytes"; static final String TOTAL_TIME_SPENT = "total_time_spent"; static final String TOTAL_TIME_SPENT_IN_MILLIS = "total_time_spent_in_millis"; + static final String PRESSURE = "pressure"; + static final String TOTAL_REJECTIONS = "total_rejections"; } @Override @@ -318,7 +353,8 @@ public boolean equals(Object o) { && maxRefreshBytesLag == that.maxRefreshBytesLag && totalRefreshBytesLag == that.totalRefreshBytesLag && totalUploadTime == that.totalUploadTime - && totalDownloadTime == that.totalDownloadTime; + && totalDownloadTime == that.totalDownloadTime + && totalRejections == that.totalRejections; } @Override @@ -334,7 +370,8 @@ public int hashCode() { maxRefreshBytesLag, totalRefreshBytesLag, totalUploadTime, - totalDownloadTime + totalDownloadTime, + totalRejections ); } } diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java b/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java index 05081180bb179..2a703f17aa953 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java @@ -267,7 +267,8 @@ public long getRejectionCount() { return rejectionCount.get(); } - void incrementRejectionCount() { + /** public only for testing **/ + public void incrementRejectionCount() { rejectionCount.incrementAndGet(); } diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java index e3f16463a5328..e0b35c69cc3c0 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -815,6 +815,7 @@ private static NodeIndicesStats getNodeIndicesStats(boolean remoteStoreStats) { remoteSegmentStats.setMaxRefreshTimeLag(2L); remoteSegmentStats.addTotalUploadTime(20L); remoteSegmentStats.addTotalDownloadTime(20L); + remoteSegmentStats.addTotalRejections(5L); RemoteTranslogStats remoteTranslogStats = indicesStats.getTranslog().getRemoteTranslogStats(); RemoteTranslogStats otherRemoteTranslogStats = new RemoteTranslogStats(getRandomRemoteTranslogTransferTrackerStats()); diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index b2eb41828a4df..9ef9bec01cb38 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -4910,6 +4910,8 @@ private void populateSampleRemoteSegmentStats(RemoteSegmentTransferTracker track tracker.addUploadBytesStarted(30L); tracker.addUploadBytesSucceeded(10L); tracker.addUploadBytesFailed(10L); + tracker.incrementRejectionCount(); + tracker.incrementRejectionCount(); } private void populateSampleRemoteTranslogStats(RemoteTranslogTransferTracker tracker) { @@ -4943,5 +4945,7 @@ private static void assertRemoteSegmentStats( assertEquals(remoteSegmentTransferTracker.getUploadBytesStarted(), remoteSegmentStats.getUploadBytesStarted()); assertEquals(remoteSegmentTransferTracker.getUploadBytesSucceeded(), remoteSegmentStats.getUploadBytesSucceeded()); assertEquals(remoteSegmentTransferTracker.getUploadBytesFailed(), remoteSegmentStats.getUploadBytesFailed()); + assertTrue(remoteSegmentStats.getTotalRejections() > 0); + assertEquals(remoteSegmentTransferTracker.getRejectionCount(), remoteSegmentStats.getTotalRejections()); } }