From a1e42b1131d5d807dc66d13dd960ebe707da3f09 Mon Sep 17 00:00:00 2001 From: Kunal Kotwani Date: Tue, 16 May 2023 17:17:30 -0700 Subject: [PATCH 01/10] Add filecache support in clear indices cache API (#7498) Signed-off-by: Kunal Kotwani --- CHANGELOG.md | 1 + .../client/IndicesRequestConverters.java | 1 + .../client/IndicesRequestConvertersTests.java | 4 ++ .../api/indices.clear_cache.json | 4 ++ .../clear/ClearIndicesCacheBlocksIT.java | 39 ++++++++++++++ .../cache/clear/ClearIndicesCacheRequest.java | 17 ++++++ .../ClearIndicesCacheRequestBuilder.java | 5 ++ .../TransportClearIndicesCacheAction.java | 16 ++++++ .../store/remote/filecache/FileCache.java | 6 +++ .../store/remote/utils/cache/LRUCache.java | 6 ++- .../remote/utils/cache/RefCountedCache.java | 13 ++++- .../remote/utils/cache/SegmentedCache.java | 10 ++++ .../indices/RestClearIndicesCacheAction.java | 1 + ...TransportClearIndicesCacheActionTests.java | 53 +++++++++++++++++++ .../remote/filecache/FileCacheTests.java | 22 +++++++- .../utils/cache/RefCountedCacheTestCase.java | 15 ++++++ 16 files changed, 209 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 09273f0182d8f..798be4678e550 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -92,6 +92,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Adds ExtensionsManager.lookupExtensionSettingsById ([#7466](https://github.com/opensearch-project/OpenSearch/pull/7466)) - SegRep with Remote: Add hook for publishing checkpoint notifications after segment upload to remote store ([#7394](https://github.com/opensearch-project/OpenSearch/pull/7394)) - Provide mechanism to configure XContent parsing constraints (after update to Jackson 2.15.0 and above) ([#7550](https://github.com/opensearch-project/OpenSearch/pull/7550)) +- Support to clear filecache using clear indices cache API ([#7498](https://github.com/opensearch-project/OpenSearch/pull/7498)) ### Dependencies - Bump `com.netflix.nebula:gradle-info-plugin` from 12.0.0 to 12.1.3 (#7564) diff --git a/client/rest-high-level/src/main/java/org/opensearch/client/IndicesRequestConverters.java b/client/rest-high-level/src/main/java/org/opensearch/client/IndicesRequestConverters.java index ca9154340a660..bffc5b0a21f1b 100644 --- a/client/rest-high-level/src/main/java/org/opensearch/client/IndicesRequestConverters.java +++ b/client/rest-high-level/src/main/java/org/opensearch/client/IndicesRequestConverters.java @@ -261,6 +261,7 @@ static Request clearCache(ClearIndicesCacheRequest clearIndicesCacheRequest) { parameters.withIndicesOptions(clearIndicesCacheRequest.indicesOptions()); parameters.putParam("query", Boolean.toString(clearIndicesCacheRequest.queryCache())); parameters.putParam("fielddata", Boolean.toString(clearIndicesCacheRequest.fieldDataCache())); + parameters.putParam("file", Boolean.toString(clearIndicesCacheRequest.fileCache())); parameters.putParam("request", Boolean.toString(clearIndicesCacheRequest.requestCache())); parameters.putParam("fields", String.join(",", clearIndicesCacheRequest.fields())); request.addParameters(parameters.asMap()); diff --git a/client/rest-high-level/src/test/java/org/opensearch/client/IndicesRequestConvertersTests.java b/client/rest-high-level/src/test/java/org/opensearch/client/IndicesRequestConvertersTests.java index 7ed06129dc893..2710e9531bf1b 100644 --- a/client/rest-high-level/src/test/java/org/opensearch/client/IndicesRequestConvertersTests.java +++ 
b/client/rest-high-level/src/test/java/org/opensearch/client/IndicesRequestConvertersTests.java @@ -596,6 +596,10 @@ public void testClearCache() { clearIndicesCacheRequest.fields(RequestConvertersTests.randomIndicesNames(1, 5)); expectedParams.put("fields", String.join(",", clearIndicesCacheRequest.fields())); } + if (OpenSearchTestCase.randomBoolean()) { + clearIndicesCacheRequest.fileCache(OpenSearchTestCase.randomBoolean()); + } + expectedParams.put("file", Boolean.toString(clearIndicesCacheRequest.fileCache())); Request request = IndicesRequestConverters.clearCache(clearIndicesCacheRequest); StringJoiner endpoint = new StringJoiner("/", "/", ""); diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/indices.clear_cache.json b/rest-api-spec/src/main/resources/rest-api-spec/api/indices.clear_cache.json index 64c10a520c7c4..0c7eca8c8e6f5 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/indices.clear_cache.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/indices.clear_cache.json @@ -67,6 +67,10 @@ "request":{ "type":"boolean", "description":"Clear request cache" + }, + "file":{ + "type":"boolean", + "description":"Clear filecache" } } } diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/cache/clear/ClearIndicesCacheBlocksIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/cache/clear/ClearIndicesCacheBlocksIT.java index 305b39ac3ad0c..31c1aca53ae4a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/cache/clear/ClearIndicesCacheBlocksIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/cache/clear/ClearIndicesCacheBlocksIT.java @@ -36,6 +36,7 @@ import org.opensearch.test.OpenSearchIntegTestCase.ClusterScope; import java.util.Arrays; +import java.util.Collections; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_METADATA; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_READ; @@ -89,4 +90,42 @@ public void testClearIndicesCacheWithBlocks() { } } } + + public void testClearIndicesFileCacheWithBlocks() { + createIndex("test"); + ensureGreen("test"); + + NumShards numShards = getNumShards("test"); + + // Request is not blocked + for (String blockSetting : Arrays.asList( + SETTING_BLOCKS_READ, + SETTING_BLOCKS_WRITE, + SETTING_READ_ONLY, + SETTING_READ_ONLY_ALLOW_DELETE + )) { + try { + enableIndexBlock("test", blockSetting); + ClearIndicesCacheResponse clearIndicesCacheResponse = client().admin() + .indices() + .prepareClearCache("test") + .setFileCache(true) + .execute() + .actionGet(); + assertNoFailures(clearIndicesCacheResponse); + assertThat(clearIndicesCacheResponse.getSuccessfulShards(), equalTo(numShards.totalNumShards)); + } finally { + disableIndexBlock("test", blockSetting); + } + } + + for (String blockSetting : Collections.singletonList(SETTING_BLOCKS_METADATA)) { + try { + enableIndexBlock("test", blockSetting); + assertBlocked(client().admin().indices().prepareClearCache("test").setQueryCache(true).setFileCache(true)); + } finally { + disableIndexBlock("test", blockSetting); + } + } + } } diff --git a/server/src/main/java/org/opensearch/action/admin/indices/cache/clear/ClearIndicesCacheRequest.java b/server/src/main/java/org/opensearch/action/admin/indices/cache/clear/ClearIndicesCacheRequest.java index 35913c2579aa9..793d02d6c4d13 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/cache/clear/ClearIndicesCacheRequest.java +++ 
b/server/src/main/java/org/opensearch/action/admin/indices/cache/clear/ClearIndicesCacheRequest.java
@@ -32,6 +32,7 @@
 package org.opensearch.action.admin.indices.cache.clear;
 
+import org.opensearch.Version;
 import org.opensearch.action.support.broadcast.BroadcastRequest;
 import org.opensearch.common.Strings;
 import org.opensearch.common.io.stream.StreamInput;
@@ -49,6 +50,7 @@ public class ClearIndicesCacheRequest extends BroadcastRequest<ClearIndicesCacheRequest>
+                final Predicate<Path> pathStartsWithShardPathPredicate = path -> path.startsWith(shardPath.getDataPath());
+                node.fileCache().prune(pathStartsWithShardPathPredicate);
+            }
+        }
+
         indicesService.clearIndexShardCache(
             shardRouting.shardId(),
             request.queryCache(),
diff --git a/server/src/main/java/org/opensearch/index/store/remote/filecache/FileCache.java b/server/src/main/java/org/opensearch/index/store/remote/filecache/FileCache.java
index 09ba6d5ba2105..0aa3740fb6ecb 100644
--- a/server/src/main/java/org/opensearch/index/store/remote/filecache/FileCache.java
+++ b/server/src/main/java/org/opensearch/index/store/remote/filecache/FileCache.java
@@ -22,6 +22,7 @@
 import java.nio.file.Path;
 import java.util.List;
 import java.util.function.BiFunction;
+import java.util.function.Predicate;
 
 import static org.opensearch.index.store.remote.directory.RemoteSnapshotDirectoryFactory.LOCAL_STORE_LOCATION;
 
@@ -121,6 +122,11 @@ public long prune() {
         return theCache.prune();
     }
 
+    @Override
+    public long prune(Predicate<Path> keyPredicate) {
+        return theCache.prune(keyPredicate);
+    }
+
     @Override
     public CacheUsage usage() {
         return theCache.usage();
diff --git a/server/src/main/java/org/opensearch/index/store/remote/utils/cache/LRUCache.java b/server/src/main/java/org/opensearch/index/store/remote/utils/cache/LRUCache.java
index f36055e5d7327..03d03711f914a 100644
--- a/server/src/main/java/org/opensearch/index/store/remote/utils/cache/LRUCache.java
+++ b/server/src/main/java/org/opensearch/index/store/remote/utils/cache/LRUCache.java
@@ -22,6 +22,7 @@
 import java.util.Objects;
 import java.util.concurrent.locks.ReentrantLock;
 import java.util.function.BiFunction;
+import java.util.function.Predicate;
 
 /**
  * LRU implementation of {@link RefCountedCache}. As long as {@link Node#refCount} greater than 0 then node is not eligible for eviction.
@@ -256,13 +257,16 @@ public void decRef(K key) {
     }
 
     @Override
-    public long prune() {
+    public long prune(Predicate<K> keyPredicate) {
         long sum = 0L;
         lock.lock();
         try {
             final Iterator<Node<K, V>> iterator = lru.values().iterator();
             while (iterator.hasNext()) {
                 final Node<K, V> node = iterator.next();
+                if (keyPredicate != null && !keyPredicate.test(node.key)) {
+                    continue;
+                }
                 iterator.remove();
                 data.remove(node.key, node);
                 sum += node.weight;
diff --git a/server/src/main/java/org/opensearch/index/store/remote/utils/cache/RefCountedCache.java b/server/src/main/java/org/opensearch/index/store/remote/utils/cache/RefCountedCache.java
index bbb37dc57ae7e..e6b5a5f945d83 100644
--- a/server/src/main/java/org/opensearch/index/store/remote/utils/cache/RefCountedCache.java
+++ b/server/src/main/java/org/opensearch/index/store/remote/utils/cache/RefCountedCache.java
@@ -11,6 +11,7 @@
 import org.opensearch.index.store.remote.utils.cache.stats.CacheStats;
 
 import java.util.function.BiFunction;
+import java.util.function.Predicate;
 
 /**
  * Custom Cache which support typical cache operations (put, get, ...) and it support reference counting per individual key which might
@@ -80,7 +81,17 @@
      *
     * @return The total weight of all removed entries.
     */
-    long prune();
+    default long prune() {
+        return prune(key -> true);
+    }
+
+    /**
+     * Removes the cache entries which match the predicate criteria for the key
+     * and have a reference count of zero, regardless of current capacity.
+     *
+     * @return The total weight of all removed entries.
+     */
+    long prune(Predicate<K> keyPredicate);
 
     /**
      * Returns the weighted usage of this cache.
diff --git a/server/src/main/java/org/opensearch/index/store/remote/utils/cache/SegmentedCache.java b/server/src/main/java/org/opensearch/index/store/remote/utils/cache/SegmentedCache.java
index 42e44aa5f6a15..d3eb03df37e1b 100644
--- a/server/src/main/java/org/opensearch/index/store/remote/utils/cache/SegmentedCache.java
+++ b/server/src/main/java/org/opensearch/index/store/remote/utils/cache/SegmentedCache.java
@@ -15,6 +15,7 @@
 import java.util.Objects;
 
 import java.util.function.BiFunction;
+import java.util.function.Predicate;
 
 /**
  * Segmented {@link LRUCache} to offer concurrent access with less contention.
@@ -138,6 +139,15 @@ public long prune() {
         return sum;
     }
 
+    @Override
+    public long prune(Predicate<K> keyPredicate) {
+        long sum = 0L;
+        for (RefCountedCache<K, V> cache : table) {
+            sum += cache.prune(keyPredicate);
+        }
+        return sum;
+    }
+
     @Override
     public CacheUsage usage() {
         long usage = 0L;
diff --git a/server/src/main/java/org/opensearch/rest/action/admin/indices/RestClearIndicesCacheAction.java b/server/src/main/java/org/opensearch/rest/action/admin/indices/RestClearIndicesCacheAction.java
index e4f0d7d006d87..ee1d5a97d00c0 100644
--- a/server/src/main/java/org/opensearch/rest/action/admin/indices/RestClearIndicesCacheAction.java
+++ b/server/src/main/java/org/opensearch/rest/action/admin/indices/RestClearIndicesCacheAction.java
@@ -83,6 +83,7 @@ public static ClearIndicesCacheRequest fromRequest(final RestRequest request, Cl
         clearIndicesCacheRequest.queryCache(request.paramAsBoolean("query", clearIndicesCacheRequest.queryCache()));
         clearIndicesCacheRequest.requestCache(request.paramAsBoolean("request", clearIndicesCacheRequest.requestCache()));
         clearIndicesCacheRequest.fieldDataCache(request.paramAsBoolean("fielddata", clearIndicesCacheRequest.fieldDataCache()));
+        clearIndicesCacheRequest.fileCache(request.paramAsBoolean("file", clearIndicesCacheRequest.fileCache()));
         clearIndicesCacheRequest.fields(request.paramAsStringArray("fields", clearIndicesCacheRequest.fields()));
         return clearIndicesCacheRequest;
     }
diff --git a/server/src/test/java/org/opensearch/action/admin/indices/cache/clear/TransportClearIndicesCacheActionTests.java b/server/src/test/java/org/opensearch/action/admin/indices/cache/clear/TransportClearIndicesCacheActionTests.java
index cce0927d4994f..85aa60d6f308b 100644
--- a/server/src/test/java/org/opensearch/action/admin/indices/cache/clear/TransportClearIndicesCacheActionTests.java
+++ b/server/src/test/java/org/opensearch/action/admin/indices/cache/clear/TransportClearIndicesCacheActionTests.java
@@ -9,26 +9,45 @@
 package org.opensearch.action.admin.indices.cache.clear;
 
 import org.opensearch.action.support.ActionFilters;
+import org.opensearch.action.support.broadcast.node.TransportBroadcastByNodeAction;
 import org.opensearch.cluster.ClusterState;
 import org.opensearch.cluster.block.ClusterBlock;
 import org.opensearch.cluster.block.ClusterBlockLevel;
 import org.opensearch.cluster.block.ClusterBlocks;
 import org.opensearch.cluster.metadata.IndexNameExpressionResolver;
+import org.opensearch.cluster.routing.ShardRouting;
 import org.opensearch.cluster.service.ClusterService;
+import 
org.opensearch.common.breaker.NoopCircuitBreaker; +import org.opensearch.common.settings.Settings; +import org.opensearch.env.Environment; +import org.opensearch.env.NodeEnvironment; +import org.opensearch.env.TestEnvironment; +import org.opensearch.index.shard.ShardId; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.index.store.remote.filecache.FileCache; +import org.opensearch.index.store.remote.filecache.FileCacheFactory; +import org.opensearch.index.store.remote.filecache.FileCacheTests; import org.opensearch.indices.IndicesService; +import org.opensearch.node.Node; import org.opensearch.rest.RestStatus; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.transport.TransportService; +import java.io.IOException; +import java.nio.file.Path; import java.util.EnumSet; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; public class TransportClearIndicesCacheActionTests extends OpenSearchTestCase { + + private final Node testNode = mock(Node.class); private final TransportClearIndicesCacheAction action = new TransportClearIndicesCacheAction( mock(ClusterService.class), mock(TransportService.class), mock(IndicesService.class), + testNode, mock(ActionFilters.class), mock(IndexNameExpressionResolver.class) ); @@ -55,6 +74,40 @@ public class TransportClearIndicesCacheActionTests extends OpenSearchTestCase { EnumSet.of(ClusterBlockLevel.METADATA_READ) ); + public void testOnShardOperation() throws IOException { + final String indexName = "test"; + final Settings settings = buildEnvSettings(Settings.EMPTY); + final Environment environment = TestEnvironment.newEnvironment(settings); + try (final NodeEnvironment nodeEnvironment = new NodeEnvironment(settings, environment)) { + // Initialize necessary stubs for the filecache clear shard operation + final ShardId shardId = new ShardId(indexName, indexName, 1); + final ShardRouting shardRouting = mock(ShardRouting.class); + when(shardRouting.shardId()).thenReturn(shardId); + final ShardPath shardPath = ShardPath.loadFileCachePath(nodeEnvironment, shardId); + final Path cacheEntryPath = shardPath.getDataPath(); + final FileCache fileCache = FileCacheFactory.createConcurrentLRUFileCache(1024 * 1024 * 1024, 16, new NoopCircuitBreaker("")); + + when(testNode.fileCache()).thenReturn(fileCache); + when(testNode.getNodeEnvironment()).thenReturn(nodeEnvironment); + + // Add an entry into the filecache and reduce the ref count + fileCache.put(cacheEntryPath, new FileCacheTests.StubCachedIndexInput(1)); + fileCache.decRef(cacheEntryPath); + + // Check if the entry exists and reduce the ref count to make it evictable + assertNotNull(fileCache.get(cacheEntryPath)); + fileCache.decRef(cacheEntryPath); + + ClearIndicesCacheRequest clearIndicesCacheRequest = new ClearIndicesCacheRequest(); + clearIndicesCacheRequest.fileCache(true); + assertEquals( + TransportBroadcastByNodeAction.EmptyResult.INSTANCE, + action.shardOperation(clearIndicesCacheRequest, shardRouting) + ); + assertNull(fileCache.get(cacheEntryPath)); + } + } + public void testGlobalBlockCheck() { ClusterBlocks.Builder builder = ClusterBlocks.builder(); builder.addGlobalBlock(writeClusterBlock); diff --git a/server/src/test/java/org/opensearch/index/store/remote/filecache/FileCacheTests.java b/server/src/test/java/org/opensearch/index/store/remote/filecache/FileCacheTests.java index 0cc0cc8d31ade..43a5c04b59f83 100644 --- a/server/src/test/java/org/opensearch/index/store/remote/filecache/FileCacheTests.java +++ 
b/server/src/test/java/org/opensearch/index/store/remote/filecache/FileCacheTests.java @@ -225,6 +225,24 @@ public void testPrune() { assertEquals(fileCache.size(), 0); } + public void testPruneWithPredicate() { + FileCache fileCache = createFileCache(GIGA_BYTES); + for (int i = 0; i < 4; i++) { + putAndDecRef(fileCache, i, 8 * MEGA_BYTES); + } + + // before prune + assertEquals(fileCache.size(), 4); + + // after prune with false predicate + fileCache.prune(path -> false); + assertEquals(fileCache.size(), 4); + + // after prune with true predicate + fileCache.prune(path -> true); + assertEquals(fileCache.size(), 0); + } + public void testUsage() { FileCache fileCache = FileCacheFactory.createConcurrentLRUFileCache( 16 * MEGA_BYTES, @@ -280,11 +298,11 @@ private void putAndDecRef(FileCache cache, int path, long indexInputSize) { cache.decRef(key); } - private static class StubCachedIndexInput implements CachedIndexInput { + public static class StubCachedIndexInput implements CachedIndexInput { private final long length; - private StubCachedIndexInput(long length) { + public StubCachedIndexInput(long length) { this.length = length; } diff --git a/server/src/test/java/org/opensearch/index/store/remote/utils/cache/RefCountedCacheTestCase.java b/server/src/test/java/org/opensearch/index/store/remote/utils/cache/RefCountedCacheTestCase.java index 59657eebc4480..b11740b53e11f 100644 --- a/server/src/test/java/org/opensearch/index/store/remote/utils/cache/RefCountedCacheTestCase.java +++ b/server/src/test/java/org/opensearch/index/store/remote/utils/cache/RefCountedCacheTestCase.java @@ -138,6 +138,21 @@ public void testPrune() { assertEquals(10L, (long) refCountedCache.get("3")); } + public void testPruneWithPredicate() { + refCountedCache.put("1", 10L); + refCountedCache.decRef("1"); + refCountedCache.put("2", 10L); + refCountedCache.decRef("2"); + refCountedCache.put("3", 10L); + + assertEquals(0L, refCountedCache.prune(path -> false)); + + assertEquals(20L, refCountedCache.prune(path -> true)); + assertNull(refCountedCache.get("1")); + assertNull(refCountedCache.get("2")); + assertEquals(10L, (long) refCountedCache.get("3")); + } + public void testStats() { assertEquals(0, refCountedCache.stats().hitCount()); refCountedCache.put("1", 1L); From 613f4aa046912b583925a9a03cb2294efd2a002c Mon Sep 17 00:00:00 2001 From: Gaurav Bafna <85113518+gbbafna@users.noreply.github.com> Date: Wed, 17 May 2023 14:14:27 +0530 Subject: [PATCH 02/10] [Remote Translog] Trimming based on Remote Segment Store (#7383) * [Remote Translog] Set min referenced as per remote segment store only when enabled. 
Add IT for failover using remote translog as well Signed-off-by: Gaurav Bafna --- codecov.yml | 3 + .../remotestore/RemoteStoreBaseIT.java | 76 +++++++++++ .../opensearch/remotestore/RemoteStoreIT.java | 38 +----- .../ReplicaToPrimaryPromotionIT.java | 118 ++++++++++++++++++ .../index/translog/RemoteFsTranslog.java | 10 +- .../opensearch/index/translog/Translog.java | 6 +- .../index/translog/TranslogWriter.java | 7 +- .../transfer/TranslogTransferManager.java | 12 +- .../index/translog/RemoteFSTranslogTests.java | 19 ++- .../translog/TranslogDeletionPolicyTests.java | 3 +- .../TranslogTransferManagerTests.java | 48 ++++++- .../opensearch/test/BackgroundIndexer.java | 15 ++- .../test/OpenSearchIntegTestCase.java | 17 +++ 13 files changed, 308 insertions(+), 64 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIT.java create mode 100644 server/src/internalClusterTest/java/org/opensearch/remotestore/ReplicaToPrimaryPromotionIT.java diff --git a/codecov.yml b/codecov.yml index 7c38e4e63325b..b8a7d220fa214 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,6 +1,9 @@ codecov: require_ci_to_pass: yes +ignore: + - "test" + coverage: precision: 2 round: down diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIT.java new file mode 100644 index 0000000000000..bdaa5af222459 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIT.java @@ -0,0 +1,76 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */
+
+package org.opensearch.remotestore;
+
+import org.junit.After;
+import org.junit.Before;
+import org.opensearch.cluster.metadata.IndexMetadata;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.common.util.FeatureFlags;
+import org.opensearch.index.IndexModule;
+import org.opensearch.index.IndexSettings;
+import org.opensearch.indices.replication.common.ReplicationType;
+import org.opensearch.plugins.Plugin;
+import org.opensearch.test.OpenSearchIntegTestCase;
+import org.opensearch.test.transport.MockTransportService;
+
+import java.nio.file.Path;
+import java.util.Collection;
+
+import static java.util.Arrays.asList;
+import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
+
+public class RemoteStoreBaseIT extends OpenSearchIntegTestCase {
+    protected static final String REPOSITORY_NAME = "test-remore-store-repo";
+    protected static final int SHARD_COUNT = 1;
+    protected static final int REPLICA_COUNT = 1;
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return asList(MockTransportService.TestPlugin.class);
+    }
+
+    @Override
+    protected boolean addMockInternalEngine() {
+        return false;
+    }
+
+    @Override
+    protected Settings featureFlagSettings() {
+        return Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.REMOTE_STORE, "true").build();
+    }
+
+    public Settings indexSettings() {
+        return Settings.builder()
+            .put(super.indexSettings())
+            .put(IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING.getKey(), false)
+            .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, true)
+            .put(IndexMetadata.SETTING_REMOTE_STORE_REPOSITORY, REPOSITORY_NAME)
+            .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, SHARD_COUNT)
+            .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, REPLICA_COUNT)
+            .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "300s")
+            .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT)
+            .build();
+    }
+
+    @Before
+    public void setup() {
+        internalCluster().startClusterManagerOnlyNode();
+        Path absolutePath = randomRepoPath().toAbsolutePath();
+        assertAcked(
+            clusterAdmin().preparePutRepository(REPOSITORY_NAME).setType("fs").setSettings(Settings.builder().put("location", absolutePath))
+        );
+    }
+
+    @After
+    public void teardown() {
+        assertAcked(clusterAdmin().prepareDeleteRepository(REPOSITORY_NAME));
+    }
+
+}
diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java
index 4d66bdd6cf3d1..41ae0da9ccb72 100644
--- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java
+++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java
@@ -8,8 +8,6 @@
 
 package org.opensearch.remotestore;
 
-import org.junit.After;
-import org.junit.Before;
 import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreRequest;
 import org.opensearch.action.admin.indices.recovery.RecoveryResponse;
 import org.opensearch.action.index.IndexResponse;
@@ -18,17 +16,13 @@
 import org.opensearch.cluster.routing.RecoverySource;
 import org.opensearch.common.UUIDs;
 import org.opensearch.common.settings.Settings;
-import org.opensearch.common.util.FeatureFlags;
-import org.opensearch.index.IndexModule;
 import org.opensearch.indices.recovery.RecoveryState;
-import org.opensearch.indices.replication.common.ReplicationType;
 import org.opensearch.plugins.Plugin;
 import org.opensearch.test.InternalTestCluster;
 import org.opensearch.test.OpenSearchIntegTestCase;
import org.opensearch.test.transport.MockTransportService; import java.io.IOException; -import java.nio.file.Path; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; @@ -40,9 +34,8 @@ import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) -public class RemoteStoreIT extends OpenSearchIntegTestCase { +public class RemoteStoreIT extends RemoteStoreBaseIT { - private static final String REPOSITORY_NAME = "test-remore-store-repo"; private static final String INDEX_NAME = "remote-store-test-idx-1"; private static final String TOTAL_OPERATIONS = "total-operations"; private static final String REFRESHED_OR_FLUSHED_OPERATIONS = "refreshed-or-flushed-operations"; @@ -62,13 +55,8 @@ public Settings indexSettings() { private Settings remoteStoreIndexSettings(int numberOfReplicas) { return Settings.builder() .put(super.indexSettings()) - .put("index.refresh_interval", "300s") .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numberOfReplicas) - .put(IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING.getKey(), false) - .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) - .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, true) - .put(IndexMetadata.SETTING_REMOTE_STORE_REPOSITORY, REPOSITORY_NAME) .build(); } @@ -80,30 +68,6 @@ private Settings remoteTranslogIndexSettings(int numberOfReplicas) { .build(); } - @Override - protected boolean addMockInternalEngine() { - return false; - } - - @Override - protected Settings featureFlagSettings() { - return Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.REMOTE_STORE, "true").build(); - } - - @Before - public void setup() { - internalCluster().startClusterManagerOnlyNode(); - Path absolutePath = randomRepoPath().toAbsolutePath(); - assertAcked( - clusterAdmin().preparePutRepository(REPOSITORY_NAME).setType("fs").setSettings(Settings.builder().put("location", absolutePath)) - ); - } - - @After - public void teardown() { - assertAcked(clusterAdmin().prepareDeleteRepository(REPOSITORY_NAME)); - } - private IndexResponse indexSingleDoc() { return client().prepareIndex(INDEX_NAME) .setId(UUIDs.randomBase64UUID()) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/ReplicaToPrimaryPromotionIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/ReplicaToPrimaryPromotionIT.java new file mode 100644 index 0000000000000..fe8f3612be6bf --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/ReplicaToPrimaryPromotionIT.java @@ -0,0 +1,118 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.remotestore; + +import com.carrotsearch.randomizedtesting.RandomizedTest; +import org.opensearch.action.admin.indices.close.CloseIndexResponse; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.common.settings.Settings; +import org.opensearch.test.BackgroundIndexer; +import org.opensearch.test.InternalTestCluster; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.util.Locale; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; + +@OpenSearchIntegTestCase.ClusterScope(numDataNodes = 0) +public class ReplicaToPrimaryPromotionIT extends RemoteStoreBaseIT { + private int shard_count = 5; + + @Override + public Settings indexSettings() { + return Settings.builder() + .put(super.indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, shard_count) + .put(IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_ENABLED, true) + .put(IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY, REPOSITORY_NAME) + .build(); + } + + public void testPromoteReplicaToPrimary() throws Exception { + internalCluster().startNode(); + internalCluster().startNode(); + final String indexName = randomAlphaOfLength(5).toLowerCase(Locale.ROOT); + shard_count = scaledRandomIntBetween(1, 5); + createIndex(indexName); + int numOfDocs = 0; + int numIter = scaledRandomIntBetween(0, 10); + for (int i = 0; i < numIter; i++) { + final int numOfDoc = scaledRandomIntBetween(0, 200); + logger.info("num of docs in iter {} {}", numOfDoc, i); + if (numOfDoc > 0) { + try ( + BackgroundIndexer indexer = new BackgroundIndexer( + indexName, + "_doc", + client(), + numOfDoc, + RandomizedTest.scaledRandomIntBetween(2, 5), + false, + null + ) + ) { + indexer.setUseAutoGeneratedIDs(true); + indexer.start(numOfDoc); + waitForIndexed(numOfDoc, indexer); + numOfDocs += numOfDoc; + indexer.stopAndAwaitStopped(); + if (random().nextBoolean()) { + // 90% refresh + 10% flush + if (random().nextInt(10) != 0) { + refresh(indexName); + } else { + flush(indexName); + } + } + } + } + } + + ensureGreen(indexName); + + // sometimes test with a closed index + final IndexMetadata.State indexState = randomFrom(IndexMetadata.State.OPEN, IndexMetadata.State.CLOSE); + if (indexState == IndexMetadata.State.CLOSE) { + CloseIndexResponse closeIndexResponse = client().admin().indices().prepareClose(indexName).get(); + assertThat("close index not acked - " + closeIndexResponse, closeIndexResponse.isAcknowledged(), equalTo(true)); + ensureGreen(indexName); + } + + // pick up a data node that contains a random primary shard + ClusterState state = client(internalCluster().getClusterManagerName()).admin().cluster().prepareState().get().getState(); + final int numShards = state.metadata().index(indexName).getNumberOfShards(); + final ShardRouting primaryShard = state.routingTable().index(indexName).shard(randomIntBetween(0, numShards - 1)).primaryShard(); + final DiscoveryNode randomNode = state.nodes().resolveNode(primaryShard.currentNodeId()); + + // stop the random data node, all remaining shards are promoted to primaries + 
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(randomNode.getName())); + ensureYellowAndNoInitializingShards(indexName); + + state = client(internalCluster().getClusterManagerName()).admin().cluster().prepareState().get().getState(); + for (IndexShardRoutingTable shardRoutingTable : state.routingTable().index(indexName)) { + for (ShardRouting shardRouting : shardRoutingTable.activeShards()) { + assertThat(shardRouting + " should be promoted as a primary", shardRouting.primary(), is(true)); + } + } + + if (indexState == IndexMetadata.State.CLOSE) { + assertAcked(client().admin().indices().prepareOpen(indexName)); + ensureYellowAndNoInitializingShards(indexName); + } + refresh(indexName); + assertHitCount(client().prepareSearch(indexName).setSize(0).get(), numOfDocs); + } +} diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index dcd13ad8916d5..e2a770fd8322e 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -8,6 +8,8 @@ package org.opensearch.index.translog; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.opensearch.common.SetOnce; import org.opensearch.core.util.FileSystemUtils; import org.opensearch.common.lease.Releasable; @@ -47,6 +49,7 @@ */ public class RemoteFsTranslog extends Translog { + private static final Logger logger = LogManager.getLogger(RemoteFsTranslog.class); private final BlobStoreRepository blobStoreRepository; private final TranslogTransferManager translogTransferManager; private final FileTransferTracker fileTransferTracker; @@ -82,7 +85,6 @@ public RemoteFsTranslog( this.primaryModeSupplier = primaryModeSupplier; fileTransferTracker = new FileTransferTracker(shardId); this.translogTransferManager = buildTranslogTransferManager(blobStoreRepository, threadPool, shardId, fileTransferTracker); - try { download(translogTransferManager, location); Checkpoint checkpoint = readCheckpoint(location); @@ -131,6 +133,7 @@ public static void download(Repository repository, ShardId shardId, ThreadPool t } public static void download(TranslogTransferManager translogTransferManager, Path location) throws IOException { + logger.info("Downloading translog files from remote for shard {} ", translogTransferManager.getShardId()); TranslogTransferMetadata translogMetadata = translogTransferManager.readMetadata(); if (translogMetadata != null) { if (Files.notExists(location)) { @@ -152,6 +155,7 @@ public static void download(TranslogTransferManager translogTransferManager, Pat location.resolve(Translog.CHECKPOINT_FILE_NAME) ); } + logger.info("Downloaded translog files from remote for shard {} ", translogTransferManager.getShardId()); } public static TranslogTransferManager buildTranslogTransferManager( @@ -161,6 +165,7 @@ public static TranslogTransferManager buildTranslogTransferManager( FileTransferTracker fileTransferTracker ) { return new TranslogTransferManager( + shardId, new BlobStoreTransferService(blobStoreRepository.blobStore(), threadPool), blobStoreRepository.basePath().add(shardId.getIndex().getUUID()).add(String.valueOf(shardId.id())), fileTransferTracker @@ -331,8 +336,9 @@ protected long getMinReferencedGen() throws IOException { assert readLock.isHeldByCurrentThread() || writeLock.isHeldByCurrentThread(); long minReferencedGen = Math.min( 
deletionPolicy.minTranslogGenRequired(readers, current), - minGenerationForSeqNo(Math.min(deletionPolicy.getLocalCheckpointOfSafeCommit() + 1, minSeqNoToKeep), current, readers) + minGenerationForSeqNo(minSeqNoToKeep, current, readers) ); + assert minReferencedGen >= getMinFileGeneration() : "deletion policy requires a minReferenceGen of [" + minReferencedGen + "] but the lowest gen available is [" diff --git a/server/src/main/java/org/opensearch/index/translog/Translog.java b/server/src/main/java/org/opensearch/index/translog/Translog.java index ba4d87ce84f07..814574855b85d 100644 --- a/server/src/main/java/org/opensearch/index/translog/Translog.java +++ b/server/src/main/java/org/opensearch/index/translog/Translog.java @@ -522,7 +522,8 @@ TranslogWriter createWriter( primaryTermSupplier.getAsLong(), tragedy, persistedSequenceNumberConsumer, - bigArrays + bigArrays, + indexSettings.isRemoteTranslogStoreEnabled() ); } catch (final IOException e) { throw new TranslogException(shardId, "failed to create new translog file", e); @@ -2025,7 +2026,8 @@ public static String createEmptyTranslog( seqNo -> { throw new UnsupportedOperationException(); }, - BigArrays.NON_RECYCLING_INSTANCE + BigArrays.NON_RECYCLING_INSTANCE, + null ); writer.close(); return uuid; diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java b/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java index 3e2c7e448fdad..26d6d9ff8488c 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java @@ -37,7 +37,6 @@ import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; -import org.opensearch.core.Assertions; import org.opensearch.common.SuppressForbidden; import org.opensearch.common.bytes.BytesArray; import org.opensearch.common.bytes.BytesReference; @@ -51,6 +50,7 @@ import org.opensearch.common.util.BigArrays; import org.opensearch.common.util.concurrent.ReleasableLock; import org.opensearch.common.util.io.IOUtils; +import org.opensearch.core.Assertions; import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.shard.ShardId; @@ -164,7 +164,8 @@ public static TranslogWriter create( final long primaryTerm, TragicExceptionHolder tragedy, final LongConsumer persistedSequenceNumberConsumer, - final BigArrays bigArrays + final BigArrays bigArrays, + Boolean remoteTranslogEnabled ) throws IOException { final Path checkpointFile = file.getParent().resolve(Translog.CHECKPOINT_FILE_NAME); @@ -185,7 +186,7 @@ public static TranslogWriter create( if (Assertions.ENABLED) { writerGlobalCheckpointSupplier = () -> { long gcp = globalCheckpointSupplier.getAsLong(); - assert gcp >= initialGlobalCheckpoint : "global checkpoint [" + assert gcp >= initialGlobalCheckpoint || (remoteTranslogEnabled == Boolean.TRUE) : "global checkpoint [" + gcp + "] lower than initial gcp [" + initialGlobalCheckpoint diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java index 95536317bcc1b..243fd8801a562 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java @@ -16,6 +16,7 @@ import org.opensearch.action.LatchedActionListener; import 
org.opensearch.common.blobstore.BlobPath;
 import org.opensearch.common.lucene.store.ByteArrayIndexInput;
+import org.opensearch.index.shard.ShardId;
 import org.opensearch.index.translog.Translog;
 import org.opensearch.index.translog.transfer.listener.TranslogTransferListener;
 import org.opensearch.threadpool.ThreadPool;
@@ -47,6 +48,7 @@
  */
 public class TranslogTransferManager {
 
+    private final ShardId shardId;
     private final TransferService transferService;
     private final BlobPath remoteBaseTransferPath;
     private final BlobPath remoteMetadataTransferPath;
@@ -59,16 +61,22 @@ public class TranslogTransferManager {
     private final static String METADATA_DIR = "metadata";
 
     public TranslogTransferManager(
+        ShardId shardId,
         TransferService transferService,
         BlobPath remoteBaseTransferPath,
         FileTransferTracker fileTransferTracker
     ) {
+        this.shardId = shardId;
         this.transferService = transferService;
        this.remoteBaseTransferPath = remoteBaseTransferPath;
         this.remoteMetadataTransferPath = remoteBaseTransferPath.add(METADATA_DIR);
         this.fileTransferTracker = fileTransferTracker;
     }
 
+    public ShardId getShardId() {
+        return this.shardId;
+    }
+
     public boolean transferSnapshot(TransferSnapshot transferSnapshot, TranslogTransferListener translogTransferListener)
         throws IOException {
         List<Exception> exceptionList = new ArrayList<>(transferSnapshot.getTranslogTransferMetadata().getCount());
@@ -229,7 +237,7 @@ public void deleteGenerationAsync(long primaryTerm, Set<Long> generations, Runna
      * @param minPrimaryTermToKeep all primary terms below this primary term are deleted.
      */
     public void deletePrimaryTermsAsync(long minPrimaryTermToKeep) {
-        logger.info("Deleting primary terms from remote store lesser than {}", minPrimaryTermToKeep);
+        logger.info("Deleting primary terms from remote store less than {} for {}", minPrimaryTermToKeep, shardId);
         transferService.listFoldersAsync(ThreadPool.Names.REMOTE_PURGE, remoteBaseTransferPath, new ActionListener<>() {
             @Override
             public void onResponse(Set<String> folders) {
@@ -267,7 +275,7 @@ private void deletePrimaryTermAsync(long primaryTerm) {
             new ActionListener<>() {
                 @Override
                 public void onResponse(Void unused) {
-                    logger.info("Deleted primary term {}", primaryTerm);
+                    logger.info("Deleted primary term {} for {}", primaryTerm, shardId);
                 }
 
                 @Override
diff --git a/server/src/test/java/org/opensearch/index/translog/RemoteFSTranslogTests.java b/server/src/test/java/org/opensearch/index/translog/RemoteFSTranslogTests.java
index 39bc744f05820..24dae6f5be9ab 100644
--- a/server/src/test/java/org/opensearch/index/translog/RemoteFSTranslogTests.java
+++ b/server/src/test/java/org/opensearch/index/translog/RemoteFSTranslogTests.java
@@ -28,15 +28,15 @@
 import org.opensearch.common.blobstore.fs.FsBlobStore;
 import org.opensearch.common.bytes.BytesArray;
 import org.opensearch.common.bytes.ReleasableBytesReference;
-import org.opensearch.core.util.FileSystemUtils;
 import org.opensearch.common.settings.ClusterSettings;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.common.unit.ByteSizeUnit;
 import org.opensearch.common.unit.ByteSizeValue;
 import org.opensearch.common.util.concurrent.AbstractRunnable;
 import org.opensearch.common.util.concurrent.ConcurrentCollections;
-import org.opensearch.core.xcontent.NamedXContentRegistry;
 import org.opensearch.common.util.io.IOUtils;
+import org.opensearch.core.util.FileSystemUtils;
+import org.opensearch.core.xcontent.NamedXContentRegistry;
 import org.opensearch.env.Environment;
 import org.opensearch.env.TestEnvironment;
 import 
org.opensearch.index.IndexSettings; @@ -180,6 +180,7 @@ private TranslogConfig getTranslogConfig(final Path path) { // only randomize between nog age retention and a long one, so failures will have a chance of reproducing .put(IndexSettings.INDEX_TRANSLOG_RETENTION_AGE_SETTING.getKey(), randomBoolean() ? "-1ms" : "1h") .put(IndexSettings.INDEX_TRANSLOG_RETENTION_SIZE_SETTING.getKey(), randomIntBetween(-1, 2048) + "b") + .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, true) .build(); return getTranslogConfig(path, settings); } @@ -523,7 +524,6 @@ public void testSimpleOperationsUpload() throws Exception { } // expose the new checkpoint (simulating a commit), before we trim the translog - translog.deletionPolicy.setLocalCheckpointOfSafeCommit(0); // simulating the remote segment upload . translog.setMinSeqNoToKeep(0); // This should not trim anything from local @@ -545,7 +545,6 @@ public void testSimpleOperationsUpload() throws Exception { // This should trim tlog-2 from local // This should not trim tlog-2.* files from remote as we not uploading any more translog to remote translog.setMinSeqNoToKeep(1); - translog.deletionPolicy.setLocalCheckpointOfSafeCommit(1); translog.trimUnreferencedReaders(); assertEquals(1, translog.readers.size()); assertBusy(() -> { @@ -563,10 +562,13 @@ public void testSimpleOperationsUpload() throws Exception { // this should now trim as tlog-2 files from remote, but not tlog-3 and tlog-4 addToTranslogAndListAndUpload(translog, ops, new Translog.Index("2", 2, primaryTerm.get(), new byte[] { 1 })); - translog.deletionPolicy.setLocalCheckpointOfSafeCommit(1); + assertEquals(2, translog.stats().estimatedNumberOfOperations()); + translog.setMinSeqNoToKeep(2); + translog.trimUnreferencedReaders(); assertEquals(1, translog.readers.size()); + assertEquals(1, translog.stats().estimatedNumberOfOperations()); assertBusy(() -> assertEquals(4, translog.allUploaded().size())); } @@ -576,7 +578,6 @@ public void testMetadataFileDeletion() throws Exception { int numDocs = randomIntBetween(6, 10); for (int i = 0; i < numDocs; i++) { addToTranslogAndListAndUpload(translog, ops, new Translog.Index(String.valueOf(i), i, primaryTerm.get(), new byte[] { 1 })); - translog.deletionPolicy.setLocalCheckpointOfSafeCommit(i - 1); translog.setMinSeqNoToKeep(i); translog.trimUnreferencedReaders(); assertEquals(1, translog.readers.size()); @@ -608,7 +609,6 @@ public void testMetadataFileDeletion() throws Exception { ); int totalDocs = numDocs + moreDocs; - translog.deletionPolicy.setLocalCheckpointOfSafeCommit(totalDocs - 2); translog.setMinSeqNoToKeep(totalDocs - 1); translog.trimUnreferencedReaders(); @@ -617,7 +617,6 @@ public void testMetadataFileDeletion() throws Exception { ops, new Translog.Index(String.valueOf(totalDocs), totalDocs, primaryTerm.get(), new byte[] { 1 }) ); - translog.deletionPolicy.setLocalCheckpointOfSafeCommit(totalDocs - 1); translog.setMinSeqNoToKeep(totalDocs); translog.trimUnreferencedReaders(); assertBusy( @@ -653,7 +652,7 @@ public void testMetadataFileDeletion() throws Exception { int newPrimaryTermDocs = randomIntBetween(5, 10); for (int i = totalDocs + 1; i <= totalDocs + newPrimaryTermDocs; i++) { addToTranslogAndListAndUpload(newTranslog, ops, new Translog.Index(String.valueOf(i), i, primaryTerm.get(), new byte[] { 1 })); - newTranslog.deletionPolicy.setLocalCheckpointOfSafeCommit(i - 1); + // newTranslog.deletionPolicy.setLocalCheckpointOfSafeCommit(i - 1); newTranslog.setMinSeqNoToKeep(i); newTranslog.trimUnreferencedReaders(); } @@ -858,7 +857,7 @@ 
public void doRun() throws BrokenBarrierException, InterruptedException, IOExcep
                     translog.rollGeneration();
                     // expose the new checkpoint (simulating a commit), before we trim the translog
                     lastCommittedLocalCheckpoint.set(localCheckpoint);
-                    deletionPolicy.setLocalCheckpointOfSafeCommit(localCheckpoint);
+                    // deletionPolicy.setLocalCheckpointOfSafeCommit(localCheckpoint);
                     translog.setMinSeqNoToKeep(localCheckpoint + 1);
                     translog.trimUnreferencedReaders();
                 }
diff --git a/server/src/test/java/org/opensearch/index/translog/TranslogDeletionPolicyTests.java b/server/src/test/java/org/opensearch/index/translog/TranslogDeletionPolicyTests.java
index 983a42880e34c..e9f88d60247fb 100644
--- a/server/src/test/java/org/opensearch/index/translog/TranslogDeletionPolicyTests.java
+++ b/server/src/test/java/org/opensearch/index/translog/TranslogDeletionPolicyTests.java
@@ -256,7 +256,8 @@ private Tuple<List<TranslogReader>, TranslogWriter> createReadersAndWriter(final
             randomNonNegativeLong(),
             new TragicExceptionHolder(),
             seqNo -> {},
-            BigArrays.NON_RECYCLING_INSTANCE
+            BigArrays.NON_RECYCLING_INSTANCE,
+            null
         );
         writer = Mockito.spy(writer);
         Mockito.doReturn(now - (numberOfReaders - gen + 1) * 1000).when(writer).getLastModifiedTime();
diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java
index 0a21be99bc06b..1c485dbc35c63 100644
--- a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java
+++ b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java
@@ -47,6 +47,7 @@ public class TranslogTransferManagerTests extends OpenSearchTestCase {
 
     private TransferService transferService;
+    private ShardId shardId;
     private BlobPath remoteBaseTransferPath;
     private ThreadPool threadPool;
     private long primaryTerm;
@@ -58,6 +59,7 @@ public void setUp() throws Exception {
         super.setUp();
         primaryTerm = randomNonNegativeLong();
         generation = randomNonNegativeLong();
+        shardId = mock(ShardId.class);
         minTranslogGeneration = randomLongBetween(0, generation);
         remoteBaseTransferPath = new BlobPath().add("base_path");
         transferService = mock(TransferService.class);
@@ -102,6 +104,7 @@ public void onFailure(TransferFileSnapshot fileSnapshot, Exception e) {
         };
 
         TranslogTransferManager translogTransferManager = new TranslogTransferManager(
+            shardId,
             transferService,
             remoteBaseTransferPath,
             fileTransferTracker
@@ -177,14 +180,24 @@ public TranslogTransferMetadata getTranslogTransferMetadata() {
     }
 
     public void testReadMetadataNoFile() throws IOException {
-        TranslogTransferManager translogTransferManager = new TranslogTransferManager(transferService, remoteBaseTransferPath, null);
+        TranslogTransferManager translogTransferManager = new TranslogTransferManager(
+            shardId,
+            transferService,
+            remoteBaseTransferPath,
+            null
+        );
 
         when(transferService.listAll(remoteBaseTransferPath)).thenReturn(Sets.newHashSet());
         assertNull(translogTransferManager.readMetadata());
     }
 
     public void testReadMetadataSingleFile() throws IOException {
-        TranslogTransferManager translogTransferManager = new TranslogTransferManager(transferService, remoteBaseTransferPath, null);
+        TranslogTransferManager translogTransferManager = new TranslogTransferManager(
+            shardId,
+            transferService,
+            remoteBaseTransferPath,
+            null
+        );
 
         // BlobPath does not have equals method, so we can't use the instance directly in when
when(transferService.listAll(any(BlobPath.class))).thenReturn(Sets.newHashSet("12__234")); @@ -198,7 +211,12 @@ public void testReadMetadataSingleFile() throws IOException { } public void testReadMetadataMultipleFiles() throws IOException { - TranslogTransferManager translogTransferManager = new TranslogTransferManager(transferService, remoteBaseTransferPath, null); + TranslogTransferManager translogTransferManager = new TranslogTransferManager( + shardId, + transferService, + remoteBaseTransferPath, + null + ); when(transferService.listAll(any(BlobPath.class))).thenReturn(Sets.newHashSet("12__234", "12__235", "12__233")); @@ -211,7 +229,12 @@ public void testReadMetadataMultipleFiles() throws IOException { } public void testReadMetadataException() throws IOException { - TranslogTransferManager translogTransferManager = new TranslogTransferManager(transferService, remoteBaseTransferPath, null); + TranslogTransferManager translogTransferManager = new TranslogTransferManager( + shardId, + transferService, + remoteBaseTransferPath, + null + ); when(transferService.listAll(any(BlobPath.class))).thenReturn(Sets.newHashSet("12__234", "12__235", "12__233")); @@ -228,6 +251,7 @@ public void testReadMetadataSamePrimaryTermGeneration() throws IOException { public void testDownloadTranslog() throws IOException { Path location = createTempDir(); TranslogTransferManager translogTransferManager = new TranslogTransferManager( + shardId, transferService, remoteBaseTransferPath, new FileTransferTracker(new ShardId("index", "indexUuid", 0)) @@ -254,7 +278,12 @@ public void testDownloadTranslogAlreadyExists() throws IOException { Files.createFile(location.resolve("translog-23.tlog")); Files.createFile(location.resolve("translog-23.ckp")); - TranslogTransferManager translogTransferManager = new TranslogTransferManager(transferService, remoteBaseTransferPath, tracker); + TranslogTransferManager translogTransferManager = new TranslogTransferManager( + shardId, + transferService, + remoteBaseTransferPath, + tracker + ); when(transferService.downloadBlob(any(BlobPath.class), eq("translog-23.tlog"))).thenReturn( new ByteArrayInputStream("Hello Translog".getBytes(StandardCharsets.UTF_8)) @@ -278,7 +307,12 @@ public void testDownloadTranslogWithTrackerUpdated() throws IOException { Files.createFile(location.resolve(translogFile)); Files.createFile(location.resolve(checkpointFile)); - TranslogTransferManager translogTransferManager = new TranslogTransferManager(transferService, remoteBaseTransferPath, tracker); + TranslogTransferManager translogTransferManager = new TranslogTransferManager( + shardId, + transferService, + remoteBaseTransferPath, + tracker + ); when(transferService.downloadBlob(any(BlobPath.class), eq(translogFile))).thenReturn( new ByteArrayInputStream("Hello Translog".getBytes(StandardCharsets.UTF_8)) @@ -310,6 +344,7 @@ public void testDeleteTranslogSuccess() throws Exception { when(blobStore.blobContainer(any(BlobPath.class))).thenReturn(blobContainer); BlobStoreTransferService blobStoreTransferService = new BlobStoreTransferService(blobStore, threadPool); TranslogTransferManager translogTransferManager = new TranslogTransferManager( + shardId, blobStoreTransferService, remoteBaseTransferPath, tracker @@ -333,6 +368,7 @@ public void testDeleteTranslogFailure() throws Exception { // when(blobStore.blobContainer(any(BlobPath.class))).thenReturn(blobContainer); BlobStoreTransferService blobStoreTransferService = new BlobStoreTransferService(blobStore, threadPool); TranslogTransferManager 
translogTransferManager = new TranslogTransferManager(
+            shardId,
             blobStoreTransferService,
             remoteBaseTransferPath,
             tracker
diff --git a/test/framework/src/main/java/org/opensearch/test/BackgroundIndexer.java b/test/framework/src/main/java/org/opensearch/test/BackgroundIndexer.java
index b48cf255ddd4a..647e42e5038f4 100644
--- a/test/framework/src/main/java/org/opensearch/test/BackgroundIndexer.java
+++ b/test/framework/src/main/java/org/opensearch/test/BackgroundIndexer.java
@@ -79,7 +79,9 @@ public class BackgroundIndexer implements AutoCloseable {
     final CountDownLatch startLatch = new CountDownLatch(1);
     final AtomicBoolean hasBudget = new AtomicBoolean(false); // when set to true, writers will acquire writes from a semaphore
     final Semaphore availableBudget = new Semaphore(0);
-    final boolean useAutoGeneratedIDs;
+    boolean useAutoGeneratedIDs;
+
+    private boolean autoStart = false;
     private final Set<String> ids = ConcurrentCollections.newConcurrentSet();
 
     private volatile Consumer<Exception> failureAssertion = null;
@@ -246,6 +248,7 @@ public void run() {
         }
 
         if (autoStart) {
+            this.autoStart = true;
             start(numOfDocs);
         }
     }
@@ -260,6 +263,16 @@ private void trackFailure(Exception e) {
         }
     }
 
+    public void setUseAutoGeneratedIDs(boolean useAutoGeneratedIDs) {
+        // this should only be set when autoStart is false, to ensure no indexing has started
+        if (this.autoStart == false) {
+            this.useAutoGeneratedIDs = useAutoGeneratedIDs;
+        } else {
+            logger.error("Auto generated ids can be set only when autoStart is set to false");
+        }
+
+    }
+
     private XContentBuilder generateSource(long id, Random random) throws IOException {
         int contentLength = RandomNumbers.randomIntBetween(random, minFieldSize, maxFieldSize);
         StringBuilder text = new StringBuilder(contentLength);
diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java
index 5af7be76b7b86..500a4d5eb15ef 100644
--- a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java
+++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java
@@ -1089,6 +1089,23 @@ public void waitForDocs(final long numDocs, final BackgroundIndexer indexer) thr
         }, maxWaitTimeMs, TimeUnit.MILLISECONDS);
     }
 
+    /**
+     * Waits until at least a given number of documents have been indexed by the indexer
+     *
+     * @param numDocs number of documents to wait for
+     * @param indexer a {@link BackgroundIndexer}. It will first be checked for documents indexed.
+     *                This saves on unneeded searches.
+     */
+    public void waitForIndexed(final long numDocs, final BackgroundIndexer indexer) throws Exception {
+        // indexing threads can wait for up to ~1m before retrying when they first try to index into a shard which is not STARTED.
+        final long maxWaitTimeMs = Math.max(90 * 1000, 200 * numDocs);
+
+        assertBusy(() -> {
+            long lastKnownCount = indexer.totalIndexedDocs();
+            assertThat(lastKnownCount, greaterThanOrEqualTo(numDocs));
+        }, maxWaitTimeMs, TimeUnit.MILLISECONDS);
+    }
+
     /**
     * Prints the current cluster state as debug logging.
*/ From 3d864187ec78b3cdaa16662eeedb3123f2265bdf Mon Sep 17 00:00:00 2001 From: Kunal Kotwani Date: Wed, 17 May 2023 05:10:37 -0700 Subject: [PATCH 03/10] Update compatibility version for file cache API (#7596) Signed-off-by: Kunal Kotwani --- .../admin/indices/cache/clear/ClearIndicesCacheRequest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/action/admin/indices/cache/clear/ClearIndicesCacheRequest.java b/server/src/main/java/org/opensearch/action/admin/indices/cache/clear/ClearIndicesCacheRequest.java index 793d02d6c4d13..1be3bbe0a791c 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/cache/clear/ClearIndicesCacheRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/cache/clear/ClearIndicesCacheRequest.java @@ -59,7 +59,7 @@ public ClearIndicesCacheRequest(StreamInput in) throws IOException { fieldDataCache = in.readBoolean(); fields = in.readStringArray(); requestCache = in.readBoolean(); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_8_0)) { fileCache = in.readBoolean(); } } @@ -120,7 +120,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeBoolean(fieldDataCache); out.writeStringArrayNullable(fields); out.writeBoolean(requestCache); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_8_0)) { out.writeBoolean(fileCache); } } From 170dcef809080e2c336fe7ce441a1edf9a8982c2 Mon Sep 17 00:00:00 2001 From: gaobinlong Date: Wed, 17 May 2023 20:49:44 +0800 Subject: [PATCH 04/10] Add more index blocks check for resize APIs (#4845) (#6774) * Add more index blocks check for resize APIs (#4845) Signed-off-by: Gao Binlong * modify change log Signed-off-by: Gao Binlong * Rewording the error message Signed-off-by: Gao Binlong --------- Signed-off-by: Gao Binlong --- CHANGELOG.md | 1 + .../test/indices.clone/10_basic.yml | 96 ++++++++++++++ .../test/indices.shrink/10_basic.yml | 118 ++++++++++++++++-- .../test/indices.split/10_basic.yml | 100 +++++++++++++++ .../admin/indices/shrink/ResizeRequest.java | 21 ++++ .../indices/shrink/TransportResizeAction.java | 15 +++ .../cluster/metadata/IndexMetadata.java | 4 +- .../metadata/MetadataCreateIndexService.java | 6 +- .../indices/shrink/ResizeRequestTests.java | 25 +++- .../shrink/TransportResizeActionTests.java | 91 ++++++++++++++ .../cluster/metadata/IndexMetadataTests.java | 19 +++ .../MetadataCreateIndexServiceTests.java | 63 +++++++++- 12 files changed, 539 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 798be4678e550..5b6b0343e397c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -126,6 +126,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Removed ### Fixed +- Add more index blocks check for resize APIs ([#6774](https://github.com/opensearch-project/OpenSearch/pull/6774)) - Replaces ZipInputStream with ZipFile to fix Zip Slip vulnerability ([#7230](https://github.com/opensearch-project/OpenSearch/pull/7230)) - Add missing validation/parsing of SearchBackpressureMode of SearchBackpressureSettings ([#7541](https://github.com/opensearch-project/OpenSearch/pull/7541)) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.clone/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.clone/10_basic.yml index ca8342b2e91c2..d92fb434ba718 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.clone/10_basic.yml +++ 
b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.clone/10_basic.yml @@ -112,3 +112,99 @@ setup: settings: index.number_of_replicas: 0 index.number_of_shards: 6 + +--- +"Returns error if target index's metadata write is blocked": + + - skip: + version: " - 2.9.99" + reason: "only available in 3.0+" + + # block source index's write operations + - do: + indices.put_settings: + index: source + body: + index.blocks.write: true + index.number_of_replicas: 0 + + - do: + cluster.health: + wait_for_status: green + index: source + + # set `index.blocks.read_only` to `true` for target index + - do: + catch: /action_request_validation_exception/ + indices.clone: + index: "source" + target: "new_cloned_index" + wait_for_active_shards: 1 + cluster_manager_timeout: 10s + body: + settings: + index.number_of_replicas: 0 + index.blocks.read_only: true + + # set `index.blocks.metadata` to `true` for target index + - do: + catch: /action_request_validation_exception/ + indices.clone: + index: "source" + target: "new_cloned_index" + wait_for_active_shards: 1 + cluster_manager_timeout: 10s + body: + settings: + index.number_of_replicas: 0 + index.blocks.metadata: true + + # set source index's setting `index.blocks.read_only` to `true` + - do: + indices.put_settings: + index: source + body: + index.blocks.read_only: true + + - do: + catch: /illegal_argument_exception/ + indices.clone: + index: "source" + target: "new_cloned_index" + wait_for_active_shards: 1 + cluster_manager_timeout: 10s + body: + settings: + index.number_of_replicas: 0 + + # overwrite the source index's setting, everything is fine + - do: + indices.clone: + index: "source" + target: "new_cloned_index" + wait_for_active_shards: 1 + cluster_manager_timeout: 10s + body: + settings: + index.number_of_replicas: 0 + index.blocks.read_only: null + + - do: + cluster.health: + wait_for_status: green + + - do: + get: + index: new_cloned_index + id: "1" + + - match: { _index: new_cloned_index } + - match: { _id: "1" } + - match: { _source: { foo: "hello world" } } + + # clear the source index's read_only blocks because it will block deleting index + - do: + indices.put_settings: + index: source + body: + index.blocks.read_only: null diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.shrink/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.shrink/10_basic.yml index 032f061d8a160..3b89d5c93acd8 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.shrink/10_basic.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.shrink/10_basic.yml @@ -1,15 +1,11 @@ --- -"Shrink index via API": +setup: # creates an index with one document solely allocated on a particular data node # and shrinks it into a new index with a single shard # we don't do the relocation to a single node after the index is created # here since in a mixed version cluster we can't identify # which node is the one with the highest version and that is the only one that can safely # be used to shrink the index. 
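# A minimal sketch of the flow these tests drive (index and target names taken from the tests below):
# first block writes on the source index, then call the resize API, e.g.
#   indices.put_settings:  index: source, body: { index.blocks.write: true }
#   indices.shrink:        index: source, target: new_shrunken_index, body: { settings: { index.number_of_shards: 1 } }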
- - - skip: - features: allowed_warnings - - do: nodes.info: node_id: data:true @@ -32,14 +28,10 @@ id: "1" body: { "foo": "hello world" } - - do: - get: - index: source - id: "1" - - - match: { _index: source } - - match: { _id: "1" } - - match: { _source: { foo: "hello world" } } +--- +"Shrink index via API": + - skip: + features: allowed_warnings # make it read-only - do: @@ -79,3 +71,103 @@ - match: { _index: target } - match: { _id: "1" } - match: { _source: { foo: "hello world" } } + +--- +"Returns error if target index's metadata write is blocked": + + - skip: + version: " - 2.9.99" + reason: "only available in 3.0+" + + # block source index's write operations + - do: + indices.put_settings: + index: source + body: + index.blocks.write: true + index.number_of_replicas: 0 + + - do: + cluster.health: + wait_for_status: green + index: source + + # set `index.blocks.read_only` to `true` for target index + - do: + catch: /action_request_validation_exception/ + indices.shrink: + index: "source" + target: "new_shrunken_index" + wait_for_active_shards: 1 + cluster_manager_timeout: 10s + body: + settings: + index.number_of_replicas: 0 + index.number_of_shards: 1 + index.blocks.read_only: true + + # set `index.blocks.metadata` to `true` for target index + - do: + catch: /action_request_validation_exception/ + indices.shrink: + index: "source" + target: "new_shrunken_index" + wait_for_active_shards: 1 + cluster_manager_timeout: 10s + body: + settings: + index.number_of_replicas: 0 + index.number_of_shards: 1 + index.blocks.metadata: true + + # set source index's setting `index.blocks.read_only` to `true` + - do: + indices.put_settings: + index: source + body: + index.blocks.read_only: true + + - do: + catch: /illegal_argument_exception/ + indices.shrink: + index: "source" + target: "new_shrunken_index" + wait_for_active_shards: 1 + cluster_manager_timeout: 10s + body: + settings: + index.number_of_replicas: 0 + index.number_of_shards: 1 + + # overwrite the source index's setting, everything is fine + - do: + indices.shrink: + index: "source" + target: "new_shrunken_index" + wait_for_active_shards: 1 + cluster_manager_timeout: 10s + body: + settings: + index.number_of_replicas: 0 + index.number_of_shards: 1 + index.blocks.read_only: null + + - do: + cluster.health: + wait_for_status: green + + - do: + get: + index: new_shrunken_index + id: "1" + + - match: { _index: new_shrunken_index } + - match: { _id: "1" } + - match: { _source: { foo: "hello world" } } + + # clear the source index's read_only blocks because it will block deleting index + - do: + indices.put_settings: + index: source + body: + index.blocks.read_only: null diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.split/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.split/10_basic.yml index 2432f47d4dca7..5e1cae0d8efc5 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.split/10_basic.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.split/10_basic.yml @@ -218,3 +218,103 @@ setup: settings: index.number_of_replicas: 0 index.number_of_shards: 6 + +--- +"Returns error if target index's metadata write is blocked": + + - skip: + version: " - 2.9.99" + reason: "only available in 3.0+" + + # block source index's write operations + - do: + indices.put_settings: + index: source + body: + index.blocks.write: true + index.number_of_replicas: 0 + + - do: + cluster.health: + wait_for_status: green + index: source + + # set `index.blocks.read_only` to 
`true` for target index + - do: + catch: /action_request_validation_exception/ + indices.split: + index: "source" + target: "new_split_index" + wait_for_active_shards: 1 + cluster_manager_timeout: 10s + body: + settings: + index.number_of_replicas: 0 + index.number_of_shards: 4 + index.blocks.read_only: true + + # set `index.blocks.metadata` to `true` for target index + - do: + catch: /action_request_validation_exception/ + indices.split: + index: "source" + target: "new_split_index" + wait_for_active_shards: 1 + cluster_manager_timeout: 10s + body: + settings: + index.number_of_replicas: 0 + index.number_of_shards: 4 + index.blocks.metadata: true + + # set source index's setting `index.blocks.read_only` to `true` + - do: + indices.put_settings: + index: source + body: + index.blocks.read_only: true + + - do: + catch: /illegal_argument_exception/ + indices.split: + index: "source" + target: "new_split_index" + wait_for_active_shards: 1 + cluster_manager_timeout: 10s + body: + settings: + index.number_of_replicas: 0 + index.number_of_shards: 4 + + # overwrite the source index's setting, everything is fine + - do: + indices.split: + index: "source" + target: "new_split_index" + wait_for_active_shards: 1 + cluster_manager_timeout: 10s + body: + settings: + index.number_of_replicas: 0 + index.number_of_shards: 4 + index.blocks.read_only: null + + - do: + cluster.health: + wait_for_status: green + + - do: + get: + index: new_split_index + id: "1" + + - match: { _index: new_split_index } + - match: { _id: "1" } + - match: { _source: { foo: "hello world" } } + + # clear the source index's read_only blocks because it will block deleting index + - do: + indices.put_settings: + index: source + body: + index.blocks.read_only: null diff --git a/server/src/main/java/org/opensearch/action/admin/indices/shrink/ResizeRequest.java b/server/src/main/java/org/opensearch/action/admin/indices/shrink/ResizeRequest.java index 1e524e720da9b..78636fd04984b 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/shrink/ResizeRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/shrink/ResizeRequest.java @@ -146,6 +146,27 @@ public ActionRequestValidationException validate() { if (maxShardSize != null && maxShardSize.getBytes() <= 0) { validationException = addValidationError("max_shard_size must be greater than 0", validationException); } + // Check target index's settings, if `index.blocks.read_only` is `true`, the target index's metadata writes will be disabled + // and then cause the new shards to be unassigned. + if (IndexMetadata.INDEX_READ_ONLY_SETTING.get(targetIndexRequest.settings()) == true) { + validationException = addValidationError( + "target index [" + + targetIndexRequest.index() + + "] will be blocked by [index.blocks.read_only=true], this will disable metadata writes and cause the shards to be unassigned", + validationException + ); + } + + // Check target index's settings, if `index.blocks.metadata` is `true`, the target index's metadata writes will be disabled + // and then cause the new shards to be unassigned. 
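+        // For example, a clone request whose target settings include { "index.blocks.metadata": true }
+        // is rejected here with an action_request_validation_exception, as the REST tests above expect.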
+ if (IndexMetadata.INDEX_BLOCKS_METADATA_SETTING.get(targetIndexRequest.settings()) == true) { + validationException = addValidationError( + "target index [" + + targetIndexRequest.index() + + "] will be blocked by [index.blocks.metadata=true], this will disable metadata writes and cause the shards to be unassigned", + validationException + ); + } assert copySettings == null || copySettings; return validationException; } diff --git a/server/src/main/java/org/opensearch/action/admin/indices/shrink/TransportResizeAction.java b/server/src/main/java/org/opensearch/action/admin/indices/shrink/TransportResizeAction.java index 5bcf3631bf398..fe60a82d0e76f 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/shrink/TransportResizeAction.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/shrink/TransportResizeAction.java @@ -181,6 +181,21 @@ static CreateIndexClusterStateUpdateRequest prepareCreateIndexRequest( final Settings targetIndexSettings = targetIndexSettingsBuilder.build(); final int numShards; + // We should check the source index's setting `index.blocks.read_only`, because the setting will be copied to target index, + // it will block target index's metadata writes and then cause the new shards to be unassigned, + // but if user overwrites the setting to `false` or `null`, everything is fine. + // We don't need to check the setting `index.blocks.metadata`, because it was checked when fetching index stats + if (IndexMetadata.INDEX_READ_ONLY_SETTING.get(metadata.getSettings()) == true + && IndexMetadata.INDEX_READ_ONLY_SETTING.exists(targetIndexSettings) == false) { + throw new IllegalArgumentException( + "target index [" + + targetIndexName + + "] will be blocked by [index.blocks.read_only=true] which is copied from the source index [" + + sourceIndexName + + "], this will disable metadata writes and cause the shards to be unassigned" + ); + } + if (IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.exists(targetIndexSettings)) { numShards = IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.get(targetIndexSettings); } else { diff --git a/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java index 4ca79541637b1..e7fee330f5b00 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java @@ -2011,8 +2011,8 @@ public static ShardId selectCloneShard(int shardId, IndexMetadata sourceIndexMet throw new IllegalArgumentException( "the number of target shards (" + numTargetShards - + ") must be the same as the number of " - + " source shards ( " + + ") must be the same as the number of" + + " source shards (" + numSourceShards + ")" ); diff --git a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java index bc57b2ff56da3..d3cb68d65affe 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java @@ -1389,9 +1389,11 @@ static IndexMetadata validateResize(ClusterState state, String sourceIndex, Stri ); } - // ensure index is read-only + // ensure write operations on the source index is blocked if (state.blocks().indexBlocked(ClusterBlockLevel.WRITE, sourceIndex) == false) { - throw new IllegalStateException("index " + sourceIndex + " must be 
read-only to resize index. use \"index.blocks.write=true\""); + throw new IllegalStateException( + "index " + sourceIndex + " must block write operations to resize index. use \"index.blocks.write=true\"" + ); } if (IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.exists(targetIndexSettings)) { diff --git a/server/src/test/java/org/opensearch/action/admin/indices/shrink/ResizeRequestTests.java b/server/src/test/java/org/opensearch/action/admin/indices/shrink/ResizeRequestTests.java index d591c395d5402..4742157b3209b 100644 --- a/server/src/test/java/org/opensearch/action/admin/indices/shrink/ResizeRequestTests.java +++ b/server/src/test/java/org/opensearch/action/admin/indices/shrink/ResizeRequestTests.java @@ -130,13 +130,36 @@ public void testToAndFromXContent() throws IOException { public void testTargetIndexSettingsValidation() { ResizeRequest resizeRequest = new ResizeRequest(randomAlphaOfLengthBetween(3, 10), randomAlphaOfLengthBetween(3, 10)); CreateIndexRequest createIndexRequest = new CreateIndexRequest(randomAlphaOfLengthBetween(3, 10)); + createIndexRequest.settings(Settings.builder().put("index.blocks.read_only", true)); + resizeRequest.setTargetIndex(createIndexRequest); + ActionRequestValidationException e = resizeRequest.validate(); + assertEquals( + "Validation Failed: 1: target index [" + + createIndexRequest.index() + + "] will be blocked by [index.blocks.read_only=true]," + + " this will disable metadata writes and cause the shards to be unassigned;", + e.getMessage() + ); + + createIndexRequest.settings(Settings.builder().put("index.blocks.metadata", true)); + resizeRequest.setMaxShardSize(new ByteSizeValue(randomIntBetween(1, 100))); + resizeRequest.setTargetIndex(createIndexRequest); + e = resizeRequest.validate(); + assertEquals( + "Validation Failed: 1: target index [" + + createIndexRequest.index() + + "] will be blocked by [index.blocks.metadata=true]," + + " this will disable metadata writes and cause the shards to be unassigned;", + e.getMessage() + ); + createIndexRequest.settings( Settings.builder() .put("index.sort.field", randomAlphaOfLengthBetween(3, 10)) .put("index.routing_partition_size", randomIntBetween(1, 10)) ); resizeRequest.setTargetIndex(createIndexRequest); - ActionRequestValidationException e = resizeRequest.validate(); + e = resizeRequest.validate(); assertEquals( "Validation Failed: 1: can't override index sort when resizing an index;" + "2: cannot provide a routing partition size value when resizing an index;", diff --git a/server/src/test/java/org/opensearch/action/admin/indices/shrink/TransportResizeActionTests.java b/server/src/test/java/org/opensearch/action/admin/indices/shrink/TransportResizeActionTests.java index b830c6b01792c..ef49820192e9b 100644 --- a/server/src/test/java/org/opensearch/action/admin/indices/shrink/TransportResizeActionTests.java +++ b/server/src/test/java/org/opensearch/action/admin/indices/shrink/TransportResizeActionTests.java @@ -470,6 +470,97 @@ public void testCalculateTargetIndexShardsNum() { ); } + public void testIndexBlocks() { + String indexName = randomAlphaOfLength(10); + // create one that won't fail + ClusterState clusterState = ClusterState.builder( + createClusterState(indexName, 10, 0, 40, Settings.builder().put("index.blocks.read_only", true).build()) + ).nodes(DiscoveryNodes.builder().add(newNode("node1"))).build(); + + // Target index will be blocked by [index.blocks.read_only=true] copied from the source index + ResizeRequest resizeRequest = new ResizeRequest("target", indexName); + ResizeType 
resizeType; + switch (randomIntBetween(0, 2)) { + case 0: + resizeType = ResizeType.SHRINK; + break; + case 1: + resizeType = ResizeType.SPLIT; + break; + default: + resizeType = ResizeType.CLONE; + } + resizeRequest.setResizeType(resizeType); + resizeRequest.getTargetIndexRequest().settings(Settings.builder().put("index.number_of_shards", randomIntBetween(1, 100)).build()); + ClusterState finalState = clusterState; + IllegalArgumentException iae = expectThrows( + IllegalArgumentException.class, + () -> TransportResizeAction.prepareCreateIndexRequest( + resizeRequest, + finalState, + null, + new StoreStats(between(1, 10000), between(1, 10000)), + indexName, + "target" + ) + ); + assertEquals( + "target index [target] will be blocked by [index.blocks.read_only=true] which is copied from the source index [" + + indexName + + "], this will disable metadata writes and cause the shards to be unassigned", + iae.getMessage() + ); + + // Overwrites the source index's settings index.blocks.read_only, so resize won't fail + AllocationService service = new AllocationService( + new AllocationDeciders(Collections.singleton(new MaxRetryAllocationDecider())), + new TestGatewayAllocator(), + new BalancedShardsAllocator(Settings.EMPTY), + EmptyClusterInfoService.INSTANCE, + EmptySnapshotsInfoService.INSTANCE + ); + RoutingTable routingTable = service.reroute(clusterState, "reroute").routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + // now we start the shard + routingTable = OpenSearchAllocationTestCase.startInitializingShardsAndReroute(service, clusterState, indexName).routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + int numSourceShards = clusterState.metadata().index(indexName).getNumberOfShards(); + DocsStats stats = new DocsStats(between(0, (IndexWriter.MAX_DOCS) / numSourceShards), between(1, 1000), between(1, 10000)); + + int expectedShardsNum; + String cause; + switch (resizeType) { + case SHRINK: + expectedShardsNum = 5; + cause = "shrink_index"; + break; + case SPLIT: + expectedShardsNum = 20; + cause = "split_index"; + break; + default: + expectedShardsNum = 10; + cause = "clone_index"; + } + resizeRequest.getTargetIndexRequest() + .settings(Settings.builder().put("index.number_of_shards", expectedShardsNum).put("index.blocks.read_only", false).build()); + final ActiveShardCount activeShardCount = randomBoolean() ? 
ActiveShardCount.ALL : ActiveShardCount.ONE; + resizeRequest.setWaitForActiveShards(activeShardCount); + CreateIndexClusterStateUpdateRequest request = TransportResizeAction.prepareCreateIndexRequest( + resizeRequest, + clusterState, + (i) -> stats, + new StoreStats(100, between(1, 10000)), + indexName, + "target" + ); + assertNotNull(request.recoverFrom()); + assertEquals(indexName, request.recoverFrom().getName()); + assertEquals(String.valueOf(expectedShardsNum), request.settings().get("index.number_of_shards")); + assertEquals(cause, request.cause()); + assertEquals(request.waitForActiveShards(), activeShardCount); + } + private DiscoveryNode newNode(String nodeId) { final Set roles = Collections.unmodifiableSet( new HashSet<>(Arrays.asList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)) diff --git a/server/src/test/java/org/opensearch/cluster/metadata/IndexMetadataTests.java b/server/src/test/java/org/opensearch/cluster/metadata/IndexMetadataTests.java index 59a16f07a6ac9..38fb1a657b9e8 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/IndexMetadataTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/IndexMetadataTests.java @@ -228,6 +228,25 @@ public void testSelectShrinkShards() { ); } + public void testSelectCloneShard() { + int numberOfReplicas = randomIntBetween(0, 10); + IndexMetadata metadata = IndexMetadata.builder("foo") + .settings( + Settings.builder() + .put("index.version.created", 1) + .put("index.number_of_shards", 10) + .put("index.number_of_replicas", numberOfReplicas) + .build() + ) + .creationDate(randomLong()) + .build(); + + assertEquals( + "the number of target shards (11) must be the same as the number of source shards (10)", + expectThrows(IllegalArgumentException.class, () -> IndexMetadata.selectCloneShard(0, metadata, 11)).getMessage() + ); + } + public void testSelectResizeShards() { int numTargetShards = randomFrom(4, 6, 8, 12); diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java index 74b294709f95f..00d496fde0434 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java @@ -275,7 +275,7 @@ public void testValidateShrinkIndex() { ); assertEquals( - "index source must be read-only to resize index. use \"index.blocks.write=true\"", + "index source must block write operations to resize index. use \"index.blocks.write=true\"", expectThrows( IllegalStateException.class, () -> MetadataCreateIndexService.validateShrinkIndex( @@ -377,7 +377,7 @@ public void testValidateSplitIndex() { ); assertEquals( - "index source must be read-only to resize index. use \"index.blocks.write=true\"", + "index source must block write operations to resize index. 
use \"index.blocks.write=true\"", expectThrows( IllegalStateException.class, () -> MetadataCreateIndexService.validateSplitIndex( @@ -436,6 +436,65 @@ public void testValidateSplitIndex() { ); } + public void testValidateCloneIndex() { + int numShards = randomIntBetween(1, 42); + Settings targetSettings = Settings.builder().put("index.number_of_shards", numShards).build(); + ClusterState state = createClusterState( + "source", + numShards, + randomIntBetween(0, 10), + Settings.builder().put("index.blocks.write", true).build() + ); + + assertEquals( + "index [source] already exists", + expectThrows( + ResourceAlreadyExistsException.class, + () -> MetadataCreateIndexService.validateCloneIndex(state, "target", "source", targetSettings) + ).getMessage() + ); + + assertEquals( + "no such index [no_such_index]", + expectThrows( + IndexNotFoundException.class, + () -> MetadataCreateIndexService.validateCloneIndex(state, "no_such_index", "target", targetSettings) + ).getMessage() + ); + + assertEquals( + "index source must block write operations to resize index. use \"index.blocks.write=true\"", + expectThrows( + IllegalStateException.class, + () -> MetadataCreateIndexService.validateCloneIndex( + createClusterState("source", randomIntBetween(2, 100), randomIntBetween(0, 10), Settings.EMPTY), + "source", + "target", + targetSettings + ) + ).getMessage() + ); + + ClusterState clusterState = ClusterState.builder( + createClusterState("source", numShards, 0, Settings.builder().put("index.blocks.write", true).build()) + ).nodes(DiscoveryNodes.builder().add(newNode("node1"))).build(); + AllocationService service = new AllocationService( + new AllocationDeciders(singleton(new MaxRetryAllocationDecider())), + new TestGatewayAllocator(), + new BalancedShardsAllocator(Settings.EMPTY), + EmptyClusterInfoService.INSTANCE, + EmptySnapshotsInfoService.INSTANCE + ); + + RoutingTable routingTable = service.reroute(clusterState, "reroute").routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + // now we start the shard + routingTable = OpenSearchAllocationTestCase.startInitializingShardsAndReroute(service, clusterState, "source").routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + + MetadataCreateIndexService.validateCloneIndex(clusterState, "source", "target", targetSettings); + } + public void testPrepareResizeIndexSettings() { final List versions = Arrays.asList(VersionUtils.randomVersion(random()), VersionUtils.randomVersion(random())); versions.sort(Comparator.comparingLong(l -> l.id)); From 77100f9c21be4c63efc7300a69735bf84464b7c9 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Wed, 17 May 2023 12:32:49 -0400 Subject: [PATCH 05/10] Bump Jackson from 2.15.0 to 2.15.1 (#7603) Signed-off-by: Andriy Redko --- CHANGELOG.md | 1 + buildSrc/version.properties | 4 ++-- client/sniffer/licenses/jackson-core-2.15.0.jar.sha1 | 1 - client/sniffer/licenses/jackson-core-2.15.1.jar.sha1 | 1 + .../upgrade-cli/licenses/jackson-annotations-2.15.0.jar.sha1 | 1 - .../upgrade-cli/licenses/jackson-annotations-2.15.1.jar.sha1 | 1 + .../upgrade-cli/licenses/jackson-databind-2.15.0.jar.sha1 | 1 - .../upgrade-cli/licenses/jackson-databind-2.15.1.jar.sha1 | 1 + libs/core/licenses/jackson-core-2.15.0.jar.sha1 | 1 - libs/core/licenses/jackson-core-2.15.1.jar.sha1 | 1 + libs/x-content/licenses/jackson-core-2.15.0.jar.sha1 | 1 - libs/x-content/licenses/jackson-core-2.15.1.jar.sha1 | 1 + 
.../licenses/jackson-dataformat-cbor-2.15.0.jar.sha1 | 1 - .../licenses/jackson-dataformat-cbor-2.15.1.jar.sha1 | 1 + .../licenses/jackson-dataformat-smile-2.15.0.jar.sha1 | 1 - .../licenses/jackson-dataformat-smile-2.15.1.jar.sha1 | 1 + .../licenses/jackson-dataformat-yaml-2.15.0.jar.sha1 | 1 - .../licenses/jackson-dataformat-yaml-2.15.1.jar.sha1 | 1 + .../ingest-geoip/licenses/jackson-annotations-2.15.0.jar.sha1 | 1 - .../ingest-geoip/licenses/jackson-annotations-2.15.1.jar.sha1 | 1 + .../ingest-geoip/licenses/jackson-databind-2.15.0.jar.sha1 | 1 - .../ingest-geoip/licenses/jackson-databind-2.15.1.jar.sha1 | 1 + .../licenses/jackson-annotations-2.15.0.jar.sha1 | 1 - .../licenses/jackson-annotations-2.15.1.jar.sha1 | 1 + .../discovery-ec2/licenses/jackson-databind-2.15.0.jar.sha1 | 1 - .../discovery-ec2/licenses/jackson-databind-2.15.1.jar.sha1 | 1 + .../licenses/jackson-annotations-2.15.0.jar.sha1 | 1 - .../licenses/jackson-annotations-2.15.1.jar.sha1 | 1 + .../licenses/jackson-databind-2.15.0.jar.sha1 | 1 - .../licenses/jackson-databind-2.15.1.jar.sha1 | 1 + .../licenses/jackson-dataformat-xml-2.15.0.jar.sha1 | 1 - .../licenses/jackson-dataformat-xml-2.15.1.jar.sha1 | 1 + .../licenses/jackson-datatype-jsr310-2.15.0.jar.sha1 | 1 - .../licenses/jackson-datatype-jsr310-2.15.1.jar.sha1 | 1 + .../licenses/jackson-module-jaxb-annotations-2.15.0.jar.sha1 | 1 - .../licenses/jackson-module-jaxb-annotations-2.15.1.jar.sha1 | 1 + .../licenses/jackson-annotations-2.15.0.jar.sha1 | 1 - .../licenses/jackson-annotations-2.15.1.jar.sha1 | 1 + .../repository-s3/licenses/jackson-databind-2.15.0.jar.sha1 | 1 - .../repository-s3/licenses/jackson-databind-2.15.1.jar.sha1 | 1 + 40 files changed, 22 insertions(+), 21 deletions(-) delete mode 100644 client/sniffer/licenses/jackson-core-2.15.0.jar.sha1 create mode 100644 client/sniffer/licenses/jackson-core-2.15.1.jar.sha1 delete mode 100644 distribution/tools/upgrade-cli/licenses/jackson-annotations-2.15.0.jar.sha1 create mode 100644 distribution/tools/upgrade-cli/licenses/jackson-annotations-2.15.1.jar.sha1 delete mode 100644 distribution/tools/upgrade-cli/licenses/jackson-databind-2.15.0.jar.sha1 create mode 100644 distribution/tools/upgrade-cli/licenses/jackson-databind-2.15.1.jar.sha1 delete mode 100644 libs/core/licenses/jackson-core-2.15.0.jar.sha1 create mode 100644 libs/core/licenses/jackson-core-2.15.1.jar.sha1 delete mode 100644 libs/x-content/licenses/jackson-core-2.15.0.jar.sha1 create mode 100644 libs/x-content/licenses/jackson-core-2.15.1.jar.sha1 delete mode 100644 libs/x-content/licenses/jackson-dataformat-cbor-2.15.0.jar.sha1 create mode 100644 libs/x-content/licenses/jackson-dataformat-cbor-2.15.1.jar.sha1 delete mode 100644 libs/x-content/licenses/jackson-dataformat-smile-2.15.0.jar.sha1 create mode 100644 libs/x-content/licenses/jackson-dataformat-smile-2.15.1.jar.sha1 delete mode 100644 libs/x-content/licenses/jackson-dataformat-yaml-2.15.0.jar.sha1 create mode 100644 libs/x-content/licenses/jackson-dataformat-yaml-2.15.1.jar.sha1 delete mode 100644 modules/ingest-geoip/licenses/jackson-annotations-2.15.0.jar.sha1 create mode 100644 modules/ingest-geoip/licenses/jackson-annotations-2.15.1.jar.sha1 delete mode 100644 modules/ingest-geoip/licenses/jackson-databind-2.15.0.jar.sha1 create mode 100644 modules/ingest-geoip/licenses/jackson-databind-2.15.1.jar.sha1 delete mode 100644 plugins/discovery-ec2/licenses/jackson-annotations-2.15.0.jar.sha1 create mode 100644 plugins/discovery-ec2/licenses/jackson-annotations-2.15.1.jar.sha1 delete mode 
100644 plugins/discovery-ec2/licenses/jackson-databind-2.15.0.jar.sha1 create mode 100644 plugins/discovery-ec2/licenses/jackson-databind-2.15.1.jar.sha1 delete mode 100644 plugins/repository-azure/licenses/jackson-annotations-2.15.0.jar.sha1 create mode 100644 plugins/repository-azure/licenses/jackson-annotations-2.15.1.jar.sha1 delete mode 100644 plugins/repository-azure/licenses/jackson-databind-2.15.0.jar.sha1 create mode 100644 plugins/repository-azure/licenses/jackson-databind-2.15.1.jar.sha1 delete mode 100644 plugins/repository-azure/licenses/jackson-dataformat-xml-2.15.0.jar.sha1 create mode 100644 plugins/repository-azure/licenses/jackson-dataformat-xml-2.15.1.jar.sha1 delete mode 100644 plugins/repository-azure/licenses/jackson-datatype-jsr310-2.15.0.jar.sha1 create mode 100644 plugins/repository-azure/licenses/jackson-datatype-jsr310-2.15.1.jar.sha1 delete mode 100644 plugins/repository-azure/licenses/jackson-module-jaxb-annotations-2.15.0.jar.sha1 create mode 100644 plugins/repository-azure/licenses/jackson-module-jaxb-annotations-2.15.1.jar.sha1 delete mode 100644 plugins/repository-s3/licenses/jackson-annotations-2.15.0.jar.sha1 create mode 100644 plugins/repository-s3/licenses/jackson-annotations-2.15.1.jar.sha1 delete mode 100644 plugins/repository-s3/licenses/jackson-databind-2.15.0.jar.sha1 create mode 100644 plugins/repository-s3/licenses/jackson-databind-2.15.1.jar.sha1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b6b0343e397c..c44b69b7c96c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -115,6 +115,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `com.google.guava:guava` from 30.1.1-jre to 31.1-jre (#7565) - Bump `com.azure:azure-storage-common` from 12.20.0 to 12.21.0 (#7566) - Bump `org.apache.commons:commons-compress` from 1.22 to 1.23.0 (#7563) +- Bump `jackson` from 2.15.0 to 2.15.1 ([#7603](https://github.com/opensearch-project/OpenSearch/pull/7603)) ### Changed - Enable `./gradlew build` on MacOS by disabling bcw tests ([#7303](https://github.com/opensearch-project/OpenSearch/pull/7303)) diff --git a/buildSrc/version.properties b/buildSrc/version.properties index 195050939b11d..bfc81ae684b73 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -8,8 +8,8 @@ bundled_jdk = 19.0.2+7 # optional dependencies spatial4j = 0.7 jts = 1.15.0 -jackson = 2.15.0 -jackson_databind = 2.15.0 +jackson = 2.15.1 +jackson_databind = 2.15.1 snakeyaml = 2.0 icu4j = 70.1 supercsv = 2.4.0 diff --git a/client/sniffer/licenses/jackson-core-2.15.0.jar.sha1 b/client/sniffer/licenses/jackson-core-2.15.0.jar.sha1 deleted file mode 100644 index 399338f25a34e..0000000000000 --- a/client/sniffer/licenses/jackson-core-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -12f334a1dc9c6d2854c43ae314024dde8b3ad572 \ No newline at end of file diff --git a/client/sniffer/licenses/jackson-core-2.15.1.jar.sha1 b/client/sniffer/licenses/jackson-core-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..c05879d55296a --- /dev/null +++ b/client/sniffer/licenses/jackson-core-2.15.1.jar.sha1 @@ -0,0 +1 @@ +241c054ba8503de092a12acad9f083dd39935cc0 \ No newline at end of file diff --git a/distribution/tools/upgrade-cli/licenses/jackson-annotations-2.15.0.jar.sha1 b/distribution/tools/upgrade-cli/licenses/jackson-annotations-2.15.0.jar.sha1 deleted file mode 100644 index 695645e221aeb..0000000000000 --- a/distribution/tools/upgrade-cli/licenses/jackson-annotations-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ 
-89b0fd554928425a776a6e97ed010034312af21d \ No newline at end of file diff --git a/distribution/tools/upgrade-cli/licenses/jackson-annotations-2.15.1.jar.sha1 b/distribution/tools/upgrade-cli/licenses/jackson-annotations-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..a3ade4ff8dadc --- /dev/null +++ b/distribution/tools/upgrade-cli/licenses/jackson-annotations-2.15.1.jar.sha1 @@ -0,0 +1 @@ +092a90d3739e970e03b5971839e4fe51f13c1fa3 \ No newline at end of file diff --git a/distribution/tools/upgrade-cli/licenses/jackson-databind-2.15.0.jar.sha1 b/distribution/tools/upgrade-cli/licenses/jackson-databind-2.15.0.jar.sha1 deleted file mode 100644 index 059bbf5747b97..0000000000000 --- a/distribution/tools/upgrade-cli/licenses/jackson-databind-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -0d41caa3a4e9f85382702a059a65c512f85ac230 \ No newline at end of file diff --git a/distribution/tools/upgrade-cli/licenses/jackson-databind-2.15.1.jar.sha1 b/distribution/tools/upgrade-cli/licenses/jackson-databind-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..47405b86d4a51 --- /dev/null +++ b/distribution/tools/upgrade-cli/licenses/jackson-databind-2.15.1.jar.sha1 @@ -0,0 +1 @@ +ac9ba74d208faf356e4719a49e59c6ea9237c01d \ No newline at end of file diff --git a/libs/core/licenses/jackson-core-2.15.0.jar.sha1 b/libs/core/licenses/jackson-core-2.15.0.jar.sha1 deleted file mode 100644 index 399338f25a34e..0000000000000 --- a/libs/core/licenses/jackson-core-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -12f334a1dc9c6d2854c43ae314024dde8b3ad572 \ No newline at end of file diff --git a/libs/core/licenses/jackson-core-2.15.1.jar.sha1 b/libs/core/licenses/jackson-core-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..c05879d55296a --- /dev/null +++ b/libs/core/licenses/jackson-core-2.15.1.jar.sha1 @@ -0,0 +1 @@ +241c054ba8503de092a12acad9f083dd39935cc0 \ No newline at end of file diff --git a/libs/x-content/licenses/jackson-core-2.15.0.jar.sha1 b/libs/x-content/licenses/jackson-core-2.15.0.jar.sha1 deleted file mode 100644 index 399338f25a34e..0000000000000 --- a/libs/x-content/licenses/jackson-core-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -12f334a1dc9c6d2854c43ae314024dde8b3ad572 \ No newline at end of file diff --git a/libs/x-content/licenses/jackson-core-2.15.1.jar.sha1 b/libs/x-content/licenses/jackson-core-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..c05879d55296a --- /dev/null +++ b/libs/x-content/licenses/jackson-core-2.15.1.jar.sha1 @@ -0,0 +1 @@ +241c054ba8503de092a12acad9f083dd39935cc0 \ No newline at end of file diff --git a/libs/x-content/licenses/jackson-dataformat-cbor-2.15.0.jar.sha1 b/libs/x-content/licenses/jackson-dataformat-cbor-2.15.0.jar.sha1 deleted file mode 100644 index bb1362b20db1b..0000000000000 --- a/libs/x-content/licenses/jackson-dataformat-cbor-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -bb4bb4c699571e8683e08f19c02cf9946e7e1346 \ No newline at end of file diff --git a/libs/x-content/licenses/jackson-dataformat-cbor-2.15.1.jar.sha1 b/libs/x-content/licenses/jackson-dataformat-cbor-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..096265b896013 --- /dev/null +++ b/libs/x-content/licenses/jackson-dataformat-cbor-2.15.1.jar.sha1 @@ -0,0 +1 @@ +c1c88d6050bbfbfb040e1036e1cfb8f82e00df7b \ No newline at end of file diff --git a/libs/x-content/licenses/jackson-dataformat-smile-2.15.0.jar.sha1 b/libs/x-content/licenses/jackson-dataformat-smile-2.15.0.jar.sha1 deleted file mode 100644 index 2e5c2f8092648..0000000000000 --- 
a/libs/x-content/licenses/jackson-dataformat-smile-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6f383da770d7fe56b5b0a5f6ff35b5d7ef2ef1f8 \ No newline at end of file diff --git a/libs/x-content/licenses/jackson-dataformat-smile-2.15.1.jar.sha1 b/libs/x-content/licenses/jackson-dataformat-smile-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..6bff8b0b8dba9 --- /dev/null +++ b/libs/x-content/licenses/jackson-dataformat-smile-2.15.1.jar.sha1 @@ -0,0 +1 @@ +46d5f70e4ebb2e4c329701c45226a78978601e04 \ No newline at end of file diff --git a/libs/x-content/licenses/jackson-dataformat-yaml-2.15.0.jar.sha1 b/libs/x-content/licenses/jackson-dataformat-yaml-2.15.0.jar.sha1 deleted file mode 100644 index a3cdfa61c0e14..0000000000000 --- a/libs/x-content/licenses/jackson-dataformat-yaml-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -fafaf40ed4bcd4bc29891441b7d256f24e55fa7f \ No newline at end of file diff --git a/libs/x-content/licenses/jackson-dataformat-yaml-2.15.1.jar.sha1 b/libs/x-content/licenses/jackson-dataformat-yaml-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..66e20ea31c7f0 --- /dev/null +++ b/libs/x-content/licenses/jackson-dataformat-yaml-2.15.1.jar.sha1 @@ -0,0 +1 @@ +97a9c5867fbacd44a1791c2d0a00bd4f3f061b77 \ No newline at end of file diff --git a/modules/ingest-geoip/licenses/jackson-annotations-2.15.0.jar.sha1 b/modules/ingest-geoip/licenses/jackson-annotations-2.15.0.jar.sha1 deleted file mode 100644 index 695645e221aeb..0000000000000 --- a/modules/ingest-geoip/licenses/jackson-annotations-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -89b0fd554928425a776a6e97ed010034312af21d \ No newline at end of file diff --git a/modules/ingest-geoip/licenses/jackson-annotations-2.15.1.jar.sha1 b/modules/ingest-geoip/licenses/jackson-annotations-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..a3ade4ff8dadc --- /dev/null +++ b/modules/ingest-geoip/licenses/jackson-annotations-2.15.1.jar.sha1 @@ -0,0 +1 @@ +092a90d3739e970e03b5971839e4fe51f13c1fa3 \ No newline at end of file diff --git a/modules/ingest-geoip/licenses/jackson-databind-2.15.0.jar.sha1 b/modules/ingest-geoip/licenses/jackson-databind-2.15.0.jar.sha1 deleted file mode 100644 index 059bbf5747b97..0000000000000 --- a/modules/ingest-geoip/licenses/jackson-databind-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -0d41caa3a4e9f85382702a059a65c512f85ac230 \ No newline at end of file diff --git a/modules/ingest-geoip/licenses/jackson-databind-2.15.1.jar.sha1 b/modules/ingest-geoip/licenses/jackson-databind-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..47405b86d4a51 --- /dev/null +++ b/modules/ingest-geoip/licenses/jackson-databind-2.15.1.jar.sha1 @@ -0,0 +1 @@ +ac9ba74d208faf356e4719a49e59c6ea9237c01d \ No newline at end of file diff --git a/plugins/discovery-ec2/licenses/jackson-annotations-2.15.0.jar.sha1 b/plugins/discovery-ec2/licenses/jackson-annotations-2.15.0.jar.sha1 deleted file mode 100644 index 695645e221aeb..0000000000000 --- a/plugins/discovery-ec2/licenses/jackson-annotations-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -89b0fd554928425a776a6e97ed010034312af21d \ No newline at end of file diff --git a/plugins/discovery-ec2/licenses/jackson-annotations-2.15.1.jar.sha1 b/plugins/discovery-ec2/licenses/jackson-annotations-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..a3ade4ff8dadc --- /dev/null +++ b/plugins/discovery-ec2/licenses/jackson-annotations-2.15.1.jar.sha1 @@ -0,0 +1 @@ +092a90d3739e970e03b5971839e4fe51f13c1fa3 \ No newline at end of file diff --git 
a/plugins/discovery-ec2/licenses/jackson-databind-2.15.0.jar.sha1 b/plugins/discovery-ec2/licenses/jackson-databind-2.15.0.jar.sha1 deleted file mode 100644 index 059bbf5747b97..0000000000000 --- a/plugins/discovery-ec2/licenses/jackson-databind-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -0d41caa3a4e9f85382702a059a65c512f85ac230 \ No newline at end of file diff --git a/plugins/discovery-ec2/licenses/jackson-databind-2.15.1.jar.sha1 b/plugins/discovery-ec2/licenses/jackson-databind-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..47405b86d4a51 --- /dev/null +++ b/plugins/discovery-ec2/licenses/jackson-databind-2.15.1.jar.sha1 @@ -0,0 +1 @@ +ac9ba74d208faf356e4719a49e59c6ea9237c01d \ No newline at end of file diff --git a/plugins/repository-azure/licenses/jackson-annotations-2.15.0.jar.sha1 b/plugins/repository-azure/licenses/jackson-annotations-2.15.0.jar.sha1 deleted file mode 100644 index 695645e221aeb..0000000000000 --- a/plugins/repository-azure/licenses/jackson-annotations-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -89b0fd554928425a776a6e97ed010034312af21d \ No newline at end of file diff --git a/plugins/repository-azure/licenses/jackson-annotations-2.15.1.jar.sha1 b/plugins/repository-azure/licenses/jackson-annotations-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..a3ade4ff8dadc --- /dev/null +++ b/plugins/repository-azure/licenses/jackson-annotations-2.15.1.jar.sha1 @@ -0,0 +1 @@ +092a90d3739e970e03b5971839e4fe51f13c1fa3 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/jackson-databind-2.15.0.jar.sha1 b/plugins/repository-azure/licenses/jackson-databind-2.15.0.jar.sha1 deleted file mode 100644 index 059bbf5747b97..0000000000000 --- a/plugins/repository-azure/licenses/jackson-databind-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -0d41caa3a4e9f85382702a059a65c512f85ac230 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/jackson-databind-2.15.1.jar.sha1 b/plugins/repository-azure/licenses/jackson-databind-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..47405b86d4a51 --- /dev/null +++ b/plugins/repository-azure/licenses/jackson-databind-2.15.1.jar.sha1 @@ -0,0 +1 @@ +ac9ba74d208faf356e4719a49e59c6ea9237c01d \ No newline at end of file diff --git a/plugins/repository-azure/licenses/jackson-dataformat-xml-2.15.0.jar.sha1 b/plugins/repository-azure/licenses/jackson-dataformat-xml-2.15.0.jar.sha1 deleted file mode 100644 index aeacd683279b5..0000000000000 --- a/plugins/repository-azure/licenses/jackson-dataformat-xml-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -941dd9a1f2c505445b1d7d17e6fb10319fc84c0c \ No newline at end of file diff --git a/plugins/repository-azure/licenses/jackson-dataformat-xml-2.15.1.jar.sha1 b/plugins/repository-azure/licenses/jackson-dataformat-xml-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..e874b204a1042 --- /dev/null +++ b/plugins/repository-azure/licenses/jackson-dataformat-xml-2.15.1.jar.sha1 @@ -0,0 +1 @@ +66e5b7284865d1175dd63d0ffc0385f8810e0810 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/jackson-datatype-jsr310-2.15.0.jar.sha1 b/plugins/repository-azure/licenses/jackson-datatype-jsr310-2.15.0.jar.sha1 deleted file mode 100644 index b032e71822f47..0000000000000 --- a/plugins/repository-azure/licenses/jackson-datatype-jsr310-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -3c3a0cfba09271fab4603224f2c2e21c6ddf6dc4 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/jackson-datatype-jsr310-2.15.1.jar.sha1 
b/plugins/repository-azure/licenses/jackson-datatype-jsr310-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..d8c64d8c48452 --- /dev/null +++ b/plugins/repository-azure/licenses/jackson-datatype-jsr310-2.15.1.jar.sha1 @@ -0,0 +1 @@ +21300ba63a6408bbd3505af4287d1b9a911a9718 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/jackson-module-jaxb-annotations-2.15.0.jar.sha1 b/plugins/repository-azure/licenses/jackson-module-jaxb-annotations-2.15.0.jar.sha1 deleted file mode 100644 index fcebd99b4f38b..0000000000000 --- a/plugins/repository-azure/licenses/jackson-module-jaxb-annotations-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -86221c9bd8d02e5a964f83a92893ef7329257fa7 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/jackson-module-jaxb-annotations-2.15.1.jar.sha1 b/plugins/repository-azure/licenses/jackson-module-jaxb-annotations-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..81921a29b9d15 --- /dev/null +++ b/plugins/repository-azure/licenses/jackson-module-jaxb-annotations-2.15.1.jar.sha1 @@ -0,0 +1 @@ +ab2078805370360d32c77f1ba46dc9098f84daa6 \ No newline at end of file diff --git a/plugins/repository-s3/licenses/jackson-annotations-2.15.0.jar.sha1 b/plugins/repository-s3/licenses/jackson-annotations-2.15.0.jar.sha1 deleted file mode 100644 index 695645e221aeb..0000000000000 --- a/plugins/repository-s3/licenses/jackson-annotations-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -89b0fd554928425a776a6e97ed010034312af21d \ No newline at end of file diff --git a/plugins/repository-s3/licenses/jackson-annotations-2.15.1.jar.sha1 b/plugins/repository-s3/licenses/jackson-annotations-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..a3ade4ff8dadc --- /dev/null +++ b/plugins/repository-s3/licenses/jackson-annotations-2.15.1.jar.sha1 @@ -0,0 +1 @@ +092a90d3739e970e03b5971839e4fe51f13c1fa3 \ No newline at end of file diff --git a/plugins/repository-s3/licenses/jackson-databind-2.15.0.jar.sha1 b/plugins/repository-s3/licenses/jackson-databind-2.15.0.jar.sha1 deleted file mode 100644 index 059bbf5747b97..0000000000000 --- a/plugins/repository-s3/licenses/jackson-databind-2.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -0d41caa3a4e9f85382702a059a65c512f85ac230 \ No newline at end of file diff --git a/plugins/repository-s3/licenses/jackson-databind-2.15.1.jar.sha1 b/plugins/repository-s3/licenses/jackson-databind-2.15.1.jar.sha1 new file mode 100644 index 0000000000000..47405b86d4a51 --- /dev/null +++ b/plugins/repository-s3/licenses/jackson-databind-2.15.1.jar.sha1 @@ -0,0 +1 @@ +ac9ba74d208faf356e4719a49e59c6ea9237c01d \ No newline at end of file From aa97acd300d0f7ffccfe3c30c2319b9f4e14c84a Mon Sep 17 00:00:00 2001 From: Akhil Manoj <50191787+AkM-2018@users.noreply.github.com> Date: Wed, 17 May 2023 23:08:54 +0530 Subject: [PATCH 06/10] Fixed typo (#7554) Signed-off-by: AkM-2018 --- .../admin/cluster/snapshots/create/CreateSnapshotRequest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/create/CreateSnapshotRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/create/CreateSnapshotRequest.java index dcd7ddc6674e0..63039ce7a921a 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/create/CreateSnapshotRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/create/CreateSnapshotRequest.java @@ -71,7 +71,7 @@ *
<li>must not contain whitespace (tabs or spaces)</li>
 * <li>must not contain comma (',')</li>
 * <li>must not contain hash sign ('#')</li>
- * <li>must not start with underscore ('-')</li>
+ * <li>must not start with underscore ('_')</li>
 * <li>must be lowercase</li>
 * <li>must not contain invalid file name characters {@link org.opensearch.common.Strings#INVALID_FILENAME_CHARS}</li>
  • * From 054cccdc171c52ba83ef602c1d9594f5f8309e20 Mon Sep 17 00:00:00 2001 From: Jay Deng Date: Wed, 17 May 2023 11:20:55 -0700 Subject: [PATCH 07/10] Change INDEX_SEARCHER threadpool to resizable to support task resource tracking (#7502) Signed-off-by: Jay Deng --- CHANGELOG.md | 1 + .../cluster/node/tasks/AbstractTasksIT.java | 190 ++++++++++++++++++ .../node/tasks/ConcurrentSearchTasksIT.java | 118 +++++++++++ .../admin/cluster/node/tasks/TasksIT.java | 150 +------------- .../org/opensearch/threadpool/ThreadPool.java | 7 +- .../tasks/RecordingTaskManagerListener.java | 12 +- .../TaskResourceTrackingServiceTests.java | 48 +++++ 7 files changed, 381 insertions(+), 145 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/node/tasks/AbstractTasksIT.java create mode 100644 server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/node/tasks/ConcurrentSearchTasksIT.java diff --git a/CHANGELOG.md b/CHANGELOG.md index c44b69b7c96c9..88dc24762bf7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -121,6 +121,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Enable `./gradlew build` on MacOS by disabling bcw tests ([#7303](https://github.com/opensearch-project/OpenSearch/pull/7303)) - Moved concurrent-search from sandbox plugin to server module behind feature flag ([#7203](https://github.com/opensearch-project/OpenSearch/pull/7203)) - Allow access to indices cache clear APIs for read only indexes ([#7303](https://github.com/opensearch-project/OpenSearch/pull/7303)) +- Changed concurrent-search threadpool type to be resizable and support task resource tracking ([#7502](https://github.com/opensearch-project/OpenSearch/pull/7502)) ### Deprecated diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/node/tasks/AbstractTasksIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/node/tasks/AbstractTasksIT.java new file mode 100644 index 0000000000000..fcfe9cb0aab00 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/node/tasks/AbstractTasksIT.java @@ -0,0 +1,190 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.action.admin.cluster.node.tasks; + +import org.opensearch.ExceptionsHelper; +import org.opensearch.ResourceNotFoundException; +import org.opensearch.action.admin.cluster.node.tasks.get.GetTaskResponse; +import org.opensearch.action.support.WriteRequest; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.Strings; +import org.opensearch.common.collect.Tuple; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.plugins.Plugin; +import org.opensearch.tasks.TaskId; +import org.opensearch.tasks.TaskInfo; +import org.opensearch.tasks.ThreadResourceInfo; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.tasks.MockTaskManager; +import org.opensearch.test.transport.MockTransportService; +import org.opensearch.transport.TransportService; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Function; + +/** + * Base IT test class for Tasks ITs + */ +abstract class AbstractTasksIT extends OpenSearchIntegTestCase { + + protected Map, RecordingTaskManagerListener> listeners = new HashMap<>(); + + @Override + protected Collection> getMockPlugins() { + Collection> mockPlugins = new ArrayList<>(super.getMockPlugins()); + mockPlugins.remove(MockTransportService.TestPlugin.class); + return mockPlugins; + } + + @Override + protected Collection> nodePlugins() { + return Arrays.asList(MockTransportService.TestPlugin.class, TestTaskPlugin.class); + } + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put(MockTaskManager.USE_MOCK_TASK_MANAGER_SETTING.getKey(), true) + .build(); + } + + @Override + public void tearDown() throws Exception { + for (Map.Entry, RecordingTaskManagerListener> entry : listeners.entrySet()) { + ((MockTaskManager) internalCluster().getInstance(TransportService.class, entry.getKey().v1()).getTaskManager()).removeListener( + entry.getValue() + ); + } + listeners.clear(); + super.tearDown(); + } + + /** + * Registers recording task event listeners with the given action mask on all nodes + */ + protected void registerTaskManagerListeners(String actionMasks) { + for (String nodeName : internalCluster().getNodeNames()) { + DiscoveryNode node = internalCluster().getInstance(ClusterService.class, nodeName).localNode(); + RecordingTaskManagerListener listener = new RecordingTaskManagerListener(node.getId(), actionMasks.split(",")); + ((MockTaskManager) internalCluster().getInstance(TransportService.class, nodeName).getTaskManager()).addListener(listener); + RecordingTaskManagerListener oldListener = listeners.put(new Tuple<>(node.getName(), actionMasks), listener); + assertNull(oldListener); + } + } + + /** + * Resets all recording task event listeners with the given action mask on all nodes + */ + protected void resetTaskManagerListeners(String actionMasks) { + for (Map.Entry, RecordingTaskManagerListener> entry : listeners.entrySet()) { + if (actionMasks == null || entry.getKey().v2().equals(actionMasks)) { + entry.getValue().reset(); + } + } + } + + /** + * Returns the number of events that satisfy the criteria across all nodes + * + * @param actionMasks action masks to match + * @return number of events that satisfy the criteria + */ 
+    protected int numberOfEvents(String actionMasks, Function<Tuple<Boolean, TaskInfo>, Boolean> criteria) {
+        return findEvents(actionMasks, criteria).size();
+    }
+
+    /**
+     * Returns all events that satisfy the criteria across all nodes
+     *
+     * @param actionMasks action masks to match
+     * @return events that satisfy the criteria
+     */
+    protected List<TaskInfo> findEvents(String actionMasks, Function<Tuple<Boolean, TaskInfo>, Boolean> criteria) {
+        List<TaskInfo> events = new ArrayList<>();
+        for (Map.Entry<Tuple<String, String>, RecordingTaskManagerListener> entry : listeners.entrySet()) {
+            if (actionMasks == null || entry.getKey().v2().equals(actionMasks)) {
+                for (Tuple<Boolean, TaskInfo> taskEvent : entry.getValue().getEvents()) {
+                    if (criteria.apply(taskEvent)) {
+                        events.add(taskEvent.v2());
+                    }
+                }
+            }
+        }
+        return events;
+    }
+
+    protected Map<Long, List<ThreadResourceInfo>> getThreadStats(String actionMasks, TaskId taskId) {
+        for (Map.Entry<Tuple<String, String>, RecordingTaskManagerListener> entry : listeners.entrySet()) {
+            if (actionMasks == null || entry.getKey().v2().equals(actionMasks)) {
+                for (Tuple<TaskId, Map<Long, List<ThreadResourceInfo>>> threadStats : entry.getValue().getThreadStats()) {
+                    if (taskId.equals(threadStats.v1())) {
+                        return threadStats.v2();
+                    }
+                }
+            }
+        }
+        return new HashMap<>();
+    }
+
+    /**
+     * Asserts that all tasks in the tasks list have the same parentTask
+     */
+    protected void assertParentTask(List<TaskInfo> tasks, TaskInfo parentTask) {
+        for (TaskInfo task : tasks) {
+            assertParentTask(task, parentTask);
+        }
+    }
+
+    protected void assertParentTask(TaskInfo task, TaskInfo parentTask) {
+        assertTrue(task.getParentTaskId().isSet());
+        assertEquals(parentTask.getTaskId().getNodeId(), task.getParentTaskId().getNodeId());
+        assertTrue(Strings.hasLength(task.getParentTaskId().getNodeId()));
+        assertEquals(parentTask.getId(), task.getParentTaskId().getId());
+    }
+
+    protected void expectNotFound(ThrowingRunnable r) {
+        Exception e = expectThrows(Exception.class, r);
+        ResourceNotFoundException notFound = (ResourceNotFoundException) ExceptionsHelper.unwrap(e, ResourceNotFoundException.class);
+        if (notFound == null) {
+            throw new AssertionError("Expected " + ResourceNotFoundException.class.getSimpleName(), e);
+        }
+    }
+
+    /**
+     * Fetch the task status from the list tasks API using its "fallback to get from the task index" behavior. Asserts some obvious stuff
+     * about the fetched task and returns a map of its status.
+ */ + protected GetTaskResponse expectFinishedTask(TaskId taskId) throws IOException { + GetTaskResponse response = client().admin().cluster().prepareGetTask(taskId).get(); + assertTrue("the task should have been completed before fetching", response.getTask().isCompleted()); + TaskInfo info = response.getTask().getTask(); + assertEquals(taskId, info.getTaskId()); + assertNull(info.getStatus()); // The test task doesn't have any status + return response; + } + + protected void indexDocumentsWithRefresh(String indexName, int numDocs) { + for (int i = 0; i < numDocs; i++) { + client().prepareIndex(indexName) + .setId("test_id_" + String.valueOf(i)) + .setSource("{\"foo_" + String.valueOf(i) + "\": \"bar_" + String.valueOf(i) + "\"}", XContentType.JSON) + .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) + .get(); + } + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/node/tasks/ConcurrentSearchTasksIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/node/tasks/ConcurrentSearchTasksIT.java new file mode 100644 index 0000000000000..2b2421072e03b --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/node/tasks/ConcurrentSearchTasksIT.java @@ -0,0 +1,118 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.node.tasks; + +import org.hamcrest.MatcherAssert; +import org.opensearch.action.admin.indices.segments.IndicesSegmentsRequest; +import org.opensearch.action.search.SearchAction; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.collect.Tuple; +import org.opensearch.common.settings.FeatureFlagSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.tasks.TaskInfo; +import org.opensearch.tasks.ThreadResourceInfo; + +import java.util.List; +import java.util.Map; + +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.notNullValue; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse; + +/** + * Integration tests for task management API with Concurrent Segment Search + * + * The way the test framework bootstraps the test cluster makes it difficult to parameterize the feature flag. + * Once concurrent search is moved behind a cluster setting we can parameterize these tests behind the setting. 
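+ *
+ * Concurrency comes from leaf slicing: at roughly 5 segments per slice, an index with 7 segments yields
+ * ceil(7 / 5.0) = 2 slices, so each shard-level search task is expected to run on two index_searcher threads.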
+ */
+public class ConcurrentSearchTasksIT extends AbstractTasksIT {
+
+    private static final int INDEX_SEARCHER_THREADS = 10;
+
+    @Override
+    protected Settings nodeSettings(int nodeOrdinal) {
+        return Settings.builder()
+            .put(super.nodeSettings(nodeOrdinal))
+            .put("thread_pool.index_searcher.size", INDEX_SEARCHER_THREADS)
+            .put("thread_pool.index_searcher.queue_size", INDEX_SEARCHER_THREADS)
+            .build();
+    }
+
+    private int getSegmentCount(String indexName) {
+        return client().admin()
+            .indices()
+            .segments(new IndicesSegmentsRequest(indexName))
+            .actionGet()
+            .getIndices()
+            .get(indexName)
+            .getShards()
+            .get(0)
+            .getShards()[0].getSegments()
+            .size();
+    }
+
+    @Override
+    protected Settings featureFlagSettings() {
+        Settings.Builder featureSettings = Settings.builder();
+        for (Setting<?> builtInFlag : FeatureFlagSettings.BUILT_IN_FEATURE_FLAGS) {
+            featureSettings.put(builtInFlag.getKey(), builtInFlag.getDefaultRaw(Settings.EMPTY));
+        }
+        featureSettings.put(FeatureFlags.CONCURRENT_SEGMENT_SEARCH, true);
+        return featureSettings.build();
+    }
+
+    /**
+     * Tests the number of threads that worked on a search task.
+     *
+     * Currently, we try to control concurrency by creating an index with 7 segments and rely on
+     * the way concurrent search creates leaf slices from segments. Once more concurrency controls are introduced
+     * we should improve this test to use those methods.
+     */
+    public void testConcurrentSearchTaskTracking() {
+        final String INDEX_NAME = "test";
+        final int NUM_SHARDS = 1;
+        final int NUM_DOCS = 7;
+
+        registerTaskManagerListeners(SearchAction.NAME); // coordinator task
+        registerTaskManagerListeners(SearchAction.NAME + "[*]"); // shard task
+        createIndex(
+            INDEX_NAME,
+            Settings.builder()
+                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, NUM_SHARDS)
+                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
+                .build()
+        );
+        ensureGreen(INDEX_NAME); // Make sure all shards are allocated to catch replication tasks
+        indexDocumentsWithRefresh(INDEX_NAME, NUM_DOCS); // Concurrent search requires >5 segments or >250,000 docs to have concurrency, so
+        // we index 7 docs, flushing between each one, to create new segments
+        assertSearchResponse(client().prepareSearch(INDEX_NAME).setQuery(QueryBuilders.matchAllQuery()).get());
+
+        // the search operation should produce one coordinator task
+        List<TaskInfo> mainTask = findEvents(SearchAction.NAME, Tuple::v1);
+        assertEquals(1, mainTask.size());
+        TaskInfo mainTaskInfo = mainTask.get(0);
+
+        List<TaskInfo> shardTasks = findEvents(SearchAction.NAME + "[*]", Tuple::v1);
+        assertEquals(NUM_SHARDS, shardTasks.size()); // We should only have 1 shard search task per shard
+        for (TaskInfo taskInfo : shardTasks) {
+            MatcherAssert.assertThat(taskInfo.getParentTaskId(), notNullValue());
+            assertEquals(mainTaskInfo.getTaskId(), taskInfo.getParentTaskId());
+
+            Map<Long, List<ThreadResourceInfo>> threadStats = getThreadStats(SearchAction.NAME + "[*]", taskInfo.getTaskId());
+            // Concurrent search forks each slice of 5 segments to a different thread
+            assertEquals((int) Math.ceil(getSegmentCount(INDEX_NAME) / 5.0), threadStats.size());
+
+            // assert that all task descriptions have non-zero length
+            MatcherAssert.assertThat(taskInfo.getDescription().length(), greaterThan(0));
+        }
+    }
+}
diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/node/tasks/TasksIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/node/tasks/TasksIT.java
index 62180412dbf98..4d297555c3acc 100644
---
a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/node/tasks/TasksIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/node/tasks/TasksIT.java @@ -32,10 +32,9 @@ package org.opensearch.action.admin.cluster.node.tasks; +import org.opensearch.ExceptionsHelper; import org.opensearch.OpenSearchException; import org.opensearch.OpenSearchTimeoutException; -import org.opensearch.ExceptionsHelper; -import org.opensearch.ResourceNotFoundException; import org.opensearch.action.ActionFuture; import org.opensearch.action.ActionListener; import org.opensearch.action.TaskOperationFailure; @@ -57,15 +56,11 @@ import org.opensearch.action.support.WriteRequest; import org.opensearch.action.support.replication.ReplicationResponse; import org.opensearch.action.support.replication.TransportReplicationActionTests; -import org.opensearch.cluster.node.DiscoveryNode; -import org.opensearch.cluster.service.ClusterService; -import org.opensearch.common.Strings; import org.opensearch.common.collect.Tuple; import org.opensearch.common.regex.Regex; import org.opensearch.common.settings.Settings; import org.opensearch.common.xcontent.XContentType; import org.opensearch.index.query.QueryBuilders; -import org.opensearch.plugins.Plugin; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskId; @@ -75,14 +70,9 @@ import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.tasks.MockTaskManager; import org.opensearch.test.tasks.MockTaskManagerListener; -import org.opensearch.test.transport.MockTransportService; import org.opensearch.transport.ReceiveTimeoutTransportException; import org.opensearch.transport.TransportService; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -96,12 +86,6 @@ import static java.util.Collections.emptyList; import static java.util.Collections.singleton; -import static org.opensearch.common.unit.TimeValue.timeValueMillis; -import static org.opensearch.common.unit.TimeValue.timeValueSeconds; -import static org.opensearch.http.HttpTransportSettings.SETTING_HTTP_MAX_HEADER_SIZE; -import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertFutureThrows; -import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertNoFailures; -import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse; import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.empty; @@ -113,6 +97,12 @@ import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.startsWith; +import static org.opensearch.common.unit.TimeValue.timeValueMillis; +import static org.opensearch.common.unit.TimeValue.timeValueSeconds; +import static org.opensearch.http.HttpTransportSettings.SETTING_HTTP_MAX_HEADER_SIZE; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertFutureThrows; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertNoFailures; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse; /** * Integration tests for task management API @@ -120,29 +110,7 @@ * We need at least 2 nodes so we have a cluster-manager node a non-cluster-manager node */ @OpenSearchIntegTestCase.ClusterScope(scope = 
OpenSearchIntegTestCase.Scope.SUITE, minNumDataNodes = 2)
-public class TasksIT extends OpenSearchIntegTestCase {
-
-    private Map<Tuple<String, String>, RecordingTaskManagerListener> listeners = new HashMap<>();
-
-    @Override
-    protected Collection<Class<? extends Plugin>> getMockPlugins() {
-        Collection<Class<? extends Plugin>> mockPlugins = new ArrayList<>(super.getMockPlugins());
-        mockPlugins.remove(MockTransportService.TestPlugin.class);
-        return mockPlugins;
-    }
-
-    @Override
-    protected Collection<Class<? extends Plugin>> nodePlugins() {
-        return Arrays.asList(MockTransportService.TestPlugin.class, TestTaskPlugin.class);
-    }
-
-    @Override
-    protected Settings nodeSettings(int nodeOrdinal) {
-        return Settings.builder()
-            .put(super.nodeSettings(nodeOrdinal))
-            .put(MockTaskManager.USE_MOCK_TASK_MANAGER_SETTING.getKey(), true)
-            .build();
-    }
+public class TasksIT extends AbstractTasksIT {

     public void testTaskCounts() {
         // Run only on data nodes
@@ -940,106 +908,4 @@ public void onFailure(Exception e) {
         assertNotNull(response.getTask().getError());
         assertNull(response.getTask().getResponse());
     }
-
-    @Override
-    public void tearDown() throws Exception {
-        for (Map.Entry<Tuple<String, String>, RecordingTaskManagerListener> entry : listeners.entrySet()) {
-            ((MockTaskManager) internalCluster().getInstance(TransportService.class, entry.getKey().v1()).getTaskManager()).removeListener(
-                entry.getValue()
-            );
-        }
-        listeners.clear();
-        super.tearDown();
-    }
-
-    /**
-     * Registers recording task event listeners with the given action mask on all nodes
-     */
-    private void registerTaskManagerListeners(String actionMasks) {
-        for (String nodeName : internalCluster().getNodeNames()) {
-            DiscoveryNode node = internalCluster().getInstance(ClusterService.class, nodeName).localNode();
-            RecordingTaskManagerListener listener = new RecordingTaskManagerListener(node.getId(), actionMasks.split(","));
-            ((MockTaskManager) internalCluster().getInstance(TransportService.class, nodeName).getTaskManager()).addListener(listener);
-            RecordingTaskManagerListener oldListener = listeners.put(new Tuple<>(node.getName(), actionMasks), listener);
-            assertNull(oldListener);
-        }
-    }
-
-    /**
-     * Resets all recording task event listeners with the given action mask on all nodes
-     */
-    private void resetTaskManagerListeners(String actionMasks) {
-        for (Map.Entry<Tuple<String, String>, RecordingTaskManagerListener> entry : listeners.entrySet()) {
-            if (actionMasks == null || entry.getKey().v2().equals(actionMasks)) {
-                entry.getValue().reset();
-            }
-        }
-    }
-
-    /**
-     * Returns the number of events that satisfy the criteria across all nodes
-     *
-     * @param actionMasks action masks to match
-     * @return number of events that satisfy the criteria
-     */
-    private int numberOfEvents(String actionMasks, Function<Tuple<Boolean, TaskInfo>, Boolean> criteria) {
-        return findEvents(actionMasks, criteria).size();
-    }
-
-    /**
-     * Returns all events that satisfy the criteria across all nodes
-     *
-     * @param actionMasks action masks to match
-     * @return number of events that satisfy the criteria
-     */
-    private List<TaskInfo> findEvents(String actionMasks, Function<Tuple<Boolean, TaskInfo>, Boolean> criteria) {
-        List<TaskInfo> events = new ArrayList<>();
-        for (Map.Entry<Tuple<String, String>, RecordingTaskManagerListener> entry : listeners.entrySet()) {
-            if (actionMasks == null || entry.getKey().v2().equals(actionMasks)) {
-                for (Tuple<Boolean, TaskInfo> taskEvent : entry.getValue().getEvents()) {
-                    if (criteria.apply(taskEvent)) {
-                        events.add(taskEvent.v2());
-                    }
-                }
-            }
-        }
-        return events;
-    }
-
-    /**
-     * Asserts that all tasks in the tasks list have the same parentTask
-     */
-    private void assertParentTask(List<TaskInfo> tasks, TaskInfo parentTask) {
-        for (TaskInfo task : tasks) {
-
assertParentTask(task, parentTask); - } - } - - private void assertParentTask(TaskInfo task, TaskInfo parentTask) { - assertTrue(task.getParentTaskId().isSet()); - assertEquals(parentTask.getTaskId().getNodeId(), task.getParentTaskId().getNodeId()); - assertTrue(Strings.hasLength(task.getParentTaskId().getNodeId())); - assertEquals(parentTask.getId(), task.getParentTaskId().getId()); - } - - private void expectNotFound(ThrowingRunnable r) { - Exception e = expectThrows(Exception.class, r); - ResourceNotFoundException notFound = (ResourceNotFoundException) ExceptionsHelper.unwrap(e, ResourceNotFoundException.class); - if (notFound == null) { - throw new AssertionError("Expected " + ResourceNotFoundException.class.getSimpleName(), e); - } - } - - /** - * Fetch the task status from the list tasks API using it's "fallback to get from the task index" behavior. Asserts some obvious stuff - * about the fetched task and returns a map of it's status. - */ - private GetTaskResponse expectFinishedTask(TaskId taskId) throws IOException { - GetTaskResponse response = client().admin().cluster().prepareGetTask(taskId).get(); - assertTrue("the task should have been completed before fetching", response.getTask().isCompleted()); - TaskInfo info = response.getTask().getTask(); - assertEquals(taskId, info.getTaskId()); - assertNull(info.getStatus()); // The test task doesn't have any status - return response; - } } diff --git a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java index be2b77998e016..2c91d5aa33090 100644 --- a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java +++ b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java @@ -183,7 +183,7 @@ public static ThreadPoolType fromType(String type) { map.put(Names.REMOTE_PURGE, ThreadPoolType.SCALING); map.put(Names.REMOTE_REFRESH, ThreadPoolType.SCALING); if (FeatureFlags.isEnabled(FeatureFlags.CONCURRENT_SEGMENT_SEARCH)) { - map.put(Names.INDEX_SEARCHER, ThreadPoolType.FIXED); + map.put(Names.INDEX_SEARCHER, ThreadPoolType.RESIZABLE); } THREAD_POOL_TYPES = Collections.unmodifiableMap(map); } @@ -268,7 +268,10 @@ public ThreadPool( new ScalingExecutorBuilder(Names.REMOTE_REFRESH, 1, halfProcMaxAt10, TimeValue.timeValueMinutes(5)) ); if (FeatureFlags.isEnabled(FeatureFlags.CONCURRENT_SEGMENT_SEARCH)) { - builders.put(Names.INDEX_SEARCHER, new FixedExecutorBuilder(settings, Names.INDEX_SEARCHER, allocatedProcessors, 1000, false)); + builders.put( + Names.INDEX_SEARCHER, + new ResizableExecutorBuilder(settings, Names.INDEX_SEARCHER, allocatedProcessors, 1000, runnableTaskListener) + ); } for (final ExecutorBuilder builder : customBuilders) { diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/tasks/RecordingTaskManagerListener.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/tasks/RecordingTaskManagerListener.java index 9bd44185baf24..768a6c73af380 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/tasks/RecordingTaskManagerListener.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/node/tasks/RecordingTaskManagerListener.java @@ -35,12 +35,15 @@ import org.opensearch.common.collect.Tuple; import org.opensearch.common.regex.Regex; import org.opensearch.tasks.Task; +import org.opensearch.tasks.TaskId; import org.opensearch.tasks.TaskInfo; +import org.opensearch.tasks.ThreadResourceInfo; import org.opensearch.test.tasks.MockTaskManagerListener; import java.util.ArrayList; import 
java.util.Collections;
import java.util.List;
+import java.util.Map;
import java.util.stream.Collectors;

/**
@@ -52,6 +55,7 @@ public class RecordingTaskManagerListener implements MockTaskManagerListener {
     private String localNodeId;

     private List<Tuple<Boolean, TaskInfo>> events = new ArrayList<>();
+    private List<Tuple<TaskId, Map<Long, List<ThreadResourceInfo>>>> threadStats = new ArrayList<>();

     public RecordingTaskManagerListener(String localNodeId, String... actionMasks) {
         this.actionMasks = actionMasks;
@@ -68,7 +72,9 @@ public synchronized void onTaskRegistered(Task task) {
     @Override
     public synchronized void onTaskUnregistered(Task task) {
         if (Regex.simpleMatch(actionMasks, task.getAction())) {
-            events.add(new Tuple<>(false, task.taskInfo(localNodeId, true)));
+            TaskInfo taskInfo = task.taskInfo(localNodeId, true);
+            events.add(new Tuple<>(false, taskInfo));
+            threadStats.add(new Tuple<>(taskInfo.getTaskId(), task.getResourceStats()));
         }
     }

@@ -82,6 +88,10 @@ public synchronized List<Tuple<Boolean, TaskInfo>> getEvents() {
         return Collections.unmodifiableList(new ArrayList<>(events));
     }

+    public synchronized List<Tuple<TaskId, Map<Long, List<ThreadResourceInfo>>>> getThreadStats() {
+        return List.copyOf(threadStats);
+    }
+
     public synchronized List<TaskInfo> getRegistrationEvents() {
         List<TaskInfo> events = this.events.stream().filter(Tuple::v1).map(Tuple::v2).collect(Collectors.toList());
         return Collections.unmodifiableList(events);
diff --git a/server/src/test/java/org/opensearch/tasks/TaskResourceTrackingServiceTests.java b/server/src/test/java/org/opensearch/tasks/TaskResourceTrackingServiceTests.java
index 8ba23c5d3219c..03257ee2a0a84 100644
--- a/server/src/test/java/org/opensearch/tasks/TaskResourceTrackingServiceTests.java
+++ b/server/src/test/java/org/opensearch/tasks/TaskResourceTrackingServiceTests.java
@@ -20,8 +20,13 @@ import org.opensearch.threadpool.ThreadPool;

 import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicReference;

+import static org.opensearch.tasks.ResourceStats.CPU;
 import static org.opensearch.tasks.ResourceStats.MEMORY;
 import static org.opensearch.tasks.TaskResourceTrackingService.TASK_ID;

@@ -88,6 +93,49 @@ public void testStopTrackingHandlesCurrentActiveThread() {
         assertTrue(task.getResourceStats().get(threadId).get(0).getResourceUsageInfo().getStatsInfo().get(MEMORY).getTotalValue() > 0);
     }

+    /**
+     * Test if taskResourceTrackingService properly tracks resource usage when multiple threads work on the same task
+     */
+    public void testStartingTrackingHandlesMultipleThreadsPerTask() throws InterruptedException {
+        ExecutorService executor = threadPool.executor(ThreadPool.Names.GENERIC);
+        taskResourceTrackingService.setTaskResourceTrackingEnabled(true);
+        Task task = new SearchTask(1, "test", "test", () -> "Test", TaskId.EMPTY_TASK_ID, new HashMap<>());
+        taskResourceTrackingService.startTracking(task);
+        int numTasks = randomIntBetween(2, 100);
+        for (int i = 0; i < numTasks; i++) {
+            executor.execute(() -> {
+                long threadId = Thread.currentThread().getId();
+                taskResourceTrackingService.taskExecutionStartedOnThread(task.getId(), threadId);
+                // The same thread may pick up multiple runnables for the same task id
+                assertEquals(1, task.getResourceStats().get(threadId).stream().filter(ThreadResourceInfo::isActive).count());
+                taskResourceTrackingService.taskExecutionFinishedOnThread(task.getId(), threadId);
+            });
+        }
+        executor.shutdown();
+        while (true) {
+            try {
+                if (executor.awaitTermination(1, TimeUnit.MINUTES)) break;
+            } catch (InterruptedException e) {
+                throw new RuntimeException(e);
+            }
+        }
+        Map<Long, List<ThreadResourceInfo>> stats = task.getResourceStats();
+        int numExecutions = 0;
+        for (Long threadId : stats.keySet()) {
+            for (ThreadResourceInfo info : task.getResourceStats().get(threadId)) {
+                assertTrue(info.getResourceUsageInfo().getStatsInfo().get(MEMORY).getTotalValue() > 0);
+                assertTrue(info.getResourceUsageInfo().getStatsInfo().get(CPU).getTotalValue() > 0);
+                assertFalse(info.isActive());
+                numExecutions++;
+            }
+        }
+        assertTrue(task.getTotalResourceStats().getCpuTimeInNanos() > 0);
+        assertTrue(task.getTotalResourceStats().getMemoryInBytes() > 0);
+        // Each execution of a runnable should record an entry in resourceStats even if it's the same thread
+        assertEquals(numTasks, numExecutions);
+    }
+
     private void verifyThreadContextFixedHeaders(String key, String value) {
         assertEquals(threadPool.getThreadContext().getHeader(key), value);
         assertEquals(threadPool.getThreadContext().getTransient(key), value);

From be0047e838ade4ecf7d8867298bbe8b4f0034834 Mon Sep 17 00:00:00 2001
From: Marc Handalian
Date: Wed, 17 May 2023 14:08:20 -0700
Subject: [PATCH 08/10] Segment Replication - Fixes for test
 SegmentReplicationIT.testReplicaHasDiffFilesThanPrimary (#7513)

* Segment Replication - Force segrep round on replicas once recovery completes

This change fixes a rare case where a replica can be left stale if its primary does not publish a new
checkpoint after the replica completes recovery. The replica performs a forced round of segment
replication as part of the recovery process to catch up; however, if it receives a new checkpoint
during that time, that checkpoint is dropped. If the primary subsequently stops indexing/writing new
segments, the replica is left stale.

Signed-off-by: Marc Handalian

* Fix all shards failed.

Signed-off-by: Marc Handalian

* Ensure NRT engine is open before triggering refresh.

Without this check tests will fail because RefreshMetricUpdater#afterRefresh will get invoked without
a call to beforeRefresh. This happens when readerManager.maybeRefresh is invoked on a closed engine.

Signed-off-by: Marc Handalian

Fix NPE caused when getLatestReplicationCheckpoint returns null.

Signed-off-by: Marc Handalian

* Ensure segrep is enabled on the index for SegmentReplicationTargetService's IndexEventListener implementations.

Signed-off-by: Marc Handalian

Add integration test.

Signed-off-by: Marc Handalian

Spotless.

Signed-off-by: Marc Handalian

Fix testDropPrimaryDuringReplication

Signed-off-by: Marc Handalian

* Remove unused NO_OP versions of service components.

Signed-off-by: Marc Handalian

* Add more unit tests.

Signed-off-by: Marc Handalian

* PR feedback.
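In short, a simplified sketch of the flow this change introduces (not the literal implementation): every
published checkpoint is now recorded per shard via updateLatestReceivedCheckpoint(checkpoint, replicaShard),
even while the shard is still recovering, and a new IndexEventListener hook replays it once recovery
completes:

    @Override
    public void afterIndexShardStarted(IndexShard indexShard) {
        // replicas only; primary relocations recover from translog and need no catch-up round
        if (indexShard.indexSettings().isSegRepEnabled() && indexShard.routingEntry().primary() == false) {
            processLatestReceivedCheckpoint(indexShard, Thread.currentThread());
        }
    }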
Signed-off-by: Marc Handalian

---------

Signed-off-by: Marc Handalian
---
 .../replication/SegmentReplicationIT.java     |  43 +++++-
 .../index/engine/NRTReplicationEngine.java    |   1 +
 .../SegmentReplicationSourceService.java      |  65 ++++-----
 .../SegmentReplicationTargetService.java      | 123 +++++++++++-------
 ...ClusterStateServiceRandomUpdatesTests.java |   4 +-
 .../SegmentReplicationSourceServiceTests.java |  42 +++++-
 .../SegmentReplicationTargetServiceTests.java |  92 ++++++++++++-
 .../snapshots/SnapshotResiliencyTests.java    |   2 +-
 8 files changed, 273 insertions(+), 99 deletions(-)

diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java
index 6906f61607ba0..0d982b92cd80e 100644
--- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java
+++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java
@@ -43,6 +43,7 @@
 import org.opensearch.cluster.routing.ShardRoutingState;
 import org.opensearch.cluster.routing.allocation.command.CancelAllocationCommand;
 import org.opensearch.common.io.stream.NamedWriteableRegistry;
+import org.opensearch.common.lease.Releasable;
 import org.opensearch.common.lucene.index.OpenSearchDirectoryReader;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.common.unit.TimeValue;
@@ -773,9 +774,10 @@ public void testReplicaHasDiffFilesThanPrimary() throws Exception {
         final int docCount = scaledRandomIntBetween(10, 200);
         for (int i = 0; i < docCount; i++) {
             client().prepareIndex(INDEX_NAME).setId(Integer.toString(i)).setSource("field", "value" + i).execute().get();
+            // Refresh, this should trigger round of segment replication
             refresh(INDEX_NAME);
         }
-        // Refresh, this should trigger round of segment replication
+        ensureGreen(INDEX_NAME);
         waitForSearchableDocs(docCount, primaryNode, replicaNode);
         verifyStoreContent();
         final IndexShard replicaAfterFailure = getIndexShard(replicaNode, INDEX_NAME);
@@ -1208,4 +1210,43 @@ public void testPitCreatedOnReplica() throws Exception {
         currentFiles = List.of(replicaShard.store().directory().listAll());
         assertFalse("Files should be cleaned up", currentFiles.containsAll(snapshottedSegments));
     }
+
+    /**
+     * This tests that if a primary receives docs while a replica is performing a round of segrep during recovery,
+     * the replica will catch up to the latest checkpoint once recovery completes, without requiring an additional
+     * primary refresh/flush.
+     */
+    public void testPrimaryReceivesDocsDuringReplicaRecovery() throws Exception {
+        final List<String> nodes = new ArrayList<>();
+        final String primaryNode = internalCluster().startNode();
+        nodes.add(primaryNode);
+        final Settings settings = Settings.builder().put(indexSettings()).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build();
+        createIndex(INDEX_NAME, settings);
+        ensureGreen(INDEX_NAME);
+        // start a replica node, initially will be empty with no shard assignment.
+        final String replicaNode = internalCluster().startNode();
+        nodes.add(replicaNode);
+
+        // index a doc.
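+        // (the replica can only receive this doc via a round of segrep; the recovery below is expected to
+        // force one, since the primary publishes no further checkpoints once indexing stops)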
+ client().prepareIndex(INDEX_NAME).setId("1").setSource("foo", randomInt()).get(); + refresh(INDEX_NAME); + + CountDownLatch latch = new CountDownLatch(1); + // block replication + try (final Releasable ignored = blockReplication(List.of(replicaNode), latch)) { + // update to add replica, initiating recovery, this will get stuck at last step + assertAcked( + client().admin() + .indices() + .prepareUpdateSettings(INDEX_NAME) + .setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)) + ); + ensureYellow(INDEX_NAME); + // index another doc while blocked, this would not get replicated to replica. + client().prepareIndex(INDEX_NAME).setId("2").setSource("foo2", randomInt()).get(); + refresh(INDEX_NAME); + } + ensureGreen(INDEX_NAME); + waitForSearchableDocs(2, nodes); + } + } diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java index 5fc2863b662fd..50b5fbb8596a6 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java @@ -337,6 +337,7 @@ public void refresh(String source) throws EngineException { @Override public boolean maybeRefresh(String source) throws EngineException { + ensureOpen(); try { return readerManager.maybeRefresh(); } catch (IOException e) { diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceService.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceService.java index 37a0b420b2d59..0e62a4320e3f3 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceService.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceService.java @@ -49,24 +49,6 @@ */ public class SegmentReplicationSourceService extends AbstractLifecycleComponent implements ClusterStateListener, IndexEventListener { - // Empty Implementation, only required while Segment Replication is under feature flag. - public static final SegmentReplicationSourceService NO_OP = new SegmentReplicationSourceService() { - @Override - public void clusterChanged(ClusterChangedEvent event) { - // NoOp; - } - - @Override - public void beforeIndexShardClosed(ShardId shardId, IndexShard indexShard, Settings indexSettings) { - // NoOp; - } - - @Override - public void shardRoutingChanged(IndexShard indexShard, @Nullable ShardRouting oldRouting, ShardRouting newRouting) { - // NoOp; - } - }; - private static final Logger logger = LogManager.getLogger(SegmentReplicationSourceService.class); private final RecoverySettings recoverySettings; private final TransportService transportService; @@ -85,22 +67,16 @@ public static class Actions { private final OngoingSegmentReplications ongoingSegmentReplications; - // Used only for empty implementation. 
-    private SegmentReplicationSourceService() {
-        recoverySettings = null;
-        ongoingSegmentReplications = null;
-        transportService = null;
-        indicesService = null;
-    }
-
-    public SegmentReplicationSourceService(
+    protected SegmentReplicationSourceService(
         IndicesService indicesService,
         TransportService transportService,
-        RecoverySettings recoverySettings
+        RecoverySettings recoverySettings,
+        OngoingSegmentReplications ongoingSegmentReplications
     ) {
         this.transportService = transportService;
         this.indicesService = indicesService;
         this.recoverySettings = recoverySettings;
+        this.ongoingSegmentReplications = ongoingSegmentReplications;
         transportService.registerRequestHandler(
             Actions.GET_CHECKPOINT_INFO,
             ThreadPool.Names.GENERIC,
@@ -113,7 +89,14 @@ public SegmentReplicationSourceService(
             GetSegmentFilesRequest::new,
             new GetSegmentFilesRequestHandler()
         );
-        this.ongoingSegmentReplications = new OngoingSegmentReplications(indicesService, recoverySettings);
+    }
+
+    public SegmentReplicationSourceService(
+        IndicesService indicesService,
+        TransportService transportService,
+        RecoverySettings recoverySettings
+    ) {
+        this(indicesService, transportService, recoverySettings, new OngoingSegmentReplications(indicesService, recoverySettings));
     }

     private class CheckpointInfoRequestHandler implements TransportRequestHandler<CheckpointInfoRequest> {
@@ -170,15 +153,19 @@ public void clusterChanged(ClusterChangedEvent event) {
         // we need to ensure its state has cleared up in ongoing replications.
         if (event.routingTableChanged()) {
             for (IndexService indexService : indicesService) {
-                for (IndexShard indexShard : indexService) {
-                    if (indexShard.routingEntry().primary()) {
-                        final IndexMetadata indexMetadata = indexService.getIndexSettings().getIndexMetadata();
-                        final Set<String> inSyncAllocationIds = new HashSet<>(indexMetadata.inSyncAllocationIds(indexShard.shardId().id()));
-                        if (indexShard.isPrimaryMode()) {
-                            final Set<String> shardTrackerInSyncIds = indexShard.getReplicationGroup().getInSyncAllocationIds();
-                            inSyncAllocationIds.addAll(shardTrackerInSyncIds);
+                if (indexService.getIndexSettings().isSegRepEnabled()) {
+                    for (IndexShard indexShard : indexService) {
+                        if (indexShard.routingEntry().primary()) {
+                            final IndexMetadata indexMetadata = indexService.getIndexSettings().getIndexMetadata();
+                            final Set<String> inSyncAllocationIds = new HashSet<>(
+                                indexMetadata.inSyncAllocationIds(indexShard.shardId().id())
+                            );
+                            if (indexShard.isPrimaryMode()) {
+                                final Set<String> shardTrackerInSyncIds = indexShard.getReplicationGroup().getInSyncAllocationIds();
+                                inSyncAllocationIds.addAll(shardTrackerInSyncIds);
+                            }
+                            ongoingSegmentReplications.clearOutOfSyncIds(indexShard.shardId(), inSyncAllocationIds);
                         }
-                        ongoingSegmentReplications.clearOutOfSyncIds(indexShard.shardId(), inSyncAllocationIds);
                     }
                 }
             }
@@ -212,7 +199,7 @@ protected void doClose() throws IOException {
      */
     @Override
     public void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexShard, Settings indexSettings) {
-        if (indexShard != null) {
+        if (indexShard != null && indexShard.indexSettings().isSegRepEnabled()) {
             ongoingSegmentReplications.cancel(indexShard, "shard is closed");
         }
     }
@@ -222,7 +209,7 @@ public void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexSh
      */
     @Override
     public void shardRoutingChanged(IndexShard indexShard, @Nullable ShardRouting oldRouting, ShardRouting newRouting) {
-        if (indexShard != null && oldRouting.primary() == false && newRouting.primary()) {
+        if (indexShard != null && indexShard.indexSettings().isSegRepEnabled() && oldRouting.primary() == false && newRouting.primary()) {
             ongoingSegmentReplications.cancel(indexShard.routingEntry().allocationId().getId(), "Relocating primary shard.");
         }
     }
diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java
index 1858449e13ae8..59af48abf3db8 100644
--- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java
+++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java
@@ -63,37 +63,10 @@ public class SegmentReplicationTargetService implements IndexEventListener {

     private final SegmentReplicationSourceFactory sourceFactory;

-    private final Map<ShardId, ReplicationCheckpoint> latestReceivedCheckpoint = ConcurrentCollections.newConcurrentMap();
+    protected final Map<ShardId, ReplicationCheckpoint> latestReceivedCheckpoint = ConcurrentCollections.newConcurrentMap();

     private final IndicesService indicesService;

-    // Empty Implementation, only required while Segment Replication is under feature flag.
-    public static final SegmentReplicationTargetService NO_OP = new SegmentReplicationTargetService() {
-        @Override
-        public void beforeIndexShardClosed(ShardId shardId, IndexShard indexShard, Settings indexSettings) {
-            // NoOp;
-        }
-
-        @Override
-        public synchronized void onNewCheckpoint(ReplicationCheckpoint receivedCheckpoint, IndexShard replicaShard) {
-            // noOp;
-        }
-
-        @Override
-        public void shardRoutingChanged(IndexShard indexShard, @Nullable ShardRouting oldRouting, ShardRouting newRouting) {
-            // noOp;
-        }
-    };
-
-    // Used only for empty implementation.
-    private SegmentReplicationTargetService() {
-        threadPool = null;
-        recoverySettings = null;
-        onGoingReplications = null;
-        sourceFactory = null;
-        indicesService = null;
-    }
-
     public ReplicationRef<SegmentReplicationTarget> get(long replicationId) {
         return onGoingReplications.get(replicationId);
     }
@@ -114,10 +87,28 @@ public SegmentReplicationTargetService(
         final TransportService transportService,
         final SegmentReplicationSourceFactory sourceFactory,
         final IndicesService indicesService
+    ) {
+        this(
+            threadPool,
+            recoverySettings,
+            transportService,
+            sourceFactory,
+            indicesService,
+            new ReplicationCollection<>(logger, threadPool)
+        );
+    }
+
+    public SegmentReplicationTargetService(
+        final ThreadPool threadPool,
+        final RecoverySettings recoverySettings,
+        final TransportService transportService,
+        final SegmentReplicationSourceFactory sourceFactory,
+        final IndicesService indicesService,
+        final ReplicationCollection<SegmentReplicationTarget> ongoingSegmentReplications
     ) {
         this.threadPool = threadPool;
         this.recoverySettings = recoverySettings;
-        this.onGoingReplications = new ReplicationCollection<>(logger, threadPool);
+        this.onGoingReplications = ongoingSegmentReplications;
         this.sourceFactory = sourceFactory;
         this.indicesService = indicesService;

@@ -140,8 +131,20 @@ public SegmentReplicationTargetService(
      */
     @Override
     public void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexShard, Settings indexSettings) {
-        if (indexShard != null) {
+        if (indexShard != null && indexShard.indexSettings().isSegRepEnabled()) {
             onGoingReplications.cancelForShard(shardId, "shard closed");
+            latestReceivedCheckpoint.remove(shardId);
+        }
+    }
+
+    /**
+     * Replay any checkpoint received while the replica was recovering. This does not need to happen
+     * for primary relocations because they recover from translog.
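+     *
+     * A minimal sketch of the expected sequence (illustrative):
+     * <pre>{@code
+     * onNewCheckpoint(checkpoint, replica);   // shard still recovering: checkpoint is only recorded
+     * afterIndexShardStarted(replica);        // shard started: recorded checkpoint is replayed
+     * }</pre>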
+ */ + @Override + public void afterIndexShardStarted(IndexShard indexShard) { + if (indexShard.indexSettings().isSegRepEnabled() && indexShard.routingEntry().primary() == false) { + processLatestReceivedCheckpoint(indexShard, Thread.currentThread()); } } @@ -150,8 +153,9 @@ public void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexSh */ @Override public void shardRoutingChanged(IndexShard indexShard, @Nullable ShardRouting oldRouting, ShardRouting newRouting) { - if (oldRouting != null && oldRouting.primary() == false && newRouting.primary()) { + if (oldRouting != null && indexShard.indexSettings().isSegRepEnabled() && oldRouting.primary() == false && newRouting.primary()) { onGoingReplications.cancelForShard(indexShard.shardId(), "shard has been promoted to primary"); + latestReceivedCheckpoint.remove(indexShard.shardId()); } } @@ -191,16 +195,18 @@ public SegmentReplicationState getSegmentReplicationState(ShardId shardId) { */ public synchronized void onNewCheckpoint(final ReplicationCheckpoint receivedCheckpoint, final IndexShard replicaShard) { logger.trace(() -> new ParameterizedMessage("Replica received new replication checkpoint from primary [{}]", receivedCheckpoint)); + // if the shard is in any state + if (replicaShard.state().equals(IndexShardState.CLOSED)) { + // ignore if shard is closed + logger.trace(() -> "Ignoring checkpoint, Shard is closed"); + return; + } + updateLatestReceivedCheckpoint(receivedCheckpoint, replicaShard); // Checks if replica shard is in the correct STARTED state to process checkpoints (avoids parallel replication events taking place) + // This check ensures we do not try to process a received checkpoint while the shard is still recovering, yet we stored the latest + // checkpoint to be replayed once the shard is Active. if (replicaShard.state().equals(IndexShardState.STARTED) == true) { // Checks if received checkpoint is already present and ahead then it replaces old received checkpoint - if (latestReceivedCheckpoint.get(replicaShard.shardId()) != null) { - if (receivedCheckpoint.isAheadOf(latestReceivedCheckpoint.get(replicaShard.shardId()))) { - latestReceivedCheckpoint.replace(replicaShard.shardId(), receivedCheckpoint); - } - } else { - latestReceivedCheckpoint.put(replicaShard.shardId(), receivedCheckpoint); - } SegmentReplicationTarget ongoingReplicationTarget = onGoingReplications.getOngoingReplicationTarget(replicaShard.shardId()); if (ongoingReplicationTarget != null) { if (ongoingReplicationTarget.getCheckpoint().getPrimaryTerm() < receivedCheckpoint.getPrimaryTerm()) { @@ -236,15 +242,7 @@ public void onReplicationDone(SegmentReplicationState state) { ); // if we received a checkpoint during the copy event that is ahead of this // try and process it. - if (latestReceivedCheckpoint.get(replicaShard.shardId()).isAheadOf(replicaShard.getLatestReplicationCheckpoint())) { - Runnable runnable = () -> onNewCheckpoint(latestReceivedCheckpoint.get(replicaShard.shardId()), replicaShard); - // Checks if we are using same thread and forks if necessary. 
- if (thread == Thread.currentThread()) { - threadPool.generic().execute(runnable); - } else { - runnable.run(); - } - } + processLatestReceivedCheckpoint(replicaShard, thread); } @Override @@ -269,6 +267,37 @@ public void onReplicationFailure( }); } + } else { + logger.trace( + () -> new ParameterizedMessage("Ignoring checkpoint, shard not started {} {}", receivedCheckpoint, replicaShard.state()) + ); + } + } + + // visible to tests + protected boolean processLatestReceivedCheckpoint(IndexShard replicaShard, Thread thread) { + final ReplicationCheckpoint latestPublishedCheckpoint = latestReceivedCheckpoint.get(replicaShard.shardId()); + if (latestPublishedCheckpoint != null && latestPublishedCheckpoint.isAheadOf(replicaShard.getLatestReplicationCheckpoint())) { + Runnable runnable = () -> onNewCheckpoint(latestReceivedCheckpoint.get(replicaShard.shardId()), replicaShard); + // Checks if we are using same thread and forks if necessary. + if (thread == Thread.currentThread()) { + threadPool.generic().execute(runnable); + } else { + runnable.run(); + } + return true; + } + return false; + } + + // visible to tests + protected void updateLatestReceivedCheckpoint(ReplicationCheckpoint receivedCheckpoint, IndexShard replicaShard) { + if (latestReceivedCheckpoint.get(replicaShard.shardId()) != null) { + if (receivedCheckpoint.isAheadOf(latestReceivedCheckpoint.get(replicaShard.shardId()))) { + latestReceivedCheckpoint.replace(replicaShard.shardId(), receivedCheckpoint); + } + } else { + latestReceivedCheckpoint.put(replicaShard.shardId(), receivedCheckpoint); } } diff --git a/server/src/test/java/org/opensearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java b/server/src/test/java/org/opensearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java index 22a5194b50f6d..9a0402423416b 100644 --- a/server/src/test/java/org/opensearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java +++ b/server/src/test/java/org/opensearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java @@ -570,8 +570,8 @@ private IndicesClusterStateService createIndicesClusterStateService( clusterService, threadPool, SegmentReplicationCheckpointPublisher.EMPTY, - SegmentReplicationTargetService.NO_OP, - SegmentReplicationSourceService.NO_OP, + mock(SegmentReplicationTargetService.class), + mock(SegmentReplicationSourceService.class), recoveryTargetService, shardStateAction, null, diff --git a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationSourceServiceTests.java b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationSourceServiceTests.java index 41022b77b46e1..76481ebbecea3 100644 --- a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationSourceServiceTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationSourceServiceTests.java @@ -11,17 +11,23 @@ import org.apache.lucene.codecs.Codec; import org.opensearch.Version; import org.opensearch.action.ActionListener; +import org.opensearch.cluster.ClusterChangedEvent; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.index.IndexService; +import org.opensearch.index.IndexSettings; import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.shard.ReplicationGroup; import 
org.opensearch.index.shard.ShardId; import org.opensearch.indices.IndicesService; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import org.opensearch.indices.replication.common.CopyStateTests; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.IndexSettingsModule; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.transport.CapturingTransport; import org.opensearch.threadpool.TestThreadPool; @@ -32,10 +38,16 @@ import java.io.IOException; import java.util.Collections; +import java.util.List; import java.util.concurrent.TimeUnit; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_REPLICATION_TYPE_SETTING; public class SegmentReplicationSourceServiceTests extends OpenSearchTestCase { @@ -43,6 +55,8 @@ public class SegmentReplicationSourceServiceTests extends OpenSearchTestCase { private TestThreadPool testThreadPool; private TransportService transportService; private DiscoveryNode localNode; + private SegmentReplicationSourceService segmentReplicationSourceService; + private OngoingSegmentReplications ongoingSegmentReplications; @Override public void setUp() throws Exception { @@ -52,9 +66,22 @@ public void setUp() throws Exception { ShardId testShardId = mockIndexShard.shardId(); IndicesService mockIndicesService = mock(IndicesService.class); IndexService mockIndexService = mock(IndexService.class); + when(mockIndicesService.iterator()).thenReturn(List.of(mockIndexService).iterator()); when(mockIndicesService.indexServiceSafe(testShardId.getIndex())).thenReturn(mockIndexService); when(mockIndexService.getShard(testShardId.id())).thenReturn(mockIndexShard); - + when(mockIndexService.iterator()).thenReturn(List.of(mockIndexShard).iterator()); + final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings( + "index", + Settings.builder().put(INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.SEGMENT).build() + ); + when(mockIndexService.getIndexSettings()).thenReturn(indexSettings); + final ShardRouting routing = mock(ShardRouting.class); + when(routing.primary()).thenReturn(true); + when(mockIndexShard.routingEntry()).thenReturn(routing); + when(mockIndexShard.isPrimaryMode()).thenReturn(true); + final ReplicationGroup replicationGroup = mock(ReplicationGroup.class); + when(mockIndexShard.getReplicationGroup()).thenReturn(replicationGroup); + when(replicationGroup.getInSyncAllocationIds()).thenReturn(Collections.emptySet()); // This mirrors the creation of the ReplicationCheckpoint inside CopyState testCheckpoint = new ReplicationCheckpoint( testShardId, @@ -81,10 +108,12 @@ public void setUp() throws Exception { final ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); final RecoverySettings recoverySettings = new RecoverySettings(settings, clusterSettings); - SegmentReplicationSourceService segmentReplicationSourceService = new SegmentReplicationSourceService( + ongoingSegmentReplications = spy(new OngoingSegmentReplications(mockIndicesService, recoverySettings)); + segmentReplicationSourceService = new SegmentReplicationSourceService( mockIndicesService, transportService, - recoverySettings + recoverySettings, 
+ ongoingSegmentReplications ); } @@ -132,6 +161,13 @@ public void onFailure(Exception e) { }); } + public void testPrimaryClearsOutOfSyncIds() { + final ClusterChangedEvent mock = mock(ClusterChangedEvent.class); + when(mock.routingTableChanged()).thenReturn(true); + segmentReplicationSourceService.clusterChanged(mock); + verify(ongoingSegmentReplications, times(1)).clearOutOfSyncIds(any(), any()); + } + private void executeGetCheckpointInfo(ActionListener listener) { final CheckpointInfoRequest request = new CheckpointInfoRequest(1L, "testAllocationId", localNode, testCheckpoint); transportService.sendRequest( diff --git a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java index 357a88c27fc46..a3c016d5ba0df 100644 --- a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java @@ -13,6 +13,7 @@ import org.opensearch.OpenSearchException; import org.opensearch.action.ActionListener; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.CancellableThreads; import org.opensearch.index.codec.CodecService; @@ -22,9 +23,13 @@ import org.opensearch.index.shard.IndexShardTestCase; import org.opensearch.index.store.Store; import org.opensearch.index.store.StoreFileMetadata; +import org.opensearch.indices.IndicesService; +import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; +import org.opensearch.indices.replication.common.ReplicationCollection; import org.opensearch.indices.replication.common.ReplicationFailedException; import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.transport.TransportService; import java.io.IOException; import java.util.List; @@ -32,15 +37,16 @@ import java.util.concurrent.TimeUnit; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.doReturn; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doNothing; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoInteractions; +import static org.mockito.Mockito.when; import static org.opensearch.indices.replication.SegmentReplicationState.Stage.CANCELLED; public class SegmentReplicationTargetServiceTests extends IndexShardTestCase { @@ -271,4 +277,78 @@ public void testRejectCheckpointOnShardPrimaryMode() throws IOException { verify(spy, times(0)).startReplication(any(), any(), any()); closeShards(primaryShard); } + + public void testAfterIndexShardStartedDoesNothingForDocrepIndex() throws IOException { + SegmentReplicationTargetService spy = spy(sut); + final IndexShard indexShard = newStartedShard(); + spy.afterIndexShardStarted(indexShard); + verify(spy, 
times(0)).processLatestReceivedCheckpoint(eq(replicaShard), any());
+        closeShards(indexShard);
+    }
+
+    public void testAfterIndexShardStartedProcessesLatestReceivedCheckpoint() {
+        sut.updateLatestReceivedCheckpoint(aheadCheckpoint, replicaShard);
+        SegmentReplicationTargetService spy = spy(sut);
+        doNothing().when(spy).onNewCheckpoint(any(), any());
+        spy.afterIndexShardStarted(replicaShard);
+        verify(spy, times(1)).processLatestReceivedCheckpoint(eq(replicaShard), any());
+    }
+
+    public void testDoNotProcessLatestCheckpointIfItIsBehind() {
+        sut.updateLatestReceivedCheckpoint(replicaShard.getLatestReplicationCheckpoint(), replicaShard);
+        assertFalse(sut.processLatestReceivedCheckpoint(replicaShard, null));
+    }
+
+    public void testOnNewCheckpointInvokedOnClosedShardDoesNothing() throws IOException {
+        closeShards(replicaShard);
+        SegmentReplicationTargetService spy = spy(sut);
+        spy.onNewCheckpoint(aheadCheckpoint, replicaShard);
+        verify(spy, times(0)).updateLatestReceivedCheckpoint(any(), any());
+    }
+
+    public void testBeforeIndexShardClosed_DoesNothingForDocRepIndex() throws IOException {
+        final SegmentReplicationSourceFactory sourceFactory = mock(SegmentReplicationSourceFactory.class);
+        final IndicesService indicesService = mock(IndicesService.class);
+        final ReplicationCollection<SegmentReplicationTarget> ongoingReplications = mock(ReplicationCollection.class);
+        final SegmentReplicationTargetService targetService = new SegmentReplicationTargetService(
+            threadPool,
+            new RecoverySettings(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)),
+            mock(TransportService.class),
+            sourceFactory,
+            indicesService,
+            ongoingReplications
+        );
+        final Settings settings = Settings.builder().put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.DOCUMENT).build();
+        IndexShard shard = newStartedShard(false, settings);
+        targetService.beforeIndexShardClosed(shard.shardId(), shard, shard.indexSettings().getSettings());
+        verifyNoInteractions(ongoingReplications);
+        closeShards(shard);
+    }
+
+    public void testShardRoutingChanged_DoesNothingForDocRepIndex() throws IOException {
+        final SegmentReplicationSourceFactory sourceFactory = mock(SegmentReplicationSourceFactory.class);
+        final IndicesService indicesService = mock(IndicesService.class);
+        final ReplicationCollection<SegmentReplicationTarget> ongoingReplications = mock(ReplicationCollection.class);
+        final SegmentReplicationTargetService targetService = new SegmentReplicationTargetService(
+            threadPool,
+            new RecoverySettings(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)),
+            mock(TransportService.class),
+            sourceFactory,
+            indicesService,
+            ongoingReplications
+        );
+        final Settings settings = Settings.builder().put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.DOCUMENT).build();
+        IndexShard shard = newStartedShard(false, settings);
+
+        targetService.shardRoutingChanged(shard, shard.routingEntry(), primaryShard.routingEntry());
+        verifyNoInteractions(ongoingReplications);
+        closeShards(shard);
+    }
+
+    public void testUpdateLatestReceivedCheckpoint() {
+        final SegmentReplicationTargetService spy = spy(sut);
+        sut.updateLatestReceivedCheckpoint(checkpoint, replicaShard);
+        sut.updateLatestReceivedCheckpoint(aheadCheckpoint, replicaShard);
+        assertEquals(sut.latestReceivedCheckpoint.get(replicaShard.shardId()), aheadCheckpoint);
+    }
 }
diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java
b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 3f5ef4b824afa..63ae57511dd34 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -1870,7 +1870,7 @@ public void onFailure(final Exception e) { new SegmentReplicationSourceFactory(transportService, recoverySettings, clusterService), indicesService ), - SegmentReplicationSourceService.NO_OP, + mock(SegmentReplicationSourceService.class), shardStateAction, new NodeMappingRefreshAction(transportService, metadataMappingService), repositoriesService, From c3690908a91dcb06646442a32ee5a49b87dc35c7 Mon Sep 17 00:00:00 2001 From: Varun Bansal Date: Thu, 18 May 2023 09:43:21 +0530 Subject: [PATCH 09/10] [Remote Store] Add Remote store stats api (#7441) Signed-off-by: Varun Bansal --- .../client/RestHighLevelClientTests.java | 1 + .../rest-api-spec/api/remote_store.stats.json | 48 +++++ .../org/opensearch/action/ActionModule.java | 5 + .../remotestore/stats/RemoteStoreStats.java | 168 +++++++++++++++ .../stats/RemoteStoreStatsAction.java | 26 +++ .../stats/RemoteStoreStatsRequest.java | 61 ++++++ .../stats/RemoteStoreStatsRequestBuilder.java | 52 +++++ .../stats/RemoteStoreStatsResponse.java | 74 +++++++ .../TransportRemoteStoreStatsAction.java | 159 ++++++++++++++ .../remotestore/stats/package-info.java | 10 + .../opensearch/client/ClusterAdminClient.java | 7 + .../client/support/AbstractClient.java | 21 ++ .../remote/RemoteRefreshSegmentTracker.java | 151 ++++++++++++++ .../cluster/RestRemoteStoreStatsAction.java | 59 ++++++ .../stats/RemoteStoreStatsRequestTests.java | 69 +++++++ .../stats/RemoteStoreStatsResponseTests.java | 72 +++++++ .../stats/RemoteStoreStatsTestHelper.java | 103 ++++++++++ .../stats/RemoteStoreStatsTests.java | 88 ++++++++ .../TransportRemoteStoreStatsActionTests.java | 194 ++++++++++++++++++ .../RemoteRefreshSegmentTrackerTests.java | 78 +++++++ 20 files changed, 1446 insertions(+) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/remote_store.stats.json create mode 100644 server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStats.java create mode 100644 server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsAction.java create mode 100644 server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsRequest.java create mode 100644 server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsRequestBuilder.java create mode 100644 server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsResponse.java create mode 100644 server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/TransportRemoteStoreStatsAction.java create mode 100644 server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/package-info.java create mode 100644 server/src/main/java/org/opensearch/rest/action/admin/cluster/RestRemoteStoreStatsAction.java create mode 100644 server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsRequestTests.java create mode 100644 server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsResponseTests.java create mode 100644 server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsTestHelper.java create mode 100644 
server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsTests.java create mode 100644 server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/TransportRemoteStoreStatsActionTests.java diff --git a/client/rest-high-level/src/test/java/org/opensearch/client/RestHighLevelClientTests.java b/client/rest-high-level/src/test/java/org/opensearch/client/RestHighLevelClientTests.java index ddd17a9c3c214..85f329eb6e895 100644 --- a/client/rest-high-level/src/test/java/org/opensearch/client/RestHighLevelClientTests.java +++ b/client/rest-high-level/src/test/java/org/opensearch/client/RestHighLevelClientTests.java @@ -875,6 +875,7 @@ public void testApiNamingConventions() throws Exception { "nodes.reload_secure_settings", "search_shards", "remote_store.restore", + "remote_store.stats", "cluster.put_weighted_routing", "cluster.get_weighted_routing", "cluster.delete_weighted_routing", diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/remote_store.stats.json b/rest-api-spec/src/main/resources/rest-api-spec/api/remote_store.stats.json new file mode 100644 index 0000000000000..437a4439bbcb5 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/remote_store.stats.json @@ -0,0 +1,48 @@ +{ + "remote_store.stats":{ + "documentation":{ + "url": "https://opensearch.org/docs/latest/tuning-your-cluster/availability-and-recovery/remote", + "description":"Stats for remote store." + }, + "stability":"experimental", + "url":{ + "paths":[ + { + "path":"/_remotestore/stats/{index}", + "methods":[ + "GET" + ], + "parts":{ + "index":{ + "type":"string", + "description": "Index name to fetch stats" + } + } + }, + { + "path":"/_remotestore/stats/{index}/{shard_id}", + "methods":[ + "GET" + ], + "parts":{ + "index":{ + "type":"string", + "description":"Index name to fetch stats" + }, + "shard_id":{ + "type":"string", + "description":"Specific shard to fetch stats" + } + } + } + ] + }, + "params":{ + "timeout":{ + "type":"time", + "default":"10s", + "description":"Max time each individual stats request should take." 
+ } + } + } +} diff --git a/server/src/main/java/org/opensearch/action/ActionModule.java b/server/src/main/java/org/opensearch/action/ActionModule.java index efbabf5f4a2a4..f7bc770fbad33 100644 --- a/server/src/main/java/org/opensearch/action/ActionModule.java +++ b/server/src/main/java/org/opensearch/action/ActionModule.java @@ -56,7 +56,9 @@ import org.opensearch.action.admin.cluster.node.reload.NodesReloadSecureSettingsAction; import org.opensearch.action.admin.cluster.node.reload.TransportNodesReloadSecureSettingsAction; import org.opensearch.action.admin.cluster.node.stats.NodesStatsAction; +import org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsAction; import org.opensearch.action.admin.cluster.node.stats.TransportNodesStatsAction; +import org.opensearch.action.admin.cluster.remotestore.stats.TransportRemoteStoreStatsAction; import org.opensearch.action.admin.cluster.node.tasks.cancel.CancelTasksAction; import org.opensearch.action.admin.cluster.node.tasks.cancel.TransportCancelTasksAction; import org.opensearch.action.admin.cluster.node.tasks.get.GetTaskAction; @@ -348,6 +350,7 @@ import org.opensearch.rest.action.admin.cluster.RestPutStoredScriptAction; import org.opensearch.rest.action.admin.cluster.RestReloadSecureSettingsAction; import org.opensearch.rest.action.admin.cluster.RestRemoteClusterInfoAction; +import org.opensearch.rest.action.admin.cluster.RestRemoteStoreStatsAction; import org.opensearch.rest.action.admin.cluster.RestRestoreRemoteStoreAction; import org.opensearch.rest.action.admin.cluster.RestRestoreSnapshotAction; import org.opensearch.rest.action.admin.cluster.RestSnapshotsStatusAction; @@ -582,6 +585,7 @@ public void reg actions.register(NodesInfoAction.INSTANCE, TransportNodesInfoAction.class); actions.register(RemoteInfoAction.INSTANCE, TransportRemoteInfoAction.class); actions.register(NodesStatsAction.INSTANCE, TransportNodesStatsAction.class); + actions.register(RemoteStoreStatsAction.INSTANCE, TransportRemoteStoreStatsAction.class); actions.register(NodesUsageAction.INSTANCE, TransportNodesUsageAction.class); actions.register(NodesHotThreadsAction.INSTANCE, TransportNodesHotThreadsAction.class); actions.register(ListTasksAction.INSTANCE, TransportListTasksAction.class); @@ -958,6 +962,7 @@ public void initRestHandlers(Supplier nodesInCluster) { // Remote Store APIs if (FeatureFlags.isEnabled(FeatureFlags.REMOTE_STORE)) { + registerHandler.accept(new RestRemoteStoreStatsAction()); registerHandler.accept(new RestRestoreRemoteStoreAction()); } } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStats.java b/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStats.java new file mode 100644 index 0000000000000..4602e23eaf8e5 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStats.java @@ -0,0 +1,168 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.action.admin.cluster.remotestore.stats; + +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.remote.RemoteRefreshSegmentTracker; + +import java.io.IOException; + +/** + * Encapsulates all remote store stats + * + * @opensearch.internal + */ +public class RemoteStoreStats implements Writeable, ToXContentFragment { + + private final RemoteRefreshSegmentTracker.Stats remoteSegmentUploadShardStats; + + public RemoteStoreStats(RemoteRefreshSegmentTracker.Stats remoteSegmentUploadShardStats) { + this.remoteSegmentUploadShardStats = remoteSegmentUploadShardStats; + } + + public RemoteStoreStats(StreamInput in) throws IOException { + remoteSegmentUploadShardStats = in.readOptionalWriteable(RemoteRefreshSegmentTracker.Stats::new); + } + + public RemoteRefreshSegmentTracker.Stats getStats() { + return remoteSegmentUploadShardStats; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject() + .field(Fields.SHARD_ID, remoteSegmentUploadShardStats.shardId) + + .field(Fields.LOCAL_REFRESH_TIMESTAMP, remoteSegmentUploadShardStats.localRefreshTimeMs) + .field(Fields.REMOTE_REFRESH_TIMESTAMP, remoteSegmentUploadShardStats.remoteRefreshTimeMs) + .field(Fields.REFRESH_LAG, remoteSegmentUploadShardStats.localRefreshNumber - remoteSegmentUploadShardStats.remoteRefreshNumber) + .field(Fields.BYTES_LAG, remoteSegmentUploadShardStats.bytesLag) + + .field(Fields.BACKPRESSURE_REJECTION_COUNT, remoteSegmentUploadShardStats.rejectionCount) + .field(Fields.CONSECUTIVE_FAILURE_COUNT, remoteSegmentUploadShardStats.consecutiveFailuresCount); + + builder.startObject(Fields.TOTAL_REMOTE_REFRESH); + builder.field(SubFields.STARTED, remoteSegmentUploadShardStats.totalUploadsStarted) + .field(SubFields.SUCCEEDED, remoteSegmentUploadShardStats.totalUploadsSucceeded) + .field(SubFields.FAILED, remoteSegmentUploadShardStats.totalUploadsFailed); + builder.endObject(); + + builder.startObject(Fields.TOTAL_UPLOADS_IN_BYTES); + builder.field(SubFields.STARTED, remoteSegmentUploadShardStats.uploadBytesStarted) + .field(SubFields.SUCCEEDED, remoteSegmentUploadShardStats.uploadBytesSucceeded) + .field(SubFields.FAILED, remoteSegmentUploadShardStats.uploadBytesFailed); + builder.endObject(); + + builder.startObject(Fields.REMOTE_REFRESH_SIZE_IN_BYTES); + builder.field(SubFields.LAST_SUCCESSFUL, remoteSegmentUploadShardStats.lastSuccessfulRemoteRefreshBytes); + builder.field(SubFields.MOVING_AVG, remoteSegmentUploadShardStats.uploadBytesMovingAverage); + builder.endObject(); + + builder.startObject(Fields.UPLOAD_LATENCY_IN_BYTES_PER_SEC); + builder.field(SubFields.MOVING_AVG, remoteSegmentUploadShardStats.uploadBytesPerSecMovingAverage); + builder.endObject(); + builder.startObject(Fields.REMOTE_REFRESH_LATENCY_IN_MILLIS); + builder.field(SubFields.MOVING_AVG, remoteSegmentUploadShardStats.uploadTimeMovingAverage); + builder.endObject(); + builder.endObject(); + + return builder; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeOptionalWriteable(remoteSegmentUploadShardStats); + } + + /** + * Fields for remote store stats response + */ + static final class Fields { + static final String SHARD_ID = "shard_id"; + + /** + * Last successful local 
refresh timestamp in milliseconds + */ + static final String LOCAL_REFRESH_TIMESTAMP = "local_refresh_timestamp_in_millis"; + + /** + * Last successful remote refresh timestamp in milliseconds + */ + static final String REMOTE_REFRESH_TIMESTAMP = "remote_refresh_timestamp_in_millis"; + + /** + * Lag in bytes between the local and the remote store + */ + static final String BYTES_LAG = "bytes_lag"; + + /** + * Number of refreshes by which the remote store is lagging behind the local store + */ + static final String REFRESH_LAG = "refresh_lag"; + + /** + * Total write rejections due to remote store backpressure kicking in + */ + static final String BACKPRESSURE_REJECTION_COUNT = "backpressure_rejection_count"; + + /** + * Number of consecutive remote refresh failures without a single success since the first failure + */ + static final String CONSECUTIVE_FAILURE_COUNT = "consecutive_failure_count"; + + /** + * Represents the number of remote refreshes + */ + static final String TOTAL_REMOTE_REFRESH = "total_remote_refresh"; + + /** + * Represents the total uploads to remote store in bytes + */ + static final String TOTAL_UPLOADS_IN_BYTES = "total_uploads_in_bytes"; + + /** + * Represents the size of new data to be uploaded as part of a refresh + */ + static final String REMOTE_REFRESH_SIZE_IN_BYTES = "remote_refresh_size_in_bytes"; + + /** + * Represents the speed of remote store uploads in bytes per second + */ + static final String UPLOAD_LATENCY_IN_BYTES_PER_SEC = "upload_latency_in_bytes_per_sec"; + + /** + * Time taken by a single remote refresh + */ + static final String REMOTE_REFRESH_LATENCY_IN_MILLIS = "remote_refresh_latency_in_millis"; + } + + /** + * Reusable sub fields for {@link Fields} + */ + static final class SubFields { + static final String STARTED = "started"; + static final String SUCCEEDED = "succeeded"; + static final String FAILED = "failed"; + + /** + * Moving average over the last N values of a {@link Fields} stat + */ + static final String MOVING_AVG = "moving_avg"; + + /** + * Most recent successful attempt stat for a {@link Fields} + */ + static final String LAST_SUCCESSFUL = "last_successful"; + } + +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsAction.java new file mode 100644 index 0000000000000..98c6e863b7b61 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsAction.java @@ -0,0 +1,26 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */ + +package org.opensearch.action.admin.cluster.remotestore.stats; + +import org.opensearch.action.ActionType; + +/** + * Remote store stats action + * + * @opensearch.internal + */ +public class RemoteStoreStatsAction extends ActionType<RemoteStoreStatsResponse> { + + public static final RemoteStoreStatsAction INSTANCE = new RemoteStoreStatsAction(); + public static final String NAME = "cluster:monitor/_remotestore/stats"; + + private RemoteStoreStatsAction() { + super(NAME, RemoteStoreStatsResponse::new); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsRequest.java new file mode 100644 index 0000000000000..5de14f77a7c37 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsRequest.java @@ -0,0 +1,61 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.remotestore.stats; + +import org.opensearch.action.support.broadcast.BroadcastRequest; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; + +import java.io.IOException; + +/** + * A request to fetch remote store stats + * + * @opensearch.internal + */ +public class RemoteStoreStatsRequest extends BroadcastRequest<RemoteStoreStatsRequest> { + + private String[] shards; + private boolean local = false; + + public RemoteStoreStatsRequest() { + super((String[]) null); + shards = new String[0]; + } + + public RemoteStoreStatsRequest(StreamInput in) throws IOException { + super(in); + shards = in.readStringArray(); + local = in.readBoolean(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeStringArray(shards); + out.writeBoolean(local); + } + + public RemoteStoreStatsRequest shards(String... shards) { + this.shards = shards; + return this; + } + + public String[] shards() { + return this.shards; + } + + public void local(boolean local) { + this.local = local; + } + + public boolean local() { + return local; + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsRequestBuilder.java b/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsRequestBuilder.java new file mode 100644 index 0000000000000..c31e4a1fd6178 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsRequestBuilder.java @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */ + +package org.opensearch.action.admin.cluster.remotestore.stats; + +import org.opensearch.action.support.broadcast.BroadcastOperationRequestBuilder; +import org.opensearch.client.OpenSearchClient; +import org.opensearch.common.unit.TimeValue; + +/** + * Builder for RemoteStoreStatsRequest + * + * @opensearch.internal + */ +public class RemoteStoreStatsRequestBuilder extends BroadcastOperationRequestBuilder< + RemoteStoreStatsRequest, + RemoteStoreStatsResponse, + RemoteStoreStatsRequestBuilder> { + + public RemoteStoreStatsRequestBuilder(OpenSearchClient client, RemoteStoreStatsAction action) { + super(client, action, new RemoteStoreStatsRequest()); + } + + /** + * Sets timeout of request. + */ + public final RemoteStoreStatsRequestBuilder setTimeout(TimeValue timeout) { + request.timeout(timeout); + return this; + } + + /** + * Sets shards preference of request. + */ + public final RemoteStoreStatsRequestBuilder setShards(String... shards) { + request.shards(shards); + return this; + } + + /** + * Sets local shards preference of request. + */ + public final RemoteStoreStatsRequestBuilder setLocal(boolean local) { + request.local(local); + return this; + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsResponse.java b/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsResponse.java new file mode 100644 index 0000000000000..5a49f90b42b07 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsResponse.java @@ -0,0 +1,74 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.action.admin.cluster.remotestore.stats; + +import org.opensearch.action.support.DefaultShardOperationFailedException; +import org.opensearch.action.support.broadcast.BroadcastResponse; +import org.opensearch.common.Strings; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.List; + +/** + * Remote Store stats response + * + * @opensearch.internal + */ +public class RemoteStoreStatsResponse extends BroadcastResponse { + + private final RemoteStoreStats[] shards; + + public RemoteStoreStatsResponse(StreamInput in) throws IOException { + super(in); + shards = in.readArray(RemoteStoreStats::new, RemoteStoreStats[]::new); + } + + public RemoteStoreStatsResponse( + RemoteStoreStats[] shards, + int totalShards, + int successfulShards, + int failedShards, + List shardFailures + ) { + super(totalShards, successfulShards, failedShards, shardFailures); + this.shards = shards; + } + + public RemoteStoreStats[] getShards() { + return this.shards; + } + + public RemoteStoreStats getAt(int position) { + return shards[position]; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeArray(shards); + } + + @Override + protected void addCustomXContentFields(XContentBuilder builder, Params params) throws IOException { + builder.startArray("stats"); + for (RemoteStoreStats shard : shards) { + shard.toXContent(builder, params); + } + builder.endArray(); + } + + @Override + public String toString() { + return Strings.toString(XContentType.JSON, this, true, false); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/TransportRemoteStoreStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/TransportRemoteStoreStatsAction.java new file mode 100644 index 0000000000000..1d6361b3acfd5 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/TransportRemoteStoreStatsAction.java @@ -0,0 +1,159 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.action.admin.cluster.remotestore.stats; + +import org.opensearch.action.support.ActionFilters; +import org.opensearch.action.support.DefaultShardOperationFailedException; +import org.opensearch.action.support.broadcast.node.TransportBroadcastByNodeAction; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.block.ClusterBlockException; +import org.opensearch.cluster.block.ClusterBlockLevel; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.routing.PlainShardsIterator; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.ShardsIterator; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.inject.Inject; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.index.IndexService; +import org.opensearch.index.remote.RemoteRefreshSegmentPressureService; +import org.opensearch.index.remote.RemoteRefreshSegmentTracker; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.shard.ShardNotFoundException; +import org.opensearch.indices.IndicesService; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; + +/** + * Transport action for fetching remote store stats at the shard level + * + * @opensearch.internal + */ +public class TransportRemoteStoreStatsAction extends TransportBroadcastByNodeAction< + RemoteStoreStatsRequest, + RemoteStoreStatsResponse, + RemoteStoreStats> { + + private final IndicesService indicesService; + private final RemoteRefreshSegmentPressureService remoteRefreshSegmentPressureService; + + @Inject + public TransportRemoteStoreStatsAction( + ClusterService clusterService, + TransportService transportService, + IndicesService indicesService, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver, + RemoteRefreshSegmentPressureService remoteRefreshSegmentPressureService + ) { + super( + RemoteStoreStatsAction.NAME, + clusterService, + transportService, + actionFilters, + indexNameExpressionResolver, + RemoteStoreStatsRequest::new, + ThreadPool.Names.MANAGEMENT + ); + this.indicesService = indicesService; + this.remoteRefreshSegmentPressureService = remoteRefreshSegmentPressureService; + } + + /** + * Resolves the shard routings that this stats request runs against.
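+ * Only primary shards of remote-store-enabled indices that have a registered remote refresh segment tracker are included; when {@code local} is set on the request, shards assigned to other nodes are filtered out.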
+ */ + @Override + protected ShardsIterator shards(ClusterState clusterState, RemoteStoreStatsRequest request, String[] concreteIndices) { + final List newShardRoutings = new ArrayList<>(); + if (request.shards().length > 0) { + clusterState.routingTable().allShards(concreteIndices).getShardRoutings().forEach(shardRouting -> { + if (Arrays.asList(request.shards()).contains(Integer.toString(shardRouting.shardId().id()))) { + newShardRoutings.add(shardRouting); + } + }); + } else { + newShardRoutings.addAll(clusterState.routingTable().allShards(concreteIndices).getShardRoutings()); + } + return new PlainShardsIterator( + newShardRoutings.stream() + .filter(shardRouting -> remoteRefreshSegmentPressureService.getRemoteRefreshSegmentTracker(shardRouting.shardId()) != null) + .filter( + shardRouting -> !request.local() + || (shardRouting.currentNodeId() == null + || shardRouting.currentNodeId().equals(clusterState.getNodes().getLocalNodeId())) + ) + .filter(ShardRouting::primary) + .filter(shardRouting -> indicesService.indexService(shardRouting.index()).getIndexSettings().isRemoteStoreEnabled()) + .collect(Collectors.toList()) + ); + } + + @Override + protected ClusterBlockException checkGlobalBlock(ClusterState state, RemoteStoreStatsRequest request) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_READ); + } + + @Override + protected ClusterBlockException checkRequestBlock(ClusterState state, RemoteStoreStatsRequest request, String[] concreteIndices) { + return state.blocks().indicesBlockedException(ClusterBlockLevel.METADATA_READ, concreteIndices); + } + + @Override + protected RemoteStoreStats readShardResult(StreamInput in) throws IOException { + return new RemoteStoreStats(in); + } + + @Override + protected RemoteStoreStatsResponse newResponse( + RemoteStoreStatsRequest request, + int totalShards, + int successfulShards, + int failedShards, + List responses, + List shardFailures, + ClusterState clusterState + ) { + return new RemoteStoreStatsResponse( + responses.toArray(new RemoteStoreStats[0]), + totalShards, + successfulShards, + failedShards, + shardFailures + ); + } + + @Override + protected RemoteStoreStatsRequest readRequestFrom(StreamInput in) throws IOException { + return new RemoteStoreStatsRequest(in); + } + + @Override + protected RemoteStoreStats shardOperation(RemoteStoreStatsRequest request, ShardRouting shardRouting) { + IndexService indexService = indicesService.indexServiceSafe(shardRouting.shardId().getIndex()); + IndexShard indexShard = indexService.getShard(shardRouting.shardId().id()); + // if we don't have the routing entry yet, we need it stats wise, we treat it as if the shard is not ready yet + if (indexShard.routingEntry() == null) { + throw new ShardNotFoundException(indexShard.shardId()); + } + + RemoteRefreshSegmentTracker remoteRefreshSegmentTracker = remoteRefreshSegmentPressureService.getRemoteRefreshSegmentTracker( + indexShard.shardId() + ); + assert Objects.nonNull(remoteRefreshSegmentTracker); + + return new RemoteStoreStats(remoteRefreshSegmentTracker.stats()); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/package-info.java b/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/package-info.java new file mode 100644 index 0000000000000..80bd96e3245d9 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/remotestore/stats/package-info.java @@ -0,0 +1,10 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch 
Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** Remote store stats transport handler. */ +package org.opensearch.action.admin.cluster.remotestore.stats; diff --git a/server/src/main/java/org/opensearch/client/ClusterAdminClient.java b/server/src/main/java/org/opensearch/client/ClusterAdminClient.java index 39aa1ed80d0d7..153485c36d77c 100644 --- a/server/src/main/java/org/opensearch/client/ClusterAdminClient.java +++ b/server/src/main/java/org/opensearch/client/ClusterAdminClient.java @@ -59,6 +59,9 @@ import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest; import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequestBuilder; import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse; +import org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsRequest; +import org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsRequestBuilder; +import org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsResponse; import org.opensearch.action.admin.cluster.node.tasks.cancel.CancelTasksRequest; import org.opensearch.action.admin.cluster.node.tasks.cancel.CancelTasksRequestBuilder; import org.opensearch.action.admin.cluster.node.tasks.cancel.CancelTasksResponse; @@ -315,6 +318,10 @@ public interface ClusterAdminClient extends OpenSearchClient { */ NodesStatsRequestBuilder prepareNodesStats(String... nodesIds); + void remoteStoreStats(RemoteStoreStatsRequest request, ActionListener listener); + + RemoteStoreStatsRequestBuilder prepareRemoteStoreStats(String index, String shardId); + /** * Returns top N hot-threads samples per node. The hot-threads are only * sampled for the node ids specified in the request. Nodes usage of the diff --git a/server/src/main/java/org/opensearch/client/support/AbstractClient.java b/server/src/main/java/org/opensearch/client/support/AbstractClient.java index 6a38254a03e71..c67b4493ba4b9 100644 --- a/server/src/main/java/org/opensearch/client/support/AbstractClient.java +++ b/server/src/main/java/org/opensearch/client/support/AbstractClient.java @@ -73,6 +73,10 @@ import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest; import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequestBuilder; import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse; +import org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsAction; +import org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsRequest; +import org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsRequestBuilder; +import org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsResponse; import org.opensearch.action.admin.cluster.node.tasks.cancel.CancelTasksAction; import org.opensearch.action.admin.cluster.node.tasks.cancel.CancelTasksRequest; import org.opensearch.action.admin.cluster.node.tasks.cancel.CancelTasksRequestBuilder; @@ -887,6 +891,23 @@ public NodesStatsRequestBuilder prepareNodesStats(String... 
nodesIds) { return new NodesStatsRequestBuilder(this, NodesStatsAction.INSTANCE).setNodesIds(nodesIds); } + @Override + public void remoteStoreStats(final RemoteStoreStatsRequest request, final ActionListener listener) { + execute(RemoteStoreStatsAction.INSTANCE, request, listener); + } + + @Override + public RemoteStoreStatsRequestBuilder prepareRemoteStoreStats(String index, String shardId) { + RemoteStoreStatsRequestBuilder remoteStoreStatsRequestBuilder = new RemoteStoreStatsRequestBuilder( + this, + RemoteStoreStatsAction.INSTANCE + ).setIndices(index); + if (shardId != null) { + remoteStoreStatsRequestBuilder.setShards(shardId); + } + return remoteStoreStatsRequestBuilder; + } + @Override public ActionFuture nodesUsage(final NodesUsageRequest request) { return execute(NodesUsageAction.INSTANCE, request); diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteRefreshSegmentTracker.java b/server/src/main/java/org/opensearch/index/remote/RemoteRefreshSegmentTracker.java index 109eadf34509b..ac366806d9894 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteRefreshSegmentTracker.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteRefreshSegmentTracker.java @@ -8,11 +8,16 @@ package org.opensearch.index.remote; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.io.stream.Writeable; import org.opensearch.common.util.MovingAverage; import org.opensearch.common.util.Streak; import org.opensearch.common.util.concurrent.ConcurrentCollections; import org.opensearch.index.shard.ShardId; +import java.io.IOException; +import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -62,6 +67,11 @@ public class RemoteRefreshSegmentTracker { */ private volatile long timeMsLag; + /** + * Keeps track of the total bytes of segment files which were uploaded to remote store during last successful remote refresh + */ + private volatile long lastSuccessfulRemoteRefreshBytes; + /** * Cumulative sum of size in bytes of segment files for which upload has started during remote refresh. 
*/ @@ -163,6 +173,8 @@ public RemoteRefreshSegmentTracker( uploadBytesMovingAverageReference = new AtomicReference<>(new MovingAverage(uploadBytesMovingAverageWindowSize)); uploadBytesPerSecMovingAverageReference = new AtomicReference<>(new MovingAverage(uploadBytesPerSecMovingAverageWindowSize)); uploadTimeMsMovingAverageReference = new AtomicReference<>(new MovingAverage(uploadTimeMsMovingAverageWindowSize)); + + latestLocalFileNameLengthMap = new HashMap<>(); } ShardId getShardId() { @@ -357,6 +369,7 @@ boolean isUploadBytesAverageReady() { } void addUploadBytes(long size) { + lastSuccessfulRemoteRefreshBytes = size; synchronized (uploadBytesMutex) { this.uploadBytesMovingAverageReference.get().record(size); } @@ -422,4 +435,142 @@ void updateUploadTimeMsMovingAverageWindowSize(int updatedSize) { this.uploadTimeMsMovingAverageReference.set(this.uploadTimeMsMovingAverageReference.get().copyWithSize(updatedSize)); } } + + public RemoteRefreshSegmentTracker.Stats stats() { + return new RemoteRefreshSegmentTracker.Stats( + shardId, + localRefreshSeqNo, + localRefreshTimeMs, + remoteRefreshSeqNo, + remoteRefreshTimeMs, + uploadBytesStarted, + uploadBytesSucceeded, + uploadBytesFailed, + totalUploadsStarted, + totalUploadsSucceeded, + totalUploadsFailed, + rejectionCount.get(), + failures.length(), + lastSuccessfulRemoteRefreshBytes, + uploadBytesMovingAverageReference.get().getAverage(), + uploadBytesPerSecMovingAverageReference.get().getAverage(), + uploadTimeMsMovingAverageReference.get().getAverage(), + getBytesLag() + ); + } + + /** + * Represents the tracker's state as seen in the stats API. + * + * @opensearch.internal + */ + public static class Stats implements Writeable { + + public final ShardId shardId; + public final long localRefreshNumber; + public final long localRefreshTimeMs; + public final long remoteRefreshNumber; + public final long remoteRefreshTimeMs; + public final long uploadBytesStarted; + public final long uploadBytesFailed; + public final long uploadBytesSucceeded; + public final long totalUploadsStarted; + public final long totalUploadsFailed; + public final long totalUploadsSucceeded; + public final long rejectionCount; + public final long consecutiveFailuresCount; + public final long lastSuccessfulRemoteRefreshBytes; + public final double uploadBytesMovingAverage; + public final double uploadBytesPerSecMovingAverage; + public final double uploadTimeMovingAverage; + public final long bytesLag; + + public Stats( + ShardId shardId, + long localRefreshNumber, + long localRefreshTimeMs, + long remoteRefreshNumber, + long remoteRefreshTimeMs, + long uploadBytesStarted, + long uploadBytesSucceeded, + long uploadBytesFailed, + long totalUploadsStarted, + long totalUploadsSucceeded, + long totalUploadsFailed, + long rejectionCount, + long consecutiveFailuresCount, + long lastSuccessfulRemoteRefreshBytes, + double uploadBytesMovingAverage, + double uploadBytesPerSecMovingAverage, + double uploadTimeMovingAverage, + long bytesLag + ) { + this.shardId = shardId; + this.localRefreshNumber = localRefreshNumber; + this.localRefreshTimeMs = localRefreshTimeMs; + this.remoteRefreshNumber = remoteRefreshNumber; + this.remoteRefreshTimeMs = remoteRefreshTimeMs; + this.uploadBytesStarted = uploadBytesStarted; + this.uploadBytesFailed = uploadBytesFailed; + this.uploadBytesSucceeded = uploadBytesSucceeded; + this.totalUploadsStarted = totalUploadsStarted; + this.totalUploadsFailed = totalUploadsFailed; + this.totalUploadsSucceeded = totalUploadsSucceeded; + this.rejectionCount = 
rejectionCount; + this.consecutiveFailuresCount = consecutiveFailuresCount; + this.lastSuccessfulRemoteRefreshBytes = lastSuccessfulRemoteRefreshBytes; + this.uploadBytesMovingAverage = uploadBytesMovingAverage; + this.uploadBytesPerSecMovingAverage = uploadBytesPerSecMovingAverage; + this.uploadTimeMovingAverage = uploadTimeMovingAverage; + this.bytesLag = bytesLag; + } + + public Stats(StreamInput in) throws IOException { + this.shardId = new ShardId(in); + this.localRefreshNumber = in.readLong(); + this.localRefreshTimeMs = in.readLong(); + this.remoteRefreshNumber = in.readLong(); + this.remoteRefreshTimeMs = in.readLong(); + this.uploadBytesStarted = in.readLong(); + this.uploadBytesFailed = in.readLong(); + this.uploadBytesSucceeded = in.readLong(); + this.totalUploadsStarted = in.readLong(); + this.totalUploadsFailed = in.readLong(); + this.totalUploadsSucceeded = in.readLong(); + this.rejectionCount = in.readLong(); + this.consecutiveFailuresCount = in.readLong(); + this.lastSuccessfulRemoteRefreshBytes = in.readLong(); + this.uploadBytesMovingAverage = in.readDouble(); + this.uploadBytesPerSecMovingAverage = in.readDouble(); + this.uploadTimeMovingAverage = in.readDouble(); + this.bytesLag = in.readLong(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + shardId.writeTo(out); + out.writeLong(localRefreshNumber); + out.writeLong(localRefreshTimeMs); + out.writeLong(remoteRefreshNumber); + out.writeLong(remoteRefreshTimeMs); + out.writeLong(uploadBytesStarted); + out.writeLong(uploadBytesFailed); + out.writeLong(uploadBytesSucceeded); + out.writeLong(totalUploadsStarted); + out.writeLong(totalUploadsFailed); + out.writeLong(totalUploadsSucceeded); + out.writeLong(rejectionCount); + out.writeLong(consecutiveFailuresCount); + out.writeLong(lastSuccessfulRemoteRefreshBytes); + out.writeDouble(uploadBytesMovingAverage); + out.writeDouble(uploadBytesPerSecMovingAverage); + out.writeDouble(uploadTimeMovingAverage); + out.writeLong(bytesLag); + } + } + } diff --git a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestRemoteStoreStatsAction.java b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestRemoteStoreStatsAction.java new file mode 100644 index 0000000000000..9fc2b4ab19bf1 --- /dev/null +++ b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestRemoteStoreStatsAction.java @@ -0,0 +1,59 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */ + +package org.opensearch.rest.action.admin.cluster; + +import org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsRequest; +import org.opensearch.client.node.NodeClient; +import org.opensearch.rest.BaseRestHandler; +import org.opensearch.rest.RestRequest; +import org.opensearch.rest.action.RestToXContentListener; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +import static java.util.Arrays.asList; +import static java.util.Collections.unmodifiableList; +import static org.opensearch.rest.RestRequest.Method.GET; + +/** + * Rest action for fetching remote store stats + * + * @opensearch.internal + */ +public class RestRemoteStoreStatsAction extends BaseRestHandler { + + @Override + public List routes() { + return unmodifiableList( + asList(new Route(GET, "/_remotestore/stats/{index}"), new Route(GET, "/_remotestore/stats/{index}/{shard_id}")) + ); + } + + @Override + public String getName() { + return "remote_store_stats"; + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException { + String index = request.param("index"); + String shardId = request.param("shard_id"); + boolean local = Objects.equals(request.param("local"), "true"); + RemoteStoreStatsRequest remoteStoreStatsRequest = new RemoteStoreStatsRequest(); + if (index != null) { + remoteStoreStatsRequest.indices(index); + } + if (shardId != null) { + remoteStoreStatsRequest.shards(shardId); + } + remoteStoreStatsRequest.local(local); + return channel -> client.admin().cluster().remoteStoreStats(remoteStoreStatsRequest, new RestToXContentListener<>(channel)); + } +} diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsRequestTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsRequestTests.java new file mode 100644 index 0000000000000..8f0a6bba791f6 --- /dev/null +++ b/server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsRequestTests.java @@ -0,0 +1,69 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.action.admin.cluster.remotestore.stats; + +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.test.OpenSearchTestCase; + +import static org.hamcrest.Matchers.equalTo; + +public class RemoteStoreStatsRequestTests extends OpenSearchTestCase { + + public void testAddIndexName() throws Exception { + RemoteStoreStatsRequest request = new RemoteStoreStatsRequest(); + request.indices("random-index-name"); + RemoteStoreStatsRequest deserializedRequest = roundTripRequest(request); + assertRequestsEqual(request, deserializedRequest); + } + + public void testAddShardId() throws Exception { + RemoteStoreStatsRequest request = new RemoteStoreStatsRequest(); + request.indices("random-index-name"); + request.shards("0"); + RemoteStoreStatsRequest deserializedRequest = roundTripRequest(request); + assertRequestsEqual(request, deserializedRequest); + } + + public void testAddLocalShardsOnly() throws Exception { + RemoteStoreStatsRequest request = new RemoteStoreStatsRequest(); + request.indices("random-index-name"); + request.local(true); + RemoteStoreStatsRequest deserializedRequest = roundTripRequest(request); + assertRequestsEqual(request, deserializedRequest); + } + + public void testAddShardIdAndLocalShardsOnly() throws Exception { + RemoteStoreStatsRequest request = new RemoteStoreStatsRequest(); + request.indices("random-index-name"); + request.shards("0"); + request.local(true); + RemoteStoreStatsRequest deserializedRequest = roundTripRequest(request); + assertRequestsEqual(request, deserializedRequest); + } + + /** + * Serialize and deserialize a request. + * @param request A request to serialize. + * @return The deserialized, "round-tripped" request. + */ + private static RemoteStoreStatsRequest roundTripRequest(RemoteStoreStatsRequest request) throws Exception { + try (BytesStreamOutput out = new BytesStreamOutput()) { + request.writeTo(out); + try (StreamInput in = out.bytes().streamInput()) { + return new RemoteStoreStatsRequest(in); + } + } + } + + private static void assertRequestsEqual(RemoteStoreStatsRequest request1, RemoteStoreStatsRequest request2) { + assertThat(request1.indices(), equalTo(request2.indices())); + assertThat(request1.shards(), equalTo(request2.shards())); + } +} diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsResponseTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsResponseTests.java new file mode 100644 index 0000000000000..7d8e1ad5c7016 --- /dev/null +++ b/server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsResponseTests.java @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.action.admin.cluster.remotestore.stats; + +import org.opensearch.action.support.DefaultShardOperationFailedException; +import org.opensearch.common.bytes.BytesReference; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.common.xcontent.XContentHelper; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.remote.RemoteRefreshSegmentTracker; +import org.opensearch.index.shard.ShardId; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; + +import java.util.ArrayList; +import java.util.Map; + +import static org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsTestHelper.compareStatsResponse; +import static org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsTestHelper.createPressureTrackerStats; +import static org.opensearch.core.xcontent.ToXContent.EMPTY_PARAMS; + +public class RemoteStoreStatsResponseTests extends OpenSearchTestCase { + private ThreadPool threadPool; + private ShardId shardId; + + @Override + public void setUp() throws Exception { + super.setUp(); + threadPool = new TestThreadPool("remote_store_stats_test"); + shardId = new ShardId("index", "uuid", 0); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + threadPool.shutdownNow(); + } + + public void testSerialization() throws Exception { + RemoteRefreshSegmentTracker.Stats pressureTrackerStats = createPressureTrackerStats(shardId); + RemoteStoreStats stats = new RemoteStoreStats(pressureTrackerStats); + RemoteStoreStatsResponse statsResponse = new RemoteStoreStatsResponse( + new RemoteStoreStats[] { stats }, + 1, + 1, + 0, + new ArrayList<DefaultShardOperationFailedException>() + ); + + XContentBuilder builder = XContentFactory.jsonBuilder(); + statsResponse.toXContent(builder, EMPTY_PARAMS); + Map<String, Object> jsonResponseObject = XContentHelper.convertToMap(BytesReference.bytes(builder), false, builder.contentType()) + .v2(); + + ArrayList<Map<String, Object>> statsObjectArray = (ArrayList<Map<String, Object>>) jsonResponseObject.get("stats"); + assertEquals(statsObjectArray.size(), 1); + Map<String, Object> statsObject = statsObjectArray.get(0); + Map<String, Object> shardsObject = (Map<String, Object>) jsonResponseObject.get("_shards"); + + assertEquals(shardsObject.get("total"), 1); + assertEquals(shardsObject.get("successful"), 1); + assertEquals(shardsObject.get("failed"), 0); + compareStatsResponse(statsObject, pressureTrackerStats); + } +} diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsTestHelper.java b/server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsTestHelper.java new file mode 100644 index 0000000000000..950048fe67f9d --- /dev/null +++ b/server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsTestHelper.java @@ -0,0 +1,103 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */ + +package org.opensearch.action.admin.cluster.remotestore.stats; + +import org.opensearch.index.remote.RemoteRefreshSegmentTracker; +import org.opensearch.index.shard.ShardId; + +import java.util.Map; + +import static org.opensearch.test.OpenSearchTestCase.assertEquals; +import static org.opensearch.test.OpenSearchTestCase.randomIntBetween; + +/** + * Helper utilities for Remote Store stats tests + */ +public class RemoteStoreStatsTestHelper { + static RemoteRefreshSegmentTracker.Stats createPressureTrackerStats(ShardId shardId) { + return new RemoteRefreshSegmentTracker.Stats( + shardId, + 3, + System.nanoTime() / 1_000_000L + randomIntBetween(10, 100), + 2, + System.nanoTime() / 1_000_000L + randomIntBetween(10, 100), + 10, + 5, + 5, + 10, + 5, + 5, + 3, + 2, + 5, + 2, + 3, + 4, + 9 + ); + } + + static void compareStatsResponse(Map statsObject, RemoteRefreshSegmentTracker.Stats pressureTrackerStats) { + assertEquals(statsObject.get(RemoteStoreStats.Fields.SHARD_ID), pressureTrackerStats.shardId.toString()); + assertEquals(statsObject.get(RemoteStoreStats.Fields.LOCAL_REFRESH_TIMESTAMP), (int) pressureTrackerStats.localRefreshTimeMs); + assertEquals( + statsObject.get(RemoteStoreStats.Fields.REFRESH_LAG), + (int) (pressureTrackerStats.localRefreshNumber - pressureTrackerStats.remoteRefreshNumber) + ); + assertEquals(statsObject.get(RemoteStoreStats.Fields.REMOTE_REFRESH_TIMESTAMP), (int) pressureTrackerStats.remoteRefreshTimeMs); + assertEquals(statsObject.get(RemoteStoreStats.Fields.BYTES_LAG), (int) pressureTrackerStats.bytesLag); + + assertEquals(statsObject.get(RemoteStoreStats.Fields.BACKPRESSURE_REJECTION_COUNT), (int) pressureTrackerStats.rejectionCount); + assertEquals( + statsObject.get(RemoteStoreStats.Fields.CONSECUTIVE_FAILURE_COUNT), + (int) pressureTrackerStats.consecutiveFailuresCount + ); + + assertEquals( + ((Map) statsObject.get(RemoteStoreStats.Fields.TOTAL_UPLOADS_IN_BYTES)).get(RemoteStoreStats.SubFields.STARTED), + (int) pressureTrackerStats.uploadBytesStarted + ); + assertEquals( + ((Map) statsObject.get(RemoteStoreStats.Fields.TOTAL_UPLOADS_IN_BYTES)).get(RemoteStoreStats.SubFields.SUCCEEDED), + (int) pressureTrackerStats.uploadBytesSucceeded + ); + assertEquals( + ((Map) statsObject.get(RemoteStoreStats.Fields.TOTAL_UPLOADS_IN_BYTES)).get(RemoteStoreStats.SubFields.FAILED), + (int) pressureTrackerStats.uploadBytesFailed + ); + assertEquals( + ((Map) statsObject.get(RemoteStoreStats.Fields.REMOTE_REFRESH_SIZE_IN_BYTES)).get(RemoteStoreStats.SubFields.MOVING_AVG), + pressureTrackerStats.uploadBytesMovingAverage + ); + assertEquals( + ((Map) statsObject.get(RemoteStoreStats.Fields.REMOTE_REFRESH_SIZE_IN_BYTES)).get(RemoteStoreStats.SubFields.LAST_SUCCESSFUL), + (int) pressureTrackerStats.lastSuccessfulRemoteRefreshBytes + ); + assertEquals( + ((Map) statsObject.get(RemoteStoreStats.Fields.UPLOAD_LATENCY_IN_BYTES_PER_SEC)).get(RemoteStoreStats.SubFields.MOVING_AVG), + pressureTrackerStats.uploadBytesPerSecMovingAverage + ); + assertEquals( + ((Map) statsObject.get(RemoteStoreStats.Fields.TOTAL_REMOTE_REFRESH)).get(RemoteStoreStats.SubFields.STARTED), + (int) pressureTrackerStats.totalUploadsStarted + ); + assertEquals( + ((Map) statsObject.get(RemoteStoreStats.Fields.TOTAL_REMOTE_REFRESH)).get(RemoteStoreStats.SubFields.SUCCEEDED), + (int) pressureTrackerStats.totalUploadsSucceeded + ); + assertEquals( + ((Map) statsObject.get(RemoteStoreStats.Fields.TOTAL_REMOTE_REFRESH)).get(RemoteStoreStats.SubFields.FAILED), + (int) pressureTrackerStats.totalUploadsFailed + 
); + assertEquals( + ((Map) statsObject.get(RemoteStoreStats.Fields.REMOTE_REFRESH_LATENCY_IN_MILLIS)).get(RemoteStoreStats.SubFields.MOVING_AVG), + pressureTrackerStats.uploadTimeMovingAverage + ); + } +} diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsTests.java new file mode 100644 index 0000000000000..b3cb4864c3b7f --- /dev/null +++ b/server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/RemoteStoreStatsTests.java @@ -0,0 +1,88 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.remotestore.stats; + +import org.opensearch.common.bytes.BytesReference; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.common.xcontent.XContentHelper; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.remote.RemoteRefreshSegmentTracker; +import org.opensearch.index.shard.ShardId; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; + +import java.io.IOException; +import java.util.Map; + +import static org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsTestHelper.compareStatsResponse; +import static org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsTestHelper.createPressureTrackerStats; +import static org.opensearch.core.xcontent.ToXContent.EMPTY_PARAMS; + +public class RemoteStoreStatsTests extends OpenSearchTestCase { + private ThreadPool threadPool; + private ShardId shardId; + + @Override + public void setUp() throws Exception { + super.setUp(); + threadPool = new TestThreadPool("remote_store_stats_test"); + shardId = new ShardId("index", "uuid", 0); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + threadPool.shutdownNow(); + } + + public void testXContentBuilder() throws IOException { + RemoteRefreshSegmentTracker.Stats pressureTrackerStats = createPressureTrackerStats(shardId); + RemoteStoreStats stats = new RemoteStoreStats(pressureTrackerStats); + + XContentBuilder builder = XContentFactory.jsonBuilder(); + stats.toXContent(builder, EMPTY_PARAMS); + Map jsonObject = XContentHelper.convertToMap(BytesReference.bytes(builder), false, builder.contentType()).v2(); + compareStatsResponse(jsonObject, pressureTrackerStats); + } + + public void testSerialization() throws Exception { + RemoteRefreshSegmentTracker.Stats pressureTrackerStats = createPressureTrackerStats(shardId); + RemoteStoreStats stats = new RemoteStoreStats(pressureTrackerStats); + try (BytesStreamOutput out = new BytesStreamOutput()) { + stats.writeTo(out); + try (StreamInput in = out.bytes().streamInput()) { + RemoteStoreStats deserializedStats = new RemoteStoreStats(in); + assertEquals(deserializedStats.getStats().shardId.toString(), stats.getStats().shardId.toString()); + assertEquals(deserializedStats.getStats().localRefreshNumber, stats.getStats().localRefreshNumber); + assertEquals(deserializedStats.getStats().localRefreshTimeMs, stats.getStats().localRefreshTimeMs); + 
assertEquals(deserializedStats.getStats().remoteRefreshNumber, stats.getStats().remoteRefreshNumber); + assertEquals(deserializedStats.getStats().remoteRefreshTimeMs, stats.getStats().remoteRefreshTimeMs); + assertEquals(deserializedStats.getStats().uploadBytesStarted, stats.getStats().uploadBytesStarted); + assertEquals(deserializedStats.getStats().uploadBytesSucceeded, stats.getStats().uploadBytesSucceeded); + assertEquals(deserializedStats.getStats().uploadBytesFailed, stats.getStats().uploadBytesFailed); + assertEquals(deserializedStats.getStats().totalUploadsStarted, stats.getStats().totalUploadsStarted); + assertEquals(deserializedStats.getStats().totalUploadsFailed, stats.getStats().totalUploadsFailed); + assertEquals(deserializedStats.getStats().totalUploadsSucceeded, stats.getStats().totalUploadsSucceeded); + assertEquals(deserializedStats.getStats().rejectionCount, stats.getStats().rejectionCount); + assertEquals(deserializedStats.getStats().consecutiveFailuresCount, stats.getStats().consecutiveFailuresCount); + assertEquals(deserializedStats.getStats().uploadBytesMovingAverage, stats.getStats().uploadBytesMovingAverage, 0); + assertEquals( + deserializedStats.getStats().uploadBytesPerSecMovingAverage, + stats.getStats().uploadBytesPerSecMovingAverage, + 0 + ); + assertEquals(deserializedStats.getStats().uploadTimeMovingAverage, stats.getStats().uploadTimeMovingAverage, 0); + assertEquals(deserializedStats.getStats().bytesLag, stats.getStats().bytesLag); + } + } + } +} diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/TransportRemoteStoreStatsActionTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/TransportRemoteStoreStatsActionTests.java new file mode 100644 index 0000000000000..086c5331fbc80 --- /dev/null +++ b/server/src/test/java/org/opensearch/action/admin/cluster/remotestore/stats/TransportRemoteStoreStatsActionTests.java @@ -0,0 +1,194 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.action.admin.cluster.remotestore.stats; + +import org.mockito.Mockito; +import org.opensearch.Version; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.PlainShardsIterator; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardsIterator; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.index.Index; +import org.opensearch.index.IndexService; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.remote.RemoteRefreshSegmentPressureService; +import org.opensearch.index.remote.RemoteRefreshSegmentTracker; +import org.opensearch.index.shard.IndexShardTestCase; +import org.opensearch.indices.IndicesService; +import org.opensearch.test.FeatureFlagSetter; +import org.opensearch.test.transport.MockTransport; +import org.opensearch.transport.TransportService; + +import java.util.Collections; +import java.util.stream.Collectors; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_INDEX_UUID; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_REPOSITORY; + +public class TransportRemoteStoreStatsActionTests extends IndexShardTestCase { + private IndicesService indicesService; + private RemoteRefreshSegmentPressureService pressureService; + private IndexMetadata remoteStoreIndexMetadata; + private TransportService transportService; + private ClusterService clusterService; + private TransportRemoteStoreStatsAction statsAction; + private DiscoveryNode localNode; + private static final Index INDEX = new Index("testIndex", "testUUID"); + + @Override + public void setUp() throws Exception { + super.setUp(); + indicesService = mock(IndicesService.class); + IndexService indexService = mock(IndexService.class); + clusterService = mock(ClusterService.class); + pressureService = mock(RemoteRefreshSegmentPressureService.class); + MockTransport mockTransport = new MockTransport(); + localNode = new DiscoveryNode("node0", buildNewFakeTransportAddress(), Version.CURRENT); + remoteStoreIndexMetadata = IndexMetadata.builder(INDEX.getName()) + .settings( + settings(Version.CURRENT).put(SETTING_INDEX_UUID, INDEX.getUUID()) + .put(SETTING_REMOTE_STORE_ENABLED, true) + .put(SETTING_REMOTE_STORE_REPOSITORY, "my-test-repo") + .build() + ) + .numberOfShards(2) + .numberOfReplicas(1) + .build(); + transportService = mockTransport.createTransportService( + Settings.EMPTY, + threadPool, + TransportService.NOOP_TRANSPORT_INTERCEPTOR, + x -> localNode, + null, + Collections.emptySet() + ); + + when(pressureService.getRemoteRefreshSegmentTracker(any())).thenReturn(mock(RemoteRefreshSegmentTracker.class)); + 
when(indicesService.indexService(INDEX)).thenReturn(indexService); + when(indexService.getIndexSettings()).thenReturn(new IndexSettings(remoteStoreIndexMetadata, Settings.EMPTY)); + statsAction = new TransportRemoteStoreStatsAction( + clusterService, + transportService, + indicesService, + mock(ActionFilters.class), + mock(IndexNameExpressionResolver.class), + pressureService + ); + + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + transportService.close(); + indicesService.close(); + clusterService.close(); + } + + public void testOnlyPrimaryShards() throws Exception { + FeatureFlagSetter.set(FeatureFlags.REMOTE_STORE); + RoutingTable routingTable = RoutingTable.builder().addAsNew(remoteStoreIndexMetadata).build(); + Metadata metadata = Metadata.builder().put(remoteStoreIndexMetadata, false).build(); + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT).metadata(metadata).routingTable(routingTable).build(); + + when(clusterService.getClusterSettings()).thenReturn( + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); + when(clusterService.state()).thenReturn(clusterState); + + ShardsIterator shardsIterator = statsAction.shards( + clusterService.state(), + new RemoteStoreStatsRequest(), + new String[] { INDEX.getName() } + ); + + assertEquals(shardsIterator.size(), 2); + } + + public void testOnlyLocalShards() throws Exception { + FeatureFlagSetter.set(FeatureFlags.REMOTE_STORE); + String[] concreteIndices = new String[] { INDEX.getName() }; + RoutingTable routingTable = spy(RoutingTable.builder().addAsNew(remoteStoreIndexMetadata).build()); + doReturn(new PlainShardsIterator(routingTable.allShards(INDEX.getName()).stream().map(Mockito::spy).collect(Collectors.toList()))) + .when(routingTable) + .allShards(concreteIndices); + routingTable.allShards(concreteIndices) + .forEach( + shardRouting -> doReturn(shardRouting.shardId().id() == 0 ? 
"node1" : localNode.getId()).when(shardRouting).currentNodeId() + ); + Metadata metadata = Metadata.builder().put(remoteStoreIndexMetadata, false).build(); + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(metadata) + .routingTable(routingTable) + .nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId())) + .build(); + when(clusterService.getClusterSettings()).thenReturn( + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); + when(clusterService.state()).thenReturn(clusterState); + RemoteStoreStatsRequest remoteStoreStatsRequest = new RemoteStoreStatsRequest(); + remoteStoreStatsRequest.local(true); + ShardsIterator shardsIterator = statsAction.shards(clusterService.state(), remoteStoreStatsRequest, concreteIndices); + + assertEquals(shardsIterator.size(), 1); + } + + public void testOnlyRemoteStoreEnabledShards() throws Exception { + FeatureFlagSetter.set(FeatureFlags.REMOTE_STORE); + Index NEW_INDEX = new Index("newIndex", "newUUID"); + IndexMetadata indexMetadataWithoutRemoteStore = IndexMetadata.builder(NEW_INDEX.getName()) + .settings( + settings(Version.CURRENT).put(SETTING_INDEX_UUID, NEW_INDEX.getUUID()).put(SETTING_REMOTE_STORE_ENABLED, false).build() + ) + .numberOfShards(2) + .numberOfReplicas(1) + .build(); + + RoutingTable routingTable = RoutingTable.builder() + .addAsNew(remoteStoreIndexMetadata) + .addAsNew(indexMetadataWithoutRemoteStore) + .build(); + Metadata metadata = Metadata.builder().put(remoteStoreIndexMetadata, false).put(indexMetadataWithoutRemoteStore, false).build(); + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT).metadata(metadata).routingTable(routingTable).build(); + + IndexService newIndexService = mock(IndexService.class); + + when(clusterService.getClusterSettings()).thenReturn( + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); + when(clusterService.state()).thenReturn(clusterState); + when(indicesService.indexService(NEW_INDEX)).thenReturn(newIndexService); + when(newIndexService.getIndexSettings()).thenReturn(new IndexSettings(indexMetadataWithoutRemoteStore, Settings.EMPTY)); + + ShardsIterator shardsIterator = statsAction.shards( + clusterService.state(), + new RemoteStoreStatsRequest(), + new String[] { INDEX.getName() } + ); + + assertEquals(shardsIterator.size(), 2); + } +} diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteRefreshSegmentTrackerTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteRefreshSegmentTrackerTests.java index 48bc28e3a497d..d939d235ab3c7 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteRefreshSegmentTrackerTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteRefreshSegmentTrackerTests.java @@ -9,6 +9,8 @@ package org.opensearch.index.remote; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.index.shard.ShardId; @@ -16,6 +18,7 @@ import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; +import java.io.IOException; import java.util.HashMap; import java.util.Map; @@ -401,4 +404,79 @@ public void testIsUploadTimeMsAverageReady() { assertEquals((double) sum / 20, pressureTracker.getUploadTimeMsAverage(), 0.0d); } + /** + * Tests whether 
+     * Tests whether the RemoteRefreshSegmentTracker.Stats object is generated correctly from RemoteRefreshSegmentTracker.
+     */
+    public void testStatsObjectCreation() {
+        pressureTracker = constructTracker();
+        RemoteRefreshSegmentTracker.Stats pressureTrackerStats = pressureTracker.stats();
+        assertEquals(pressureTracker.getShardId(), pressureTrackerStats.shardId);
+        assertEquals(pressureTracker.getLocalRefreshTimeMs(), (int) pressureTrackerStats.localRefreshTimeMs);
+        assertEquals(pressureTracker.getLocalRefreshSeqNo(), (int) pressureTrackerStats.localRefreshNumber);
+        assertEquals(pressureTracker.getRemoteRefreshTimeMs(), (int) pressureTrackerStats.remoteRefreshTimeMs);
+        assertEquals(pressureTracker.getRemoteRefreshSeqNo(), (int) pressureTrackerStats.remoteRefreshNumber);
+        assertEquals(pressureTracker.getBytesLag(), (int) pressureTrackerStats.bytesLag);
+        assertEquals(pressureTracker.getRejectionCount(), (int) pressureTrackerStats.rejectionCount);
+        assertEquals(pressureTracker.getConsecutiveFailureCount(), (int) pressureTrackerStats.consecutiveFailuresCount);
+        assertEquals(pressureTracker.getUploadBytesStarted(), (int) pressureTrackerStats.uploadBytesStarted);
+        assertEquals(pressureTracker.getUploadBytesSucceeded(), (int) pressureTrackerStats.uploadBytesSucceeded);
+        assertEquals(pressureTracker.getUploadBytesFailed(), (int) pressureTrackerStats.uploadBytesFailed);
+        assertEquals(pressureTracker.getUploadBytesAverage(), pressureTrackerStats.uploadBytesMovingAverage, 0);
+        assertEquals(pressureTracker.getUploadBytesPerSecAverage(), pressureTrackerStats.uploadBytesPerSecMovingAverage, 0);
+        assertEquals(pressureTracker.getUploadTimeMsAverage(), pressureTrackerStats.uploadTimeMovingAverage, 0);
+        assertEquals(pressureTracker.getTotalUploadsStarted(), (int) pressureTrackerStats.totalUploadsStarted);
+        assertEquals(pressureTracker.getTotalUploadsSucceeded(), (int) pressureTrackerStats.totalUploadsSucceeded);
+        assertEquals(pressureTracker.getTotalUploadsFailed(), (int) pressureTrackerStats.totalUploadsFailed);
+    }
+
+    /**
+     * Tests that a RemoteRefreshSegmentTracker.Stats object can be serialized and then deserialized without losing data.
+     * This comes into play during internode data transfer.
+     */
+    public void testStatsObjectCreationViaStream() throws IOException {
+        pressureTracker = constructTracker();
+        RemoteRefreshSegmentTracker.Stats pressureTrackerStats = pressureTracker.stats();
+        try (BytesStreamOutput out = new BytesStreamOutput()) {
+            pressureTrackerStats.writeTo(out);
+            try (StreamInput in = out.bytes().streamInput()) {
+                RemoteRefreshSegmentTracker.Stats deserializedStats = new RemoteRefreshSegmentTracker.Stats(in);
+                assertEquals(deserializedStats.shardId, pressureTrackerStats.shardId);
+                assertEquals((int) deserializedStats.localRefreshTimeMs, (int) pressureTrackerStats.localRefreshTimeMs);
+                assertEquals((int) deserializedStats.localRefreshNumber, (int) pressureTrackerStats.localRefreshNumber);
+                assertEquals((int) deserializedStats.remoteRefreshTimeMs, (int) pressureTrackerStats.remoteRefreshTimeMs);
+                assertEquals((int) deserializedStats.remoteRefreshNumber, (int) pressureTrackerStats.remoteRefreshNumber);
+                assertEquals((int) deserializedStats.bytesLag, (int) pressureTrackerStats.bytesLag);
+                assertEquals((int) deserializedStats.rejectionCount, (int) pressureTrackerStats.rejectionCount);
+                assertEquals((int) deserializedStats.consecutiveFailuresCount, (int) pressureTrackerStats.consecutiveFailuresCount);
+                assertEquals((int) deserializedStats.uploadBytesStarted, (int) pressureTrackerStats.uploadBytesStarted);
+                assertEquals((int) deserializedStats.uploadBytesSucceeded, (int) pressureTrackerStats.uploadBytesSucceeded);
+                assertEquals((int) deserializedStats.uploadBytesFailed, (int) pressureTrackerStats.uploadBytesFailed);
+                assertEquals((int) deserializedStats.uploadBytesMovingAverage, pressureTrackerStats.uploadBytesMovingAverage, 0);
+                assertEquals(
+                    (int) deserializedStats.uploadBytesPerSecMovingAverage,
+                    pressureTrackerStats.uploadBytesPerSecMovingAverage,
+                    0
+                );
+                assertEquals((int) deserializedStats.uploadTimeMovingAverage, pressureTrackerStats.uploadTimeMovingAverage, 0);
+                assertEquals((int) deserializedStats.totalUploadsStarted, (int) pressureTrackerStats.totalUploadsStarted);
+                assertEquals((int) deserializedStats.totalUploadsSucceeded, (int) pressureTrackerStats.totalUploadsSucceeded);
+                assertEquals((int) deserializedStats.totalUploadsFailed, (int) pressureTrackerStats.totalUploadsFailed);
+            }
+        }
+    }
+
+    private RemoteRefreshSegmentTracker constructTracker() {
+        RemoteRefreshSegmentTracker segmentPressureTracker = new RemoteRefreshSegmentTracker(
+            shardId,
+            pressureSettings.getUploadBytesMovingAverageWindowSize(),
+            pressureSettings.getUploadBytesPerSecMovingAverageWindowSize(),
+            pressureSettings.getUploadTimeMovingAverageWindowSize()
+        );
+        segmentPressureTracker.incrementTotalUploadsFailed();
+        segmentPressureTracker.addUploadTimeMs(System.nanoTime() / 1_000_000L + randomIntBetween(10, 100));
+        segmentPressureTracker.addUploadBytes(99);
+        segmentPressureTracker.updateRemoteRefreshTimeMs(System.nanoTime() / 1_000_000L + randomIntBetween(10, 100));
+        segmentPressureTracker.incrementRejectionCount();
+        return segmentPressureTracker;
+    }
 }

From ff138cb87aff999caa01595781e53bd20ec44cb5 Mon Sep 17 00:00:00 2001
From: Michael Froh
Date: Thu, 18 May 2023 07:33:49 -0700
Subject: [PATCH 10/10] Fix version check for search pipelines REST test (#7618)

As @reta pointed out, these tests should be skipped for versions less than 2.7.0, but the skip version ranges are inclusive. So, the upper bound of the range should be 2.6.99.

This brings a change made for https://github.com/opensearch-project/OpenSearch/pull/7589 onto the main branch.
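For illustration, a minimal skip stanza with the corrected bound would look like the sketch below (the test name is hypothetical; the structure mirrors the stanzas in 10_basic.yml). Since both ends of the range are inclusive, " - 2.6.99" skips every version before 2.7.0, while " - 2.7.0" would also skip 2.7.0 itself, the very version that introduced the feature:

    "Test some 2.7.0 feature":
      - skip:
          version: " - 2.6.99"
          reason: "Added in 2.7.0"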
Signed-off-by: Michael Froh
---
 .../rest-api-spec/test/search_pipeline/10_basic.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search_pipeline/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search_pipeline/10_basic.yml
index 46a63b79afb31..60c0706415bc2 100644
--- a/rest-api-spec/src/main/resources/rest-api-spec/test/search_pipeline/10_basic.yml
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search_pipeline/10_basic.yml
@@ -1,7 +1,7 @@
 ---
 "Test basic pipeline crud":
   - skip:
-      version: " - 2.7.0"
+      version: " - 2.6.99"
       reason: "Added in 2.7.0"
   - do:
       search_pipeline.put:
@@ -32,7 +32,7 @@
 ---
 "Test Put Versioned Pipeline":
   - skip:
-      version: " - 2.7.0"
+      version: " - 2.6.99"
       reason: "Added in 2.7.0"
   - do:
       search_pipeline.put:
@@ -125,7 +125,7 @@
 ---
 "Test Get All Pipelines":
   - skip:
-      version: " - 2.7.0"
+      version: " - 2.6.99"
       reason: "Added in 2.7.0"
   - do:
       search_pipeline.put:
@@ -152,7 +152,7 @@
 ---
 "Test invalid config":
   - skip:
-      version: " - 2.7.0"
+      version: " - 2.6.99"
       reason: "Added in 2.7.0"
   - do:
       catch: /parse_exception/