From 49902761479e85da18d0521c3f9a4fba428b51ad Mon Sep 17 00:00:00 2001 From: Iraklis Psaroudakis Date: Mon, 7 Oct 2024 21:15:04 +0300 Subject: [PATCH] Fast refresh indices should use search shards (#113478) Fast refresh indices should now behave like non fast refresh indices in how they execute (m)gets and searches. I.e., they should use the search shards. For BWC, we define a new transport version. We expect search shards to be upgraded first, before promotable shards. Until the cluster is fully upgraded, the promotable shards (whether upgraded or not) will still receive and execute gets/searches locally. Relates ES-9573 Relates ES-9579 --- .../org/elasticsearch/TransportVersions.java | 1 + .../refresh/TransportShardRefreshAction.java | 32 +++++++------------ ...ansportUnpromotableShardRefreshAction.java | 15 +++++++++ .../action/get/TransportGetAction.java | 3 +- .../get/TransportShardMultiGetAction.java | 3 +- .../support/replication/PostWriteRefresh.java | 9 ++---- .../cluster/routing/OperationRouting.java | 9 +++++- .../index/cache/bitset/BitsetFilterCache.java | 2 +- .../routing/IndexRoutingTableTests.java | 24 ++++++++------ .../cache/bitset/BitSetFilterCacheTests.java | 2 +- 10 files changed, 56 insertions(+), 44 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index f6e4649aa4807..1911013cbe8e9 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -235,6 +235,7 @@ static TransportVersion def(int id) { public static final TransportVersion SEARCH_FAILURE_STATS = def(8_759_00_0); public static final TransportVersion INGEST_GEO_DATABASE_PROVIDERS = def(8_760_00_0); public static final TransportVersion DATE_TIME_DOC_VALUES_LOCALES = def(8_761_00_0); + public static final TransportVersion FAST_REFRESH_RCO = def(8_762_00_0); /* * STOP! READ THIS FIRST! No, really, diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportShardRefreshAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportShardRefreshAction.java index 7857e9a22e9b9..cb667400240f0 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportShardRefreshAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportShardRefreshAction.java @@ -23,7 +23,6 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.indices.IndicesService; import org.elasticsearch.injection.guice.Inject; @@ -120,27 +119,18 @@ public void onPrimaryOperationComplete( ActionListener listener ) { assert replicaRequest.primaryRefreshResult.refreshed() : "primary has not refreshed"; - boolean fastRefresh = IndexSettings.INDEX_FAST_REFRESH_SETTING.get( - clusterService.state().metadata().index(indexShardRoutingTable.shardId().getIndex()).getSettings() + UnpromotableShardRefreshRequest unpromotableReplicaRequest = new UnpromotableShardRefreshRequest( + indexShardRoutingTable, + replicaRequest.primaryRefreshResult.primaryTerm(), + replicaRequest.primaryRefreshResult.generation(), + false + ); + transportService.sendRequest( + transportService.getLocalNode(), + TransportUnpromotableShardRefreshAction.NAME, + unpromotableReplicaRequest, + new ActionListenerResponseHandler<>(listener.safeMap(r -> null), in -> ActionResponse.Empty.INSTANCE, refreshExecutor) ); - - // Indices marked with fast refresh do not rely on refreshing the unpromotables - if (fastRefresh) { - listener.onResponse(null); - } else { - UnpromotableShardRefreshRequest unpromotableReplicaRequest = new UnpromotableShardRefreshRequest( - indexShardRoutingTable, - replicaRequest.primaryRefreshResult.primaryTerm(), - replicaRequest.primaryRefreshResult.generation(), - false - ); - transportService.sendRequest( - transportService.getLocalNode(), - TransportUnpromotableShardRefreshAction.NAME, - unpromotableReplicaRequest, - new ActionListenerResponseHandler<>(listener.safeMap(r -> null), in -> ActionResponse.Empty.INSTANCE, refreshExecutor) - ); - } } } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportUnpromotableShardRefreshAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportUnpromotableShardRefreshAction.java index 6c24ec2d17604..f91a983d47885 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportUnpromotableShardRefreshAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportUnpromotableShardRefreshAction.java @@ -24,6 +24,9 @@ import java.util.List; +import static org.elasticsearch.TransportVersions.FAST_REFRESH_RCO; +import static org.elasticsearch.index.IndexSettings.INDEX_FAST_REFRESH_SETTING; + public class TransportUnpromotableShardRefreshAction extends TransportBroadcastUnpromotableAction< UnpromotableShardRefreshRequest, ActionResponse.Empty> { @@ -73,6 +76,18 @@ protected void unpromotableShardOperation( return; } + // During an upgrade to FAST_REFRESH_RCO, we expect search shards to be first upgraded before the primary is upgraded. Thus, + // when the primary is upgraded, and starts to deliver unpromotable refreshes, we expect the search shards to be upgraded already. + // Note that the fast refresh setting is final. + // TODO: remove assertion (ES-9563) + assert INDEX_FAST_REFRESH_SETTING.get(shard.indexSettings().getSettings()) == false + || transportService.getLocalNodeConnection().getTransportVersion().onOrAfter(FAST_REFRESH_RCO) + : "attempted to refresh a fast refresh search shard " + + shard + + " on transport version " + + transportService.getLocalNodeConnection().getTransportVersion() + + " (before FAST_REFRESH_RCO)"; + ActionListener.run(responseListener, listener -> { shard.waitForPrimaryTermAndGeneration( request.getPrimaryTerm(), diff --git a/server/src/main/java/org/elasticsearch/action/get/TransportGetAction.java b/server/src/main/java/org/elasticsearch/action/get/TransportGetAction.java index 189aa1c95d865..99eac250641ae 100644 --- a/server/src/main/java/org/elasticsearch/action/get/TransportGetAction.java +++ b/server/src/main/java/org/elasticsearch/action/get/TransportGetAction.java @@ -125,11 +125,10 @@ protected void asyncShardOperation(GetRequest request, ShardId shardId, ActionLi IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex()); IndexShard indexShard = indexService.getShard(shardId.id()); if (indexShard.routingEntry().isPromotableToPrimary() == false) { - assert indexShard.indexSettings().isFastRefresh() == false - : "a search shard should not receive a TransportGetAction for an index with fast refresh"; handleGetOnUnpromotableShard(request, indexShard, listener); return; } + // TODO: adapt assertion to assert only that it is not stateless (ES-9563) assert DiscoveryNode.isStateless(clusterService.getSettings()) == false || indexShard.indexSettings().isFastRefresh() : "in Stateless a promotable to primary shard can receive a TransportGetAction only if an index has the fast refresh setting"; if (request.realtime()) { // we are not tied to a refresh cycle here anyway diff --git a/server/src/main/java/org/elasticsearch/action/get/TransportShardMultiGetAction.java b/server/src/main/java/org/elasticsearch/action/get/TransportShardMultiGetAction.java index 8d5760307c3fe..633e7ef6793ab 100644 --- a/server/src/main/java/org/elasticsearch/action/get/TransportShardMultiGetAction.java +++ b/server/src/main/java/org/elasticsearch/action/get/TransportShardMultiGetAction.java @@ -124,11 +124,10 @@ protected void asyncShardOperation(MultiGetShardRequest request, ShardId shardId IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex()); IndexShard indexShard = indexService.getShard(shardId.id()); if (indexShard.routingEntry().isPromotableToPrimary() == false) { - assert indexShard.indexSettings().isFastRefresh() == false - : "a search shard should not receive a TransportShardMultiGetAction for an index with fast refresh"; handleMultiGetOnUnpromotableShard(request, indexShard, listener); return; } + // TODO: adapt assertion to assert only that it is not stateless (ES-9563) assert DiscoveryNode.isStateless(clusterService.getSettings()) == false || indexShard.indexSettings().isFastRefresh() : "in Stateless a promotable to primary shard can receive a TransportShardMultiGetAction only if an index has " + "the fast refresh setting"; diff --git a/server/src/main/java/org/elasticsearch/action/support/replication/PostWriteRefresh.java b/server/src/main/java/org/elasticsearch/action/support/replication/PostWriteRefresh.java index 683c3589c893d..7414aeeb2c405 100644 --- a/server/src/main/java/org/elasticsearch/action/support/replication/PostWriteRefresh.java +++ b/server/src/main/java/org/elasticsearch/action/support/replication/PostWriteRefresh.java @@ -19,7 +19,6 @@ import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; -import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.translog.Translog; @@ -53,9 +52,7 @@ public void refreshShard( case WAIT_UNTIL -> waitUntil(indexShard, location, new ActionListener<>() { @Override public void onResponse(Boolean forced) { - // Fast refresh indices do not depend on the unpromotables being refreshed - boolean fastRefresh = IndexSettings.INDEX_FAST_REFRESH_SETTING.get(indexShard.indexSettings().getSettings()); - if (location != null && (indexShard.routingEntry().isSearchable() == false && fastRefresh == false)) { + if (location != null && indexShard.routingEntry().isSearchable() == false) { refreshUnpromotables(indexShard, location, listener, forced, postWriteRefreshTimeout); } else { listener.onResponse(forced); @@ -68,9 +65,7 @@ public void onFailure(Exception e) { } }); case IMMEDIATE -> immediate(indexShard, listener.delegateFailureAndWrap((l, r) -> { - // Fast refresh indices do not depend on the unpromotables being refreshed - boolean fastRefresh = IndexSettings.INDEX_FAST_REFRESH_SETTING.get(indexShard.indexSettings().getSettings()); - if (indexShard.getReplicationGroup().getRoutingTable().unpromotableShards().size() > 0 && fastRefresh == false) { + if (indexShard.getReplicationGroup().getRoutingTable().unpromotableShards().size() > 0) { sendUnpromotableRequests(indexShard, r.generation(), true, l, postWriteRefreshTimeout); } else { l.onResponse(true); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/OperationRouting.java b/server/src/main/java/org/elasticsearch/cluster/routing/OperationRouting.java index f7812d284f2af..9120e25b443d7 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/OperationRouting.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/OperationRouting.java @@ -32,6 +32,7 @@ import java.util.Set; import java.util.stream.Collectors; +import static org.elasticsearch.TransportVersions.FAST_REFRESH_RCO; import static org.elasticsearch.index.IndexSettings.INDEX_FAST_REFRESH_SETTING; public class OperationRouting { @@ -305,8 +306,14 @@ public ShardId shardId(ClusterState clusterState, String index, String id, @Null } public static boolean canSearchShard(ShardRouting shardRouting, ClusterState clusterState) { + // TODO: remove if and always return isSearchable (ES-9563) if (INDEX_FAST_REFRESH_SETTING.get(clusterState.metadata().index(shardRouting.index()).getSettings())) { - return shardRouting.isPromotableToPrimary(); + // Until all the cluster is upgraded, we send searches/gets to the primary (even if it has been upgraded) to execute locally. + if (clusterState.getMinTransportVersion().onOrAfter(FAST_REFRESH_RCO)) { + return shardRouting.isSearchable(); + } else { + return shardRouting.isPromotableToPrimary(); + } } else { return shardRouting.isSearchable(); } diff --git a/server/src/main/java/org/elasticsearch/index/cache/bitset/BitsetFilterCache.java b/server/src/main/java/org/elasticsearch/index/cache/bitset/BitsetFilterCache.java index c19e3ca353569..3b37afc3b297b 100644 --- a/server/src/main/java/org/elasticsearch/index/cache/bitset/BitsetFilterCache.java +++ b/server/src/main/java/org/elasticsearch/index/cache/bitset/BitsetFilterCache.java @@ -105,7 +105,7 @@ static boolean shouldLoadRandomAccessFiltersEagerly(IndexSettings settings) { boolean loadFiltersEagerlySetting = settings.getValue(INDEX_LOAD_RANDOM_ACCESS_FILTERS_EAGERLY_SETTING); boolean isStateless = DiscoveryNode.isStateless(settings.getNodeSettings()); if (isStateless) { - return DiscoveryNode.hasRole(settings.getNodeSettings(), DiscoveryNodeRole.INDEX_ROLE) + return DiscoveryNode.hasRole(settings.getNodeSettings(), DiscoveryNodeRole.SEARCH_ROLE) && loadFiltersEagerlySetting && INDEX_FAST_REFRESH_SETTING.get(settings.getSettings()); } else { diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTableTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTableTests.java index 21b30557cafea..6a7f4bb27a324 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTableTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTableTests.java @@ -9,6 +9,7 @@ package org.elasticsearch.cluster.routing; +import org.elasticsearch.TransportVersion; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.settings.Settings; @@ -19,6 +20,7 @@ import java.util.List; +import static org.elasticsearch.TransportVersions.FAST_REFRESH_RCO; import static org.elasticsearch.index.IndexSettings.INDEX_FAST_REFRESH_SETTING; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; @@ -27,16 +29,22 @@ public class IndexRoutingTableTests extends ESTestCase { public void testReadyForSearch() { - innerReadyForSearch(false); - innerReadyForSearch(true); + innerReadyForSearch(false, false); + innerReadyForSearch(false, true); + innerReadyForSearch(true, false); + innerReadyForSearch(true, true); } - private void innerReadyForSearch(boolean fastRefresh) { + // TODO: remove if (fastRefresh && beforeFastRefreshRCO) branches (ES-9563) + private void innerReadyForSearch(boolean fastRefresh, boolean beforeFastRefreshRCO) { Index index = new Index(randomIdentifier(), UUIDs.randomBase64UUID()); ClusterState clusterState = mock(ClusterState.class, Mockito.RETURNS_DEEP_STUBS); when(clusterState.metadata().index(any(Index.class)).getSettings()).thenReturn( Settings.builder().put(INDEX_FAST_REFRESH_SETTING.getKey(), fastRefresh).build() ); + when(clusterState.getMinTransportVersion()).thenReturn( + beforeFastRefreshRCO ? TransportVersion.fromId(FAST_REFRESH_RCO.id() - 1_00_0) : TransportVersion.current() + ); // 2 primaries that are search and index ShardId p1 = new ShardId(index, 0); IndexShardRoutingTable shardTable1 = new IndexShardRoutingTable( @@ -55,7 +63,7 @@ private void innerReadyForSearch(boolean fastRefresh) { shardTable1 = new IndexShardRoutingTable(p1, List.of(getShard(p1, true, ShardRoutingState.STARTED, ShardRouting.Role.INDEX_ONLY))); shardTable2 = new IndexShardRoutingTable(p2, List.of(getShard(p2, true, ShardRoutingState.STARTED, ShardRouting.Role.INDEX_ONLY))); indexRoutingTable = new IndexRoutingTable(index, new IndexShardRoutingTable[] { shardTable1, shardTable2 }); - if (fastRefresh) { + if (fastRefresh && beforeFastRefreshRCO) { assertTrue(indexRoutingTable.readyForSearch(clusterState)); } else { assertFalse(indexRoutingTable.readyForSearch(clusterState)); @@ -91,7 +99,7 @@ private void innerReadyForSearch(boolean fastRefresh) { ) ); indexRoutingTable = new IndexRoutingTable(index, new IndexShardRoutingTable[] { shardTable1, shardTable2 }); - if (fastRefresh) { + if (fastRefresh && beforeFastRefreshRCO) { assertTrue(indexRoutingTable.readyForSearch(clusterState)); } else { assertFalse(indexRoutingTable.readyForSearch(clusterState)); @@ -118,8 +126,6 @@ private void innerReadyForSearch(boolean fastRefresh) { assertTrue(indexRoutingTable.readyForSearch(clusterState)); // 2 unassigned primaries that are index only with some replicas that are all available - // Fast refresh indices do not support replicas so this can not practically happen. If we add support we will want to ensure - // that readyForSearch allows for searching replicas when the index shard is not available. shardTable1 = new IndexShardRoutingTable( p1, List.of( @@ -137,8 +143,8 @@ private void innerReadyForSearch(boolean fastRefresh) { ) ); indexRoutingTable = new IndexRoutingTable(index, new IndexShardRoutingTable[] { shardTable1, shardTable2 }); - if (fastRefresh) { - assertFalse(indexRoutingTable.readyForSearch(clusterState)); // if we support replicas for fast refreshes this needs to change + if (fastRefresh && beforeFastRefreshRCO) { + assertFalse(indexRoutingTable.readyForSearch(clusterState)); } else { assertTrue(indexRoutingTable.readyForSearch(clusterState)); } diff --git a/server/src/test/java/org/elasticsearch/index/cache/bitset/BitSetFilterCacheTests.java b/server/src/test/java/org/elasticsearch/index/cache/bitset/BitSetFilterCacheTests.java index 77635fd0312f8..4cb3ce418f761 100644 --- a/server/src/test/java/org/elasticsearch/index/cache/bitset/BitSetFilterCacheTests.java +++ b/server/src/test/java/org/elasticsearch/index/cache/bitset/BitSetFilterCacheTests.java @@ -276,7 +276,7 @@ public void testShouldLoadRandomAccessFiltersEagerly() { for (var isStateless : values) { if (isStateless) { assertEquals( - loadFiltersEagerly && indexFastRefresh && hasIndexRole, + loadFiltersEagerly && indexFastRefresh && hasIndexRole == false, BitsetFilterCache.shouldLoadRandomAccessFiltersEagerly( bitsetFilterCacheSettings(isStateless, hasIndexRole, loadFiltersEagerly, indexFastRefresh) )