Wait for prewarm when relocating searchable snapshot shards #65531
@@ -0,0 +1,124 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.searchablesnapshots;

import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.indices.recovery.RecoveryState;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.snapshots.mockstore.MockRepository;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.threadpool.ThreadPool;
import org.hamcrest.Matchers;

import java.util.Collection;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.Executor;
import java.util.concurrent.TimeUnit;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;

@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0)
public class SearchableSnapshotsRelocationIntegTests extends BaseSearchableSnapshotsIntegTestCase {

    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        return List.of(LocalStateSearchableSnapshots.class, MockRepository.Plugin.class);
    }

    public void testRelocationWaitsForPreWarm() throws Exception {
        internalCluster().startMasterOnlyNode();
        final String firstDataNode = internalCluster().startDataOnlyNode();
        final String index = "test-idx";
        createIndexWithContent(index, indexSettingsNoReplicas(1).build());
        final String repoName = "test-repo";
        createRepository(repoName, "mock");
        final String snapshotName = "test-snapshot";
        createSnapshot(repoName, snapshotName, List.of(index));
        assertAcked(client().admin().indices().prepareDelete(index));
        final String restoredIndex = mountSnapshot(repoName, snapshotName, index, Settings.EMPTY);
        ensureGreen(restoredIndex);
        final String secondDataNode = internalCluster().startDataOnlyNode();

        final ThreadPool threadPool = internalCluster().getInstance(ThreadPool.class, secondDataNode);
        final int preWarmThreads = threadPool.info(SearchableSnapshotsConstants.CACHE_PREWARMING_THREAD_POOL_NAME).getMax();
        final Executor executor = threadPool.executor(SearchableSnapshotsConstants.CACHE_PREWARMING_THREAD_POOL_NAME);
        final CyclicBarrier barrier = new CyclicBarrier(preWarmThreads + 1);
        final CountDownLatch latch = new CountDownLatch(1);
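        // Occupy every cache-prewarming thread on the second data node with a task that blocks
        // on the latch, so prewarming of the relocated shard cannot complete until the latch is released.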
        for (int i = 0; i < preWarmThreads; i++) {
            executor.execute(() -> {
                try {
                    barrier.await();
                    latch.await();
                } catch (Exception e) {
                    throw new AssertionError(e);
                }
            });
        }
        logger.info("--> waiting for prewarm threads to all become blocked");
        barrier.await();

        logger.info("--> force index [{}] to relocate to [{}]", index, secondDataNode);
        assertAcked(
            client().admin()
                .indices()
                .prepareUpdateSettings(restoredIndex)
                .setSettings(
                    Settings.builder()
                        .put(
                            IndexMetadata.INDEX_ROUTING_REQUIRE_GROUP_SETTING.getConcreteSettingForNamespace("_name").getKey(),
                            secondDataNode
                        )
                )
        );
        assertBusy(() -> {
            final List<RecoveryState> recoveryStates = getActiveRestores(restoredIndex);
            assertThat(recoveryStates, Matchers.hasSize(1));
            final RecoveryState shardRecoveryState = recoveryStates.get(0);
            assertEquals(firstDataNode, shardRecoveryState.getSourceNode().getName());
            assertEquals(secondDataNode, shardRecoveryState.getTargetNode().getName());
        });

        logger.info("--> sleep for 5s to ensure we are actually stuck at the FINALIZE stage and that the primary has not yet relocated");
        TimeUnit.SECONDS.sleep(5L);
[Review comment] Could we instead find the shard using […]?

[Author reply] ++ Thanks, you actually prevented a likely test failure here as well :) I moved the check for the translog stage into a busy assert and then added the check for one clean-files condition after it. Otherwise we'd only have had 5s to arrive at […].
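For illustration, a rough sketch of the restructuring described in the reply above, assuming the stage check simply moves into the existing assertBusy block (the additional clean-files-condition check mentioned there is not shown):

        // Hypothetical sketch of the follow-up change described in the review reply above.
        assertBusy(() -> {
            final List<RecoveryState> recoveryStates = getActiveRestores(restoredIndex);
            assertThat(recoveryStates, Matchers.hasSize(1));
            final RecoveryState shardRecoveryState = recoveryStates.get(0);
            assertEquals(firstDataNode, shardRecoveryState.getSourceNode().getName());
            assertEquals(secondDataNode, shardRecoveryState.getTargetNode().getName());
            // stage check no longer relies on a fixed 5s sleep
            assertSame(RecoveryState.Stage.TRANSLOG, shardRecoveryState.getStage());
        });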
        final RecoveryState recoveryState = getActiveRestores(restoredIndex).get(0);
        assertSame(RecoveryState.Stage.TRANSLOG, recoveryState.getStage());
        final ClusterState state = client().admin().cluster().prepareState().get().getState();
        final String primaryNodeId = state.routingTable().index(restoredIndex).shard(0).primaryShard().currentNodeId();
        final DiscoveryNode primaryNode = state.nodes().resolveNode(primaryNodeId);
        assertEquals(firstDataNode, primaryNode.getName());

        logger.info("--> unblocking prewarm threads");
        latch.countDown();

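        // With the prewarm threads unblocked, relocation can complete: wait for the cluster to report
        // no relocating shards and for the recovery to disappear from the active recoveries list.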
        assertFalse(
            client().admin()
                .cluster()
                .prepareHealth(restoredIndex)
                .setWaitForNoRelocatingShards(true)
                .setWaitForEvents(Priority.LANGUID)
                .get()
                .isTimedOut()
        );
        assertBusy(() -> assertThat(getActiveRestores(restoredIndex), Matchers.empty()));
    }

    private static List<RecoveryState> getActiveRestores(String restoredIndex) {
        return client().admin()
            .indices()
            .prepareRecoveries(restoredIndex)
            .setDetailed(true)
            .setActiveOnly(true)
            .get()
            .shardRecoveryStates()
            .get(restoredIndex);
    }
}
[Author comment] This is a little low-tech relative to the tricky ref-counting in IndexShard. I figured this was ok here since the hand-off request only comes in once (at least judging by the assertions we have in IndexShard), while the other API has a more […] feel to it and there are no hard guarantees on the index shard state listener only being invoked once (though the "loaded" flag on the directory effectively guarantees we only add one condition for now), and it wasn't that much extra effort since the API was supposed to be non-blocking anyway.