From 2a48308358169154b7ad93f7bdfeb95de51b0018 Mon Sep 17 00:00:00 2001 From: Tanguy Leroux Date: Wed, 3 Nov 2021 12:44:06 +0100 Subject: [PATCH] [7.16] Fix SearchableSnapshotsPersistentCacheIntegTests (#80077) (#80262) * Fix SearchableSnapshotsPersistentCacheIntegTests (#80077) The two tests in SearchableSnapshotsPersistentCacheIntegTests failed few times when they were expecting the persistent cache to be empty after mounted indices deletions. While I wasn't able to reproduce the issue (which happened ~5 times in the last 2 months) I suspect the cause are the same: the tests wait for the mounted index to be green but don't always wait for the prewarming to be completed. I think it is possible that a part of a cache file finishes to be prewarmed after the test is completed, making the next step to fail as the persistent cache will contain a doc that is not related to the subsequent test. This commit changes the testCacheSurviveRestart so that it waits for the prewarming to complete. Similarly it changes t estPersistentCacheCleanUpAfterRelocation to also wait for the prewarming to complete (just in case, this test verifies that the recovery is done). It also logs more information about the emptiness of the persistent cache on data nodes and makes the cluster scope to TEST to ensure that a dedicated test cluster is used for each test. Closes #76159 Closes #76160 * usual suspect --- ...bleSnapshotsPersistentCacheIntegTests.java | 50 +++++++++++++------ 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPersistentCacheIntegTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPersistentCacheIntegTests.java index c5f2fc6d5ccd7..1d2d8fa90b7b8 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPersistentCacheIntegTests.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPersistentCacheIntegTests.java @@ -24,6 +24,7 @@ import org.elasticsearch.repositories.fs.FsRepository; import org.elasticsearch.snapshots.SnapshotInfo; import org.elasticsearch.test.BackgroundIndexer; +import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.InternalTestCluster; import org.elasticsearch.xpack.searchablesnapshots.BaseSearchableSnapshotsIntegTestCase; import org.elasticsearch.xpack.searchablesnapshots.SearchableSnapshots; @@ -46,6 +47,7 @@ import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.notNullValue; +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST) public class SearchableSnapshotsPersistentCacheIntegTests extends BaseSearchableSnapshotsIntegTestCase { @Override @@ -87,6 +89,9 @@ public void testCacheSurviveRestart() throws Exception { ); ensureGreen(restoredIndexName); + assertExecutorIsIdle(SearchableSnapshots.CACHE_FETCH_ASYNC_THREAD_POOL_NAME); + assertExecutorIsIdle(SearchableSnapshots.CACHE_PREWARMING_THREAD_POOL_NAME); + final Index restoredIndex = client().admin() .cluster() .prepareState() @@ -147,20 +152,20 @@ public Settings onNodeStopped(String nodeName) { } }); + ensureGreen(restoredIndexName); + + assertExecutorIsIdle(SearchableSnapshots.CACHE_FETCH_ASYNC_THREAD_POOL_NAME); + assertExecutorIsIdle(SearchableSnapshots.CACHE_PREWARMING_THREAD_POOL_NAME); + final CacheService cacheServiceAfterRestart = internalCluster().getInstance(CacheService.class, dataNode); final PersistentCache persistentCacheAfterRestart = cacheServiceAfterRestart.getPersistentCache(); - ensureGreen(restoredIndexName); cacheFiles.forEach(cacheFile -> assertTrue(cacheFile + " should have survived node restart", Files.exists(cacheFile))); assertThat("Cache files should be loaded in cache", persistentCacheAfterRestart.getNumDocs(), equalTo((long) cacheFiles.size())); assertAcked(client().admin().indices().prepareDelete(restoredIndexName)); - - assertBusy(() -> { - cacheFiles.forEach(cacheFile -> assertFalse(cacheFile + " should have been cleaned up", Files.exists(cacheFile))); - cacheServiceAfterRestart.synchronizeCache(); - assertThat(persistentCacheAfterRestart.getNumDocs(), equalTo(0L)); - }); + assertBusy(() -> cacheFiles.forEach(cacheFile -> assertFalse(cacheFile + " should have been cleaned up", Files.exists(cacheFile)))); + assertEmptyPersistentCacheOnDataNodes(); } public void testPersistentCacheCleanUpAfterRelocation() throws Exception { @@ -187,6 +192,7 @@ public void testPersistentCacheCleanUpAfterRelocation() throws Exception { final int numDocs = scaledRandomIntBetween(1_000, 5_000); try (BackgroundIndexer indexer = new BackgroundIndexer(indexName, "_doc", client(), numDocs)) { waitForDocs(numDocs, indexer); + indexer.stopAndAwaitStopped(); } refresh(indexName); @@ -214,7 +220,6 @@ public void testPersistentCacheCleanUpAfterRelocation() throws Exception { .cluster() .prepareState() .clear() - .setRoutingTable(true) .setMetadata(true) .setIndices(mountedIndexName) .get(); @@ -248,6 +253,9 @@ public void testPersistentCacheCleanUpAfterRelocation() throws Exception { ensureGreen(mountedIndexName); + assertExecutorIsIdle(SearchableSnapshots.CACHE_FETCH_ASYNC_THREAD_POOL_NAME); + assertExecutorIsIdle(SearchableSnapshots.CACHE_PREWARMING_THREAD_POOL_NAME); + recoveryResponse = client().admin().indices().prepareRecoveries(mountedIndexName).get(); assertTrue(recoveryResponse.shardRecoveryStates().containsKey(mountedIndexName)); assertTrue( @@ -271,12 +279,26 @@ public void testPersistentCacheCleanUpAfterRelocation() throws Exception { logger.info("--> deleting mounted index {}", mountedIndex); assertAcked(client().admin().indices().prepareDelete(mountedIndexName)); + assertEmptyPersistentCacheOnDataNodes(); + } - assertBusy(() -> { - for (CacheService cacheService : internalCluster().getDataNodeInstances(CacheService.class)) { - cacheService.synchronizeCache(); - assertThat(cacheService.getPersistentCache().getNumDocs(), equalTo(0L)); - } - }); + private void assertEmptyPersistentCacheOnDataNodes() throws Exception { + final Set dataNodes = new HashSet<>(getDiscoveryNodes().getDataNodes().values()); + logger.info("--> verifying persistent caches are empty on nodes... {}", dataNodes); + try { + assertBusy(() -> { + for (DiscoveryNode node : org.elasticsearch.core.List.copyOf(dataNodes)) { + final CacheService cacheService = internalCluster().getInstance(CacheService.class, node.getName()); + cacheService.synchronizeCache(); + assertThat(cacheService.getPersistentCache().getNumDocs(), equalTo(0L)); + logger.info("--> persistent cache is empty on node {}", node); + dataNodes.remove(node); + } + }); + logger.info("--> all persistent caches are empty"); + } catch (AssertionError ae) { + logger.error("--> persistent caches not empty on nodes: {}", dataNodes); + throw ae; + } } }