Skip to content

Commit

Permalink
Wait for shard index folder to be cleaned up (elastic#80341)
Browse files Browse the repository at this point in the history
The tests testCreateAndRestoreSearchableSnapshot and 
testCreateAndRestorePartialSearchableSnapshot both 
failed once when asserting the shard folders using 
assertShardFolders(index, true).

The failures occurred when the original index is first closed 
(not deleted) and mounted again under the same name (so 
it will be restored as a searchable snapshot index on top of 
the existing shard files). The SearchableSnapshotDirectory 
implementation takes care to clean up the shard files on disk 
using SearchableSnapshotDirectory.cleanExistingRegularShardFiles() 
and the tests verify that the shard index folder is indeed deleted 
from disk on all nodes but sometime fail because the folder 
is still present.

I wasn't able to reproduce but I think that the closing of the 
original index + the creation of the .snapshot-blob-cache 
index trigger some shard relocations that are cancelled by 
the subsequent mount/restore, leaving some files on disk 
that should be cleaned up but maybe not immediately.

This commit changes the tests to assertBusy() when 
verifying the shard folders and also adds more logging 
information in case waiting for the 
assertShardFolders(index, true) is not enough.

Closes elastic#77831
  • Loading branch information
tlrx committed Nov 5, 2021
1 parent b657473 commit 4806fa7
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.common.util.concurrent.AtomicArray;
import org.elasticsearch.env.NodeEnvironment;
import org.elasticsearch.index.Index;
Expand Down Expand Up @@ -65,6 +66,7 @@
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.not;

@ESIntegTestCase.ClusterScope(supportsDedicatedMasters = false, numClientNodes = 0)
public abstract class BaseSearchableSnapshotsIntegTestCase extends AbstractSnapshotIntegTestCase {
Expand All @@ -75,7 +77,7 @@ protected boolean addMockInternalEngine() {

@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return List.of(LocalStateSearchableSnapshots.class);
return CollectionUtils.appendToCopy(super.nodePlugins(), LocalStateSearchableSnapshots.class);
}

@Override
Expand Down Expand Up @@ -234,9 +236,17 @@ protected void assertShardFolders(String indexName, boolean snapshotDirectory) t
final ShardPath shardPath = ShardPath.loadShardPath(logger, service, shardId, customDataPath);
if (shardPath != null && Files.exists(shardPath.getDataPath())) {
shardFolderFound = true;
assertEquals(snapshotDirectory, Files.notExists(shardPath.resolveIndex()));

assertTrue(Files.exists(shardPath.resolveTranslog()));
final boolean indexExists = Files.exists(shardPath.resolveIndex());
final boolean translogExists = Files.exists(shardPath.resolveTranslog());
logger.info(
"--> [{}] verifying shard data path [{}] (index exists: {}, translog exists: {})",
node,
shardPath.getDataPath(),
indexExists,
translogExists
);
assertThat(snapshotDirectory, not(indexExists));
assertTrue(translogExists);
try (Stream<Path> dir = Files.list(shardPath.resolveTranslog())) {
final long translogFiles = dir.filter(path -> path.getFileName().toString().contains("translog")).count();
if (snapshotDirectory) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ public void testCreateAndRestorePartialSearchableSnapshot() throws Exception {
// TODO: fix
// assertSearchableSnapshotStats(restoredIndexName, true, nonCachedExtensions);
ensureGreen(restoredIndexName);
assertShardFolders(restoredIndexName, true);
assertBusy(() -> assertShardFolders(restoredIndexName, true));

assertThat(
client().admin()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ public void testCreateAndRestoreSearchableSnapshot() throws Exception {
assertSearchableSnapshotStats(restoredIndexName, cacheEnabled, nonCachedExtensions);

ensureGreen(restoredIndexName);
assertShardFolders(restoredIndexName, true);
assertBusy(() -> assertShardFolders(restoredIndexName, true));

assertThat(
client().admin()
Expand Down

0 comments on commit 4806fa7

Please sign in to comment.