Skip to content

Commit

Permalink
Wait for shard index folder to be cleaned up (elastic#80341)
Browse files Browse the repository at this point in the history
The tests testCreateAndRestoreSearchableSnapshot and
testCreateAndRestorePartialSearchableSnapshot both
failed once when asserting the shard folders using
assertShardFolders(index, true).

The failures occurred when the original index is first closed
(not deleted) and mounted again under the same name (so
it will be restored as a searchable snapshot index on top of
the existing shard files). The SearchableSnapshotDirectory
implementation takes care to clean up the shard files on disk
using SearchableSnapshotDirectory.cleanExistingRegularShardFiles()
and the tests verify that the shard index folder is indeed deleted
from disk on all nodes but sometime fail because the folder
is still present.

I wasn't able to reproduce but I think that the closing of the
original index + the creation of the .snapshot-blob-cache
index trigger some shard relocations that are cancelled by
the subsequent mount/restore, leaving some files on disk
that should be cleaned up but maybe not immediately.

This commit changes the tests to assertBusy() when
verifying the shard folders and also adds more logging
information in case waiting for the
assertShardFolders(index, true) is not enough.

Closes elastic#77831
  • Loading branch information
tlrx committed Nov 5, 2021
1 parent 2a48308 commit bed89cf
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.common.util.concurrent.AtomicArray;
import org.elasticsearch.env.NodeEnvironment;
import org.elasticsearch.index.Index;
Expand Down Expand Up @@ -61,6 +62,7 @@
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.not;

public abstract class BaseSearchableSnapshotsIntegTestCase extends AbstractSnapshotIntegTestCase {
@Override
Expand All @@ -70,7 +72,7 @@ protected boolean addMockInternalEngine() {

@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return org.elasticsearch.core.List.of(LocalStateSearchableSnapshots.class);
return CollectionUtils.appendToCopy(super.nodePlugins(), LocalStateSearchableSnapshots.class);
}

@Override
Expand Down Expand Up @@ -235,9 +237,17 @@ protected void assertShardFolders(String indexName, boolean snapshotDirectory) t
final ShardPath shardPath = ShardPath.loadShardPath(logger, service, shardId, customDataPath);
if (shardPath != null && Files.exists(shardPath.getDataPath())) {
shardFolderFound = true;
assertEquals(snapshotDirectory, Files.notExists(shardPath.resolveIndex()));

assertTrue(Files.exists(shardPath.resolveTranslog()));
final boolean indexExists = Files.exists(shardPath.resolveIndex());
final boolean translogExists = Files.exists(shardPath.resolveTranslog());
logger.info(
"--> [{}] verifying shard data path [{}] (index exists: {}, translog exists: {})",
node,
shardPath.getDataPath(),
indexExists,
translogExists
);
assertThat(snapshotDirectory, not(indexExists));
assertTrue(translogExists);
try (Stream<Path> dir = Files.list(shardPath.resolveTranslog())) {
final long translogFiles = dir.filter(path -> path.getFileName().toString().contains("translog")).count();
if (snapshotDirectory) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ public void testCreateAndRestorePartialSearchableSnapshot() throws Exception {
// TODO: fix
// assertSearchableSnapshotStats(restoredIndexName, true, nonCachedExtensions);
ensureGreen(restoredIndexName);
assertShardFolders(restoredIndexName, true);
assertBusy(() -> assertShardFolders(restoredIndexName, true));

assertThat(
client().admin()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ public void testCreateAndRestoreSearchableSnapshot() throws Exception {
assertSearchableSnapshotStats(restoredIndexName, cacheEnabled, nonCachedExtensions);

ensureGreen(restoredIndexName);
assertShardFolders(restoredIndexName, true);
assertBusy(() -> assertShardFolders(restoredIndexName, true));

assertThat(
client().admin()
Expand Down

0 comments on commit bed89cf

Please sign in to comment.