Skip to content

Commit

Permalink
Fix testRetentionWhileSnapshotInProgress (#48219)
Browse files Browse the repository at this point in the history
This test could fail for two reasons, both should be fixed by this PR:

1) It hit a timeout for an `assertBusy`. This commit increases the
timeout for that `assertBusy`.

2) The snapshot that was supposed to be blocked could, in fact, be
successful. This is because a previous snapshot had already been taken
successfully, and no new data had been added between the two snapshots.
This means that no new segment files needed to be written for the new
snapshot, so the block on data files was never triggered. This commit
changes two things: First, it indexes some new data before taking the
second snapshot (the one that needs to be blocked), and second,
checks to ensure that the block is actually hit before continuing
with the test.
  • Loading branch information
gwbrown authored Oct 18, 2019
1 parent 3ea666d commit a2217f4
Showing 1 changed file with 26 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import org.elasticsearch.snapshots.SnapshotState;
import org.elasticsearch.snapshots.mockstore.MockRepository;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.junit.annotations.TestLogging;
import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
import org.elasticsearch.xpack.core.XPackSettings;
import org.elasticsearch.xpack.core.slm.SnapshotLifecyclePolicy;
Expand All @@ -44,6 +43,7 @@
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
Expand All @@ -58,18 +58,17 @@
/**
* Tests for Snapshot Lifecycle Management that require a slow or blocked snapshot repo (using {@link MockRepository})
*/
@TestLogging(value = "org.elasticsearch.snapshots.mockstore:DEBUG",
reason = "https://github.com/elastic/elasticsearch/issues/46508")
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0)
public class SLMSnapshotBlockingIntegTests extends ESIntegTestCase {

private static final String REPO = "repo-id";
List<String> dataNodeNames = null;

/**
* Per-test setup: starts master-only and data-only nodes through the internal test cluster,
* records the names returned for the data nodes in {@code dataNodeNames}, and then calls
* {@code ensureGreen()}.
*/
@Before
public void ensureClusterNodes() {
logger.info("--> starting enough nodes to ensure we have enough to safely stop for tests");
internalCluster().startMasterOnlyNodes(2);
// NOTE(review): this call and the one below look like the removed and added sides of a single
// diff line; as written, data-only nodes are started twice and this first result is discarded.
// Confirm which of the two calls is meant to remain.
internalCluster().startDataOnlyNodes(2);
// Keep the returned node names so tests can query individual data nodes (e.g. via checkBlocked).
dataNodeNames = internalCluster().startDataOnlyNodes(2);
ensureGreen();
}

Expand Down Expand Up @@ -163,7 +162,7 @@ public void testRetentionWhileSnapshotInProgress() throws Exception {
final String policyId = "slm-policy";
int docCount = 20;
for (int i = 0; i < docCount; i++) {
index(indexName, "_doc", i + "", Collections.singletonMap("foo", "bar"));
index(indexName, "_doc", null, Collections.singletonMap("foo", "bar"));
}

initializeRepo(REPO);
Expand Down Expand Up @@ -196,15 +195,26 @@ public void testRetentionWhileSnapshotInProgress() throws Exception {
assertTrue("cluster state was not ready for deletion " + state, SnapshotRetentionTask.okayToDeleteSnapshots(state));
});

// Take another snapshot, but before doing that, block it from completing
logger.info("--> blocking nodes from completing snapshot");
logger.info("--> indexing more docs to force new segment files");
for (int i = 0; i < docCount; i++) {
index(indexName, "_doc", null, Collections.singletonMap("foo", "bar"));
}
refresh(indexName);

try {
// Take another snapshot, but before doing that, block it from completing
logger.info("--> blocking data nodes from completing snapshot");
blockAllDataNodes(REPO);
blockMasterFromFinalizingSnapshotOnIndexFile(REPO);
logger.info("--> blocked data nodes, executing policy");
final String secondSnapName = executePolicy(policyId);
logger.info("--> executed policy, got snapname [{}]", secondSnapName);


// Check that the executed snapshot shows up in the SLM output as in_progress
assertBusy(() -> {
logger.info("--> Waiting for at least one data node to hit the block");
assertTrue(dataNodeNames.stream().anyMatch(node -> checkBlocked(node, REPO)));
logger.info("--> at least one data node has hit the block");
GetSnapshotLifecycleAction.Response getResp =
client().execute(GetSnapshotLifecycleAction.INSTANCE, new GetSnapshotLifecycleAction.Request(policyId)).get();
logger.info("--> checking for in progress snapshot...");
Expand All @@ -218,7 +228,7 @@ public void testRetentionWhileSnapshotInProgress() throws Exception {
assertThat(inProgress.getState(), anyOf(equalTo(SnapshotsInProgress.State.INIT),
equalTo(SnapshotsInProgress.State.STARTED)));
assertNull(inProgress.getFailure());
});
}, 60, TimeUnit.SECONDS);

// Run retention
logger.info("--> triggering retention");
Expand All @@ -243,7 +253,7 @@ public void testRetentionWhileSnapshotInProgress() throws Exception {
}
});

// Cancel the ongoing snapshot to cancel it
// Cancel the ongoing snapshot (or just delete it if it finished)
assertBusy(() -> {
try {
logger.info("--> cancelling snapshot {}", secondSnapName);
Expand Down Expand Up @@ -508,4 +518,10 @@ public void waitForBlock(String node, String repository, TimeValue timeout) thro
}
fail("Timeout waiting for node [" + node + "] to be blocked");
}

/**
 * Reports whether the named repository on the given node is currently blocked.
 *
 * <p>Looks up the {@link RepositoriesService} instance for {@code node}, fetches the repository
 * registered under {@code repository}, and returns the result of {@link MockRepository#blocked()}.
 * The repository is cast to {@link MockRepository}, so a different repository type would fail
 * with a {@link ClassCastException}.
 *
 * @param node       name of the cluster node whose repository instance should be inspected
 * @param repository name of the repository to look up on that node
 * @return the value of {@code blocked()} on that node's mock repository
 */
public boolean checkBlocked(String node, String repository) {
    final RepositoriesService nodeRepositories = internalCluster().getInstance(RepositoriesService.class, node);
    return ((MockRepository) nodeRepositories.repository(repository)).blocked();
}
}

0 comments on commit a2217f4

Please sign in to comment.