Add IT for Snapshot Issue in 47552
Adding a specific integration test that reproduces the problem fixed in
elastic#47552. Otherwise, the issue only reproduces in the snapshot
resiliency tests, which are not available in 6.8, where the fix is also
being backported.
original-brownbear committed Oct 6, 2019
1 parent 9945d5c commit 8a07fa3
Showing 1 changed file with 49 additions and 0 deletions.
@@ -1234,6 +1234,55 @@ public void testDataNodeRestartWithBusyMasterDuringSnapshot() throws Exception {
        }, 60L, TimeUnit.SECONDS);
    }

    public void testDataNodeRestartAfterShardSnapshotFailure() throws Exception {
        logger.info("--> starting a master node and two data nodes");
        internalCluster().startMasterOnlyNode();
        final List<String> dataNodes = internalCluster().startDataOnlyNodes(2);
        logger.info("--> creating repository");
        assertAcked(client().admin().cluster().preparePutRepository("test-repo")
            .setType("mock").setSettings(Settings.builder()
                .put("location", randomRepoPath())
                .put("compress", randomBoolean())
                .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)));
        assertAcked(prepareCreate("test-idx", 0, Settings.builder()
            .put("number_of_shards", 2).put("number_of_replicas", 0)));
        ensureGreen();
        logger.info("--> indexing some data");
        final int numdocs = randomIntBetween(50, 100);
        IndexRequestBuilder[] builders = new IndexRequestBuilder[numdocs];
        for (int i = 0; i < builders.length; i++) {
            builders[i] = client().prepareIndex("test-idx", "type1",
                Integer.toString(i)).setSource("field1", "bar " + i);
        }
        indexRandom(true, builders);
        flushAndRefresh();
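        // block repository writes on all data nodes so that the shard snapshots
        // hang in progress once the snapshot below is started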
        blockAllDataNodes("test-repo");
        logger.info("--> snapshot");
        client(internalCluster().getMasterName()).admin().cluster()
            .prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get();
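        // the create request returns immediately (wait for completion is false) while
        // both shard snapshots remain stuck against the blocked repository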
logger.info("--> restarting first data node, which should cause the primary shard on it to be failed");
internalCluster().restartNode(dataNodes.get(0), InternalTestCluster.EMPTY_CALLBACK);
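        // the node drops out of the cluster mid-snapshot, so the master should record
        // that shard's snapshot as failed rather than leaving it in progress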

logger.info("--> wait for shard snapshot of first primary to show as failed");
assertBusy(() -> assertThat(
client().admin().cluster().prepareSnapshotStatus("test-repo").setSnapshots("test-snap").get().getSnapshots()
.get(0).getShardsStats().getFailedShards(), is(1)), 60L, TimeUnit.SECONDS);

logger.info("--> restarting second data node, which should cause the primary shard on it to be failed");
internalCluster().restartNode(dataNodes.get(1), InternalTestCluster.EMPTY_CALLBACK);

        // check that snapshot completes with both failed shards being accounted for in the snapshot result
        assertBusy(() -> {
            GetSnapshotsResponse snapshotsStatusResponse = client().admin().cluster()
                .prepareGetSnapshots("test-repo").setSnapshots("test-snap").setIgnoreUnavailable(true).get();
            assertEquals(1, snapshotsStatusResponse.getSnapshots("test-repo").size());
            SnapshotInfo snapshotInfo = snapshotsStatusResponse.getSnapshots("test-repo").get(0);
            assertTrue(snapshotInfo.state().toString(), snapshotInfo.state().completed());
            assertThat(snapshotInfo.totalShards(), is(2));
            assertThat(snapshotInfo.shardFailures(), hasSize(2));
        }, 60L, TimeUnit.SECONDS);
    }

    public void testRetentionLeasesClearedOnRestore() throws Exception {
        final String repoName = "test-repo-retention-leases";
        assertAcked(client().admin().cluster().preparePutRepository(repoName)
