From 18fba4b1df0d13556838053c3e96e7ce1bc74ec2 Mon Sep 17 00:00:00 2001
From: Armin Braun
Date: Mon, 7 Oct 2019 10:38:44 +0200
Subject: [PATCH] Add IT for Snapshot Issue in 47552 (#47627) (#47635)

* Add IT for Snapshot Issue in 47552 (#47627)

Adding a specific integration test that reproduces the problem fixed in
#47552. The issue fixed there otherwise only reproduces in the snapshot
resiliency tests, which are not available in 6.8, to which the fix is
also being backported.
---
 .../DedicatedClusterSnapshotRestoreIT.java    | 49 +++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/server/src/test/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java b/server/src/test/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java
index b924d1eccced1..c2368d9420139 100644
--- a/server/src/test/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java
+++ b/server/src/test/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java
@@ -1237,6 +1237,55 @@ public void testDataNodeRestartWithBusyMasterDuringSnapshot() throws Exception {
         }, 60L, TimeUnit.SECONDS);
     }
 
+    public void testDataNodeRestartAfterShardSnapshotFailure() throws Exception {
+        logger.info("--> starting a master node and two data nodes");
+        internalCluster().startMasterOnlyNode();
+        final List<String> dataNodes = internalCluster().startDataOnlyNodes(2);
+        logger.info("--> creating repository");
+        assertAcked(client().admin().cluster().preparePutRepository("test-repo")
+            .setType("mock").setSettings(Settings.builder()
+                .put("location", randomRepoPath())
+                .put("compress", randomBoolean())
+                .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)));
+        assertAcked(prepareCreate("test-idx", 0, Settings.builder()
+            .put("number_of_shards", 2).put("number_of_replicas", 0)));
+        ensureGreen();
+        logger.info("--> indexing some data");
+        final int numdocs = randomIntBetween(50, 100);
+        IndexRequestBuilder[] builders = new IndexRequestBuilder[numdocs];
+        for (int i = 0; i < builders.length; i++) {
+            builders[i] = client().prepareIndex("test-idx", "type1",
+                Integer.toString(i)).setSource("field1", "bar " + i);
+        }
+        indexRandom(true, builders);
+        flushAndRefresh();
+        blockAllDataNodes("test-repo");
+        logger.info("--> snapshot");
+        client(internalCluster().getMasterName()).admin().cluster()
+            .prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get();
+        logger.info("--> restarting first data node, which should cause the primary shard on it to be failed");
+        internalCluster().restartNode(dataNodes.get(0), InternalTestCluster.EMPTY_CALLBACK);
+
+        logger.info("--> wait for shard snapshot of first primary to show as failed");
+        assertBusy(() -> assertThat(
+            client().admin().cluster().prepareSnapshotStatus("test-repo").setSnapshots("test-snap").get().getSnapshots()
+                .get(0).getShardsStats().getFailedShards(), is(1)), 60L, TimeUnit.SECONDS);
+
+        logger.info("--> restarting second data node, which should cause the primary shard on it to be failed");
+        internalCluster().restartNode(dataNodes.get(1), InternalTestCluster.EMPTY_CALLBACK);
+
+        // check that snapshot completes with both failed shards being accounted for in the snapshot result
+        assertBusy(() -> {
+            GetSnapshotsResponse snapshotsStatusResponse = client().admin().cluster()
+                .prepareGetSnapshots("test-repo").setSnapshots("test-snap").setIgnoreUnavailable(true).get();
+            assertEquals(1, snapshotsStatusResponse.getSnapshots().size());
+            SnapshotInfo snapshotInfo = snapshotsStatusResponse.getSnapshots().get(0);
+            assertTrue(snapshotInfo.state().toString(), snapshotInfo.state().completed());
+            assertThat(snapshotInfo.totalShards(), is(2));
+            assertThat(snapshotInfo.shardFailures(), hasSize(2));
+        }, 60L, TimeUnit.SECONDS);
+    }
+
     public void testRetentionLeasesClearedOnRestore() throws Exception {
         final String repoName = "test-repo-retention-leases";
         assertAcked(client().admin().cluster().preparePutRepository(repoName)