diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java
index 114b1894af3ef..f5db347c2d55b 100644
--- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java
+++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java
@@ -1611,6 +1611,61 @@ public void testOutOfOrderCloneFinalization() throws Exception {
         );
     }
 
+    /**
+     * Deletes an index while a partial snapshot of it is queued behind a clone that is blocked on the master, and verifies that
+     * the queued snapshot completes successfully before the clone is unblocked.
+     */
+    public void testIndexDeletedWhileSnapshotQueuedAfterClone() throws Exception {
+        final String master = internalCluster().startMasterOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS);
+        internalCluster().startDataOnlyNode();
+        final String index1 = "index-1";
+        final String index2 = "index-2";
+        createIndexWithContent(index1);
+        createIndexWithContent(index2);
+
+        final String repository = "test-repo";
+        createRepository(repository, "mock");
+
+        final String sourceSnapshot = "source-snapshot";
+        createFullSnapshot(repository, sourceSnapshot);
+
+        final IndexId index1Id = getRepositoryData(repository).resolveIndexId(index1);
+        blockMasterOnShardLevelSnapshotFile(repository, index1Id.getId());
+
+        final String cloneTarget = "target-snapshot";
+        final ActionFuture<AcknowledgedResponse> cloneSnapshot = clusterAdmin().prepareCloneSnapshot(
+            repository,
+            sourceSnapshot,
+            cloneTarget
+        ).setIndices(index1, index2).execute();
+        awaitNumberOfSnapshotsInProgress(1);
+        waitForBlock(master, repository);
+
+        final ActionFuture<CreateSnapshotResponse> snapshot3 = clusterAdmin().prepareCreateSnapshot(repository, "snapshot-3")
+            .setIndices(index1, index2)
+            .setWaitForCompletion(true)
+            .setPartial(true)
+            .execute();
+        final ActionFuture<CreateSnapshotResponse> snapshot2 = clusterAdmin().prepareCreateSnapshot(repository, "snapshot-2")
+            .setIndices(index2)
+            .setWaitForCompletion(true)
+            .execute();
+        assertSuccessful(snapshot2);
+        awaitNumberOfSnapshotsInProgress(2);
+        assertFalse(snapshot3.isDone());
+        assertAcked(admin().indices().prepareDelete(index1).get());
+        assertSuccessful(snapshot3);
+        unblockNode(repository, master);
+
+        assertAcked(cloneSnapshot.get());
+        assertAcked(startDeleteSnapshot(repository, cloneTarget).get());
+
+        assertThat(
+            clusterAdmin().prepareSnapshotStatus().setSnapshots("snapshot-2", "snapshot-3").setRepository(repository).get().getSnapshots(),
+            hasSize(2)
+        );
+    }
+
     public void testQueuedAfterFailedShardSnapshot() throws Exception {
         internalCluster().startMasterOnlyNode();
         final String dataNode = internalCluster().startDataOnlyNode();
diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java
index a618e051736f5..8dab0de00dce8 100644
--- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java
+++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java
@@ -825,6 +825,7 @@ public ImmutableOpenMap<RepositoryShardId, ShardSnapshotStatus> shardsByRepoShar
         }
 
         public Index indexByName(String name) {
+            assert isClone() == false : "tried to get routing index for clone entry [" + this + "]";
             return snapshotIndices.get(name);
         }
 
diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java
index 2b272fd80bc3c..12a37c0eaddb4 100644
--- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java
+++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java
@@ -1692,8 +1692,18 @@ private static ImmutableOpenMap<ShardId, ShardSnapshotStatus> processWaitingShar
                 // this shard snapshot is waiting for a previous snapshot to finish execution for this shard
                 final ShardSnapshotStatus knownFailure = knownFailures.get(shardId);
                 if (knownFailure == null) {
-                    // if no failure is known for the shard we keep waiting
-                    shards.put(shardId, shardStatus);
+                    final IndexRoutingTable indexShardRoutingTable = routingTable.index(shardId.getIndex());
+                    if (indexShardRoutingTable == null) {
+                        // the index got deleted while this shard snapshot was queued so we fail it as missing here
+                        assert entry.partial();
+                        snapshotChanged = true;
+                        logger.debug("failing snapshot of shard [{}] because index got deleted", shardId);
+                        shards.put(shardId, ShardSnapshotStatus.MISSING);
+                        knownFailures.put(shardId, ShardSnapshotStatus.MISSING);
+                    } else {
+                        // if no failure is known for the shard we keep waiting
+                        shards.put(shardId, shardStatus);
+                    }
                 } else {
                     // If a failure is known for an execution we waited on for this shard then we fail with the same exception here
                     // as well
@@ -1761,9 +1771,10 @@ private static ImmutableOpenMap<ShardId, ShardSnapshotStatus> processWaitingShar
 
     private static boolean waitingShardsStartedOrUnassigned(SnapshotsInProgress snapshotsInProgress, ClusterChangedEvent event) {
         for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
-            if (entry.state() == State.STARTED) {
+            if (entry.state() == State.STARTED && entry.isClone() == false) {
                 for (ObjectObjectCursor<RepositoryShardId, ShardSnapshotStatus> shardStatus : entry.shardsByRepoShardId()) {
-                    if (shardStatus.value.state() != ShardState.WAITING) {
+                    final ShardState state = shardStatus.value.state();
+                    if (state != ShardState.WAITING && state != ShardState.QUEUED) {
                         continue;
                     }
                     final RepositoryShardId shardId = shardStatus.key;
@@ -1772,7 +1783,7 @@ private static boolean waitingShardsStartedOrUnassigned(SnapshotsInProgress snap
                         .getRoutingTable()
                         .index(entry.indexByName(shardId.indexName()));
                     if (indexShardRoutingTable == null) {
-                        // index got removed concurrently and we have to fail WAITING state shards
+                        // index got removed concurrently and we have to fail WAITING or QUEUED state shards
                         return true;
                     }
                     ShardRouting shardRouting = indexShardRoutingTable.shard(shardId.shardId()).primaryShard();