Fix Two Races that Lead to Stuck Snapshots (#37686)

* Fixes two broken spots:
  1. A master failover while deleting a snapshot that has no shards gets stuck if the new master finds the 0-shard snapshot in `INIT` state when deleting it
  2. Aborted shards that were never seen in `INIT` state by the `SnapshotShardsService` are never reported to the master as failed, so the snapshot stays in `ABORTED` state, with one or more shards stuck in `ABORTED` state, and never gets deleted
* Tried to make the fixes as short as possible so we can backport to `6.x` with the least amount of risk
* Significantly extended the test infrastructure to reproduce the above two issues
* Two new test runs:
  1. Reproducing the effects of node disconnects/restarts in isolation
  2. Reproducing the effects of disconnects/restarts in parallel with shard relocations and deletes
* Relates #32265
* Closes #32348
elastic · Feb 1, 2019 · 0a604e3 · 0a604e3
1 parent 0d56955
commit 0a604e3
Show file tree

Hide file tree

Showing 6 changed files with 1,096 additions and 638 deletions.
diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java
@@ -67,6 +67,10 @@
 import org.elasticsearch.repositories.IndexId;
 import org.elasticsearch.repositories.Repository;
 import org.elasticsearch.threadpool.ThreadPool;
+import org.elasticsearch.transport.EmptyTransportResponseHandler;
+import org.elasticsearch.transport.TransportException;
+import org.elasticsearch.transport.TransportRequestDeduplicator;
+import org.elasticsearch.transport.TransportResponse;
 import org.elasticsearch.transport.TransportService;
 
 import java.io.IOException;
@@ -85,7 +89,6 @@
 import static java.util.Collections.emptyMap;
 import static java.util.Collections.unmodifiableMap;
 import static org.elasticsearch.cluster.SnapshotsInProgress.completed;
-import static org.elasticsearch.transport.EmptyTransportResponseHandler.INSTANCE_SAME;
 
 /**
  * This service runs on data and master nodes and controls currently snapshotted shards on these nodes. It is responsible for
@@ -112,6 +115,10 @@ public class SnapshotShardsService extends AbstractLifecycleComponent implements
 
     private volatile Map<Snapshot, Map<ShardId, IndexShardSnapshotStatus>> shardSnapshots = emptyMap();
 
+    // Deduplicates the shard snapshot status update requests that this node sends to the master
+    private final TransportRequestDeduplicator<UpdateIndexShardSnapshotStatusRequest> remoteFailedRequestDeduplicator =
+        new TransportRequestDeduplicator<>();
+
     private final SnapshotStateExecutor snapshotStateExecutor = new SnapshotStateExecutor();
     private final UpdateSnapshotStatusAction updateSnapshotStatusHandler;
 
@@ -272,12 +279,11 @@ private void processIndexShardSnapshots(ClusterChangedEvent event) {
                     // Abort all running shards for this snapshot
                     Map<ShardId, IndexShardSnapshotStatus> snapshotShards = shardSnapshots.get(entry.snapshot());
                     if (snapshotShards != null) {
-                        final String failure = "snapshot has been aborted";
                         for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shard : entry.shards()) {
-
                             final IndexShardSnapshotStatus snapshotStatus = snapshotShards.get(shard.key);
                             if (snapshotStatus != null) {
-                                final IndexShardSnapshotStatus.Copy lastSnapshotStatus = snapshotStatus.abortIfNotCompleted(failure);
+                                final IndexShardSnapshotStatus.Copy lastSnapshotStatus =
+                                    snapshotStatus.abortIfNotCompleted("snapshot has been aborted");
                                 final Stage stage = lastSnapshotStatus.getStage();
                                 if (stage == Stage.FINALIZE) {
                                     logger.debug("[{}] trying to cancel snapshot on shard [{}] that is finalizing, " +
@@ -295,6 +301,15 @@ private void processIndexShardSnapshots(ClusterChangedEvent event) {
                                 }
                             }
                         }
+                    } else {
+                        final Snapshot snapshot = entry.snapshot();
+                        for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> curr : entry.shards()) {
+                            // due to cluster state batching we might have missed the INIT state and gone straight to ABORTED
+                            // notify master that abort has completed by moving to FAILED
+                            if (curr.value.state() == State.ABORTED) {
+                                notifyFailedSnapshotShard(snapshot, curr.key, localNodeId, curr.value.reason());
+                            }
+                        }
                     }
                 }
             }
@@ -515,12 +530,33 @@ void notifyFailedSnapshotShard(final Snapshot snapshot, final ShardId shardId, f
 
     /** Updates the shard snapshot status by sending a {@link UpdateIndexShardSnapshotStatusRequest} to the master node */
     void sendSnapshotShardUpdate(final Snapshot snapshot, final ShardId shardId, final ShardSnapshotStatus status) {
-        try {
-            UpdateIndexShardSnapshotStatusRequest request = new UpdateIndexShardSnapshotStatusRequest(snapshot, shardId, status);
-            transportService.sendRequest(transportService.getLocalNode(), UPDATE_SNAPSHOT_STATUS_ACTION_NAME, request, INSTANCE_SAME);
-        } catch (Exception e) {
-            logger.warn(() -> new ParameterizedMessage("[{}] [{}] failed to update snapshot state", snapshot, status), e);
-        }
+        remoteFailedRequestDeduplicator.executeOnce(
+            new UpdateIndexShardSnapshotStatusRequest(snapshot, shardId, status),
+            new ActionListener<Void>() {
+                @Override
+                public void onResponse(Void aVoid) {
+                    logger.trace("[{}] [{}] updated snapshot state", snapshot, status);
+                }
+
+                @Override
+                public void onFailure(Exception e) {
+                    logger.warn(
+                        () -> new ParameterizedMessage("[{}] [{}] failed to update snapshot state", snapshot, status), e);
+                }
+            },
+            (req, reqListener) -> transportService.sendRequest(transportService.getLocalNode(), UPDATE_SNAPSHOT_STATUS_ACTION_NAME, req,
+                new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
+                    @Override
+                    public void handleResponse(TransportResponse.Empty response) {
+                        reqListener.onResponse(null);
+                    }
+
+                    @Override
+                    public void handleException(TransportException exp) {
+                        reqListener.onFailure(exp);
+                    }
+                })
+        );
     }
 
     /**

diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java
@@ -1210,7 +1210,10 @@ public ClusterState execute(ClusterState currentState) throws Exception {
                     if (state == State.INIT) {
                         // snapshot is still initializing, mark it as aborted
                         shards = snapshotEntry.shards();
-
+                        assert shards.isEmpty();
+                        // No shards in this snapshot, we delete it right away since the SnapshotShardsService
+                        // has no work to do.
+                        endSnapshot(snapshotEntry);
                     } else if (state == State.STARTED) {
                         // snapshot is started - mark every non completed shard as aborted
                         final ImmutableOpenMap.Builder<ShardId, ShardSnapshotStatus> shardsBuilder = ImmutableOpenMap.builder();