Skip to content

Commit

Permalink
Fix Two Races that Lead to Stuck Snapshots (#37686)
Browse files Browse the repository at this point in the history
* Fixes two broken spots:
    1. A snapshot delete that runs across a master failover will get stuck if the snapshot has no shards and the new master finds this 0-shard snapshot in `INIT` state
    2. Aborted shards that were never seen in `INIT` state by the `SnapshotShardsService` will not be reported to the master as failed, so the snapshot stays in `ABORTED` state and never gets deleted, with one or more shards stuck in `ABORTED` state
* Tried to make fixes as short as possible so we can backport to `6.x` with the least amount of risk
* Significantly extended test infrastructure to reproduce the above two issues
  * Two new test runs:
      1. Reproducing the effects of node disconnects/restarts in isolation
      2. Reproducing the effects of disconnects/restarts in parallel with shard relocations and deletes
* Relates #32265 
* Closes #32348
  • Loading branch information
original-brownbear authored Feb 1, 2019
1 parent 0d56955 commit 0a604e3
Show file tree
Hide file tree
Showing 6 changed files with 1,096 additions and 638 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@
import org.elasticsearch.repositories.IndexId;
import org.elasticsearch.repositories.Repository;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.EmptyTransportResponseHandler;
import org.elasticsearch.transport.TransportException;
import org.elasticsearch.transport.TransportRequestDeduplicator;
import org.elasticsearch.transport.TransportResponse;
import org.elasticsearch.transport.TransportService;

import java.io.IOException;
Expand All @@ -85,7 +89,6 @@
import static java.util.Collections.emptyMap;
import static java.util.Collections.unmodifiableMap;
import static org.elasticsearch.cluster.SnapshotsInProgress.completed;
import static org.elasticsearch.transport.EmptyTransportResponseHandler.INSTANCE_SAME;

/**
* This service runs on data and master nodes and controls currently snapshotted shards on these nodes. It is responsible for
Expand All @@ -112,6 +115,10 @@ public class SnapshotShardsService extends AbstractLifecycleComponent implements

private volatile Map<Snapshot, Map<ShardId, IndexShardSnapshotStatus>> shardSnapshots = emptyMap();

// Deduplicator through which sendSnapshotShardUpdate issues its shard snapshot status update requests
private final TransportRequestDeduplicator<UpdateIndexShardSnapshotStatusRequest> remoteFailedRequestDeduplicator =
new TransportRequestDeduplicator<>();

private final SnapshotStateExecutor snapshotStateExecutor = new SnapshotStateExecutor();
private final UpdateSnapshotStatusAction updateSnapshotStatusHandler;

Expand Down Expand Up @@ -272,12 +279,11 @@ private void processIndexShardSnapshots(ClusterChangedEvent event) {
// Abort all running shards for this snapshot
Map<ShardId, IndexShardSnapshotStatus> snapshotShards = shardSnapshots.get(entry.snapshot());
if (snapshotShards != null) {
final String failure = "snapshot has been aborted";
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shard : entry.shards()) {

final IndexShardSnapshotStatus snapshotStatus = snapshotShards.get(shard.key);
if (snapshotStatus != null) {
final IndexShardSnapshotStatus.Copy lastSnapshotStatus = snapshotStatus.abortIfNotCompleted(failure);
final IndexShardSnapshotStatus.Copy lastSnapshotStatus =
snapshotStatus.abortIfNotCompleted("snapshot has been aborted");
final Stage stage = lastSnapshotStatus.getStage();
if (stage == Stage.FINALIZE) {
logger.debug("[{}] trying to cancel snapshot on shard [{}] that is finalizing, " +
Expand All @@ -295,6 +301,15 @@ private void processIndexShardSnapshots(ClusterChangedEvent event) {
}
}
}
} else {
final Snapshot snapshot = entry.snapshot();
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> curr : entry.shards()) {
// due to CS batching we might have missed the INIT state and gone straight to ABORTED
// notify master that abort has completed by moving to FAILED
if (curr.value.state() == State.ABORTED) {
notifyFailedSnapshotShard(snapshot, curr.key, localNodeId, curr.value.reason());
}
}
}
}
}
Expand Down Expand Up @@ -515,12 +530,33 @@ void notifyFailedSnapshotShard(final Snapshot snapshot, final ShardId shardId, f

/**
 * Updates the shard snapshot status by sending a {@link UpdateIndexShardSnapshotStatusRequest} to the master node.
 * <p>
 * Failures are not propagated to the caller: they are logged at WARN level. A successful update is logged at TRACE level.
 * <p>
 * NOTE(review): the request is addressed to {@code transportService.getLocalNode()}; how it reaches the master is not
 * visible in this method - confirm against the handler registered for {@code UPDATE_SNAPSHOT_STATUS_ACTION_NAME}.
 * <p>
 * NOTE(review): this listing appears to be a rendered diff hunk that shows the removed direct send (the try/catch) and the
 * added deduplicated send together, without +/- markers - confirm against the commit before reading both as one body.
 *
 * @param snapshot snapshot placed in the request and named in the log messages
 * @param shardId  shard id placed in the request
 * @param status   shard snapshot status placed in the request and named in the log messages
 */
void sendSnapshotShardUpdate(final Snapshot snapshot, final ShardId shardId, final ShardSnapshotStatus status) {
// Direct send: builds the request and sends it with the shared INSTANCE_SAME response handler;
// any exception thrown while sending is caught and logged rather than rethrown.
try {
UpdateIndexShardSnapshotStatusRequest request = new UpdateIndexShardSnapshotStatusRequest(snapshot, shardId, status);
transportService.sendRequest(transportService.getLocalNode(), UPDATE_SNAPSHOT_STATUS_ACTION_NAME, request, INSTANCE_SAME);
} catch (Exception e) {
logger.warn(() -> new ParameterizedMessage("[{}] [{}] failed to update snapshot state", snapshot, status), e);
}
// Deduplicated send: hands the deduplicator the request, a listener that only logs the outcome, and a callback
// that performs the actual transport send.
// NOTE(review): presumably executeOnce suppresses an identical request that is already in flight - confirm
// against TransportRequestDeduplicator.
remoteFailedRequestDeduplicator.executeOnce(
new UpdateIndexShardSnapshotStatusRequest(snapshot, shardId, status),
new ActionListener<Void>() {
@Override
public void onResponse(Void aVoid) {
logger.trace("[{}] [{}] updated snapshot state", snapshot, status);
}

@Override
public void onFailure(Exception e) {
logger.warn(
() -> new ParameterizedMessage("[{}] [{}] failed to update snapshot state", snapshot, status), e);
}
},
// The send callback bridges the transport response handler to the listener supplied by the deduplicator:
// an empty response becomes onResponse(null), a transport exception becomes onFailure(exp).
(req, reqListener) -> transportService.sendRequest(transportService.getLocalNode(), UPDATE_SNAPSHOT_STATUS_ACTION_NAME, req,
new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
@Override
public void handleResponse(TransportResponse.Empty response) {
reqListener.onResponse(null);
}

@Override
public void handleException(TransportException exp) {
reqListener.onFailure(exp);
}
})
);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1210,7 +1210,10 @@ public ClusterState execute(ClusterState currentState) throws Exception {
if (state == State.INIT) {
// snapshot is still initializing, mark it as aborted
shards = snapshotEntry.shards();

assert shards.isEmpty();
// No shards in this snapshot, we delete it right away since the SnapshotShardsService
// has no work to do.
endSnapshot(snapshotEntry);
} else if (state == State.STARTED) {
// snapshot is started - mark every non completed shard as aborted
final ImmutableOpenMap.Builder<ShardId, ShardSnapshotStatus> shardsBuilder = ImmutableOpenMap.builder();
Expand Down
Loading

0 comments on commit 0a604e3

Please sign in to comment.