From cffd0a26268a84e09de961be0df3672caacba4ce Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Fri, 8 Feb 2019 15:59:19 +0100 Subject: [PATCH] Fix Issue with Concurrent Snapshot Init + Delete (#38518) * Fix Issue with Concurrent Snapshot Init + Delete by ensuring that we're not finalizing a snapshot in the repository while it is initializing on another thread * Closes #38489 --- .../snapshots/SnapshotsService.java | 17 ++++++++++++++--- .../DedicatedClusterSnapshotRestoreIT.java | 1 - 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index e2628fda991bd..f6ed3eb75d859 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -331,7 +331,6 @@ public void onFailure(final Exception e) { public TimeValue timeout() { return request.masterNodeTimeout(); } - }); } @@ -394,6 +393,8 @@ private void beginSnapshot(final ClusterState clusterState, boolean snapshotCreated; + boolean hadAbortedInitializations; + @Override protected void doRun() { assert initializingSnapshots.contains(snapshot.snapshot()); @@ -433,6 +434,8 @@ public ClusterState execute(ClusterState currentState) { if (entry.state() == State.ABORTED) { entries.add(entry); + assert entry.shards().isEmpty(); + hadAbortedInitializations = true; } else { // Replace the snapshot that was just initialized ImmutableOpenMap shards = @@ -491,6 +494,14 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS // completion listener in this method. For the snapshot completion to work properly, the snapshot // should still exist when listener is registered. userCreateSnapshotListener.onResponse(snapshot.snapshot()); + + if (hadAbortedInitializations) { + final SnapshotsInProgress snapshotsInProgress = newState.custom(SnapshotsInProgress.TYPE); + assert snapshotsInProgress != null; + final SnapshotsInProgress.Entry entry = snapshotsInProgress.snapshot(snapshot.snapshot()); + assert entry != null; + endSnapshot(entry); + } } }); } @@ -701,8 +712,8 @@ public void applyClusterState(ClusterChangedEvent event) { // 3. Snapshots in any other state that have all their shard tasks completed snapshotsInProgress.entries().stream().filter( entry -> entry.state().completed() - || entry.state() == State.INIT && initializingSnapshots.contains(entry.snapshot()) == false - || entry.state() != State.INIT && completed(entry.shards().values()) + || initializingSnapshots.contains(entry.snapshot()) == false + && (entry.state() == State.INIT || completed(entry.shards().values())) ).forEach(this::endSnapshot); } if (newMaster) { diff --git a/server/src/test/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java b/server/src/test/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java index b118d3a3d4933..433cbe2fdaa6c 100644 --- a/server/src/test/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java +++ b/server/src/test/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java @@ -855,7 +855,6 @@ public void testMasterShutdownDuringSnapshot() throws Exception { assertEquals(0, snapshotInfo.failedShards()); } - public void testMasterAndDataShutdownDuringSnapshot() throws Exception { logger.info("--> starting three master nodes and two data nodes"); internalCluster().startMasterOnlyNodes(3);