From 0f93250a85e61444e1f583908415704bf3f0da89 Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 31 May 2024 14:29:37 +0100 Subject: [PATCH] Fix double-pausing shard snapshot (#109148) (#109245) Closes #109143 --- docs/changelog/109148.yaml | 6 ++ .../snapshots/SnapshotsService.java | 9 +++ .../snapshots/SnapshotsServiceTests.java | 64 +++++++++++++++++++ 3 files changed, 79 insertions(+) create mode 100644 docs/changelog/109148.yaml diff --git a/docs/changelog/109148.yaml b/docs/changelog/109148.yaml new file mode 100644 index 0000000000000..902da6f1a1db3 --- /dev/null +++ b/docs/changelog/109148.yaml @@ -0,0 +1,6 @@ +pr: 109148 +summary: Fix double-pausing shard snapshot +area: Snapshot/Restore +type: bug +issues: + - 109143 diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index d505a6ded4809..1bc571f7c7750 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -3354,6 +3354,15 @@ private void executeShardSnapshotUpdate( updatedState = updateSnapshotState.updatedState; } + if (updatedState.state() == ShardState.PAUSED_FOR_NODE_REMOVAL) { + // leave subsequent entries for this shard alone until this one is unpaused + iterator.remove(); + } else { + // All other shard updates leave the shard in a complete state, which means we should leave this update in the list so + // it can fall through to later entries and start any waiting shard snapshots: + assert updatedState.isActive() == false : updatedState; + } + logger.trace("[{}] Updating shard [{}] with status [{}]", updateSnapshotState.snapshot, updatedShard, updatedState.state()); changedCount++; newStates.get().put(updatedShard, updatedState); diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotsServiceTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotsServiceTests.java index 56a28b11edfe7..bcc7a23bbec53 100644 --- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotsServiceTests.java +++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotsServiceTests.java @@ -401,6 +401,70 @@ public void testCompletedCloneStartsNextClone() throws Exception { assertIsNoop(updatedClusterState, completeShardClone); } + public void testPauseForNodeRemovalWithQueuedShards() throws Exception { + final var repoName = "test-repo"; + final var snapshot1 = snapshot(repoName, "snap-1"); + final var snapshot2 = snapshot(repoName, "snap-2"); + final var indexName = "index-1"; + final var shardId = new ShardId(index(indexName), 0); + final var repositoryShardId = new RepositoryShardId(indexId(indexName), 0); + final var nodeId = uuid(); + + final var runningEntry = snapshotEntry( + snapshot1, + Collections.singletonMap(indexName, repositoryShardId.index()), + Map.of(shardId, initShardStatus(nodeId)) + ); + + final var queuedEntry = snapshotEntry( + snapshot2, + Collections.singletonMap(indexName, repositoryShardId.index()), + Map.of(shardId, SnapshotsInProgress.ShardSnapshotStatus.UNASSIGNED_QUEUED) + ); + + final var initialState = stateWithSnapshots( + ClusterState.builder(ClusterState.EMPTY_STATE) + .nodes(DiscoveryNodes.builder().add(DiscoveryNodeUtils.create(nodeId)).localNodeId(nodeId).masterNodeId(nodeId).build()) + .routingTable( + RoutingTable.builder() + .add( + IndexRoutingTable.builder(shardId.getIndex()) + .addShard(TestShardRouting.newShardRouting(shardId, nodeId, true, ShardRoutingState.STARTED)) + ) + .build() + ) + .build(), + repoName, + runningEntry, + queuedEntry + ); + + final var updatedState = applyUpdates( + initialState, + new SnapshotsService.ShardSnapshotUpdate( + snapshot1, + shardId, + null, + new SnapshotsInProgress.ShardSnapshotStatus( + nodeId, + SnapshotsInProgress.ShardState.PAUSED_FOR_NODE_REMOVAL, + runningEntry.shards().get(shardId).generation() + ), + ActionTestUtils.assertNoFailureListener(t -> {}) + ) + ); + + assertEquals( + SnapshotsInProgress.ShardState.PAUSED_FOR_NODE_REMOVAL, + SnapshotsInProgress.get(updatedState).snapshot(snapshot1).shards().get(shardId).state() + ); + + assertEquals( + SnapshotsInProgress.ShardState.QUEUED, + SnapshotsInProgress.get(updatedState).snapshot(snapshot2).shards().get(shardId).state() + ); + } + public void testSnapshottingIndicesExcludesClones() { final String repoName = "test-repo"; final String indexName = "index";