Skip to content

Commit

Permalink
Fix double-pausing shard snapshot (#109148) (#109245)
Browse files Browse the repository at this point in the history
Closes #109143
  • Loading branch information
DaveCTurner authored May 31, 2024
1 parent 630ce22 commit 0f93250
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 0 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/109148.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 109148
summary: Fix double-pausing shard snapshot
area: Snapshot/Restore
type: bug
issues:
- 109143
Original file line number Diff line number Diff line change
Expand Up @@ -3354,6 +3354,15 @@ private <T> void executeShardSnapshotUpdate(
updatedState = updateSnapshotState.updatedState;
}

if (updatedState.state() == ShardState.PAUSED_FOR_NODE_REMOVAL) {
// leave subsequent entries for this shard alone until this one is unpaused
iterator.remove();
} else {
// All other shard updates leave the shard in a complete state, which means we should leave this update in the list so
// it can fall through to later entries and start any waiting shard snapshots:
assert updatedState.isActive() == false : updatedState;
}

logger.trace("[{}] Updating shard [{}] with status [{}]", updateSnapshotState.snapshot, updatedShard, updatedState.state());
changedCount++;
newStates.get().put(updatedShard, updatedState);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,70 @@ public void testCompletedCloneStartsNextClone() throws Exception {
assertIsNoop(updatedClusterState, completeShardClone);
}

/**
 * Builds two snapshot entries for the same shard in one repository: the first with the shard in its initial
 * (running) status on a node, the second with the shard {@code UNASSIGNED_QUEUED}. Applies a
 * {@code PAUSED_FOR_NODE_REMOVAL} update to the first entry's shard and asserts that the first entry reports
 * the paused state while the second entry's shard is still {@code QUEUED}.
 */
public void testPauseForNodeRemovalWithQueuedShards() throws Exception {
    final var repoName = "test-repo";
    final var snapshot1 = snapshot(repoName, "snap-1");
    final var snapshot2 = snapshot(repoName, "snap-2");
    final var indexName = "index-1";
    final var shardId = new ShardId(index(indexName), 0);
    final var repositoryShardId = new RepositoryShardId(indexId(indexName), 0);
    final var nodeId = uuid();

    // First snapshot: the shard is actively being snapshotted on the node.
    final var runningShards = Map.of(shardId, initShardStatus(nodeId));
    final var runningEntry = snapshotEntry(
        snapshot1,
        Collections.singletonMap(indexName, repositoryShardId.index()),
        runningShards
    );

    // Second snapshot: the same shard is queued behind the first snapshot.
    final var queuedShards = Map.of(shardId, SnapshotsInProgress.ShardSnapshotStatus.UNASSIGNED_QUEUED);
    final var queuedEntry = snapshotEntry(
        snapshot2,
        Collections.singletonMap(indexName, repositoryShardId.index()),
        queuedShards
    );

    // Single-node cluster (local node is also master) holding the started primary of the shard.
    final var discoveryNodes = DiscoveryNodes.builder()
        .add(DiscoveryNodeUtils.create(nodeId))
        .localNodeId(nodeId)
        .masterNodeId(nodeId)
        .build();
    final var primaryRouting = TestShardRouting.newShardRouting(shardId, nodeId, true, ShardRoutingState.STARTED);
    final var routingTable = RoutingTable.builder()
        .add(IndexRoutingTable.builder(shardId.getIndex()).addShard(primaryRouting))
        .build();
    final var baseClusterState = ClusterState.builder(ClusterState.EMPTY_STATE)
        .nodes(discoveryNodes)
        .routingTable(routingTable)
        .build();
    final var initialState = stateWithSnapshots(baseClusterState, repoName, runningEntry, queuedEntry);

    // Pause the running shard snapshot, keeping the shard generation recorded in the running entry.
    final var pausedStatus = new SnapshotsInProgress.ShardSnapshotStatus(
        nodeId,
        SnapshotsInProgress.ShardState.PAUSED_FOR_NODE_REMOVAL,
        runningEntry.shards().get(shardId).generation()
    );
    final var pauseUpdate = new SnapshotsService.ShardSnapshotUpdate(
        snapshot1,
        shardId,
        null,
        pausedStatus,
        ActionTestUtils.assertNoFailureListener(t -> {})
    );
    final var updatedState = applyUpdates(initialState, pauseUpdate);

    final var snapshotsInProgress = SnapshotsInProgress.get(updatedState);

    // The updated entry must now be paused ...
    assertEquals(
        SnapshotsInProgress.ShardState.PAUSED_FOR_NODE_REMOVAL,
        snapshotsInProgress.snapshot(snapshot1).shards().get(shardId).state()
    );

    // ... and the entry queued behind it must be left untouched rather than started or paused as well.
    assertEquals(
        SnapshotsInProgress.ShardState.QUEUED,
        snapshotsInProgress.snapshot(snapshot2).shards().get(shardId).state()
    );
}

public void testSnapshottingIndicesExcludesClones() {
final String repoName = "test-repo";
final String indexName = "index";
Expand Down

0 comments on commit 0f93250

Please sign in to comment.