Skip to content

Commit

Permalink
Fix Finalizing Failed Shard Snapshots (elastic#75953)
Browse files Browse the repository at this point in the history
We must never write generations extracted out of failed shard snapshot status values
to the repository as these can not be trusted in all cases. Instead we must always put a `null`
for these into the generations object to write to the repo so that the existing generations are
not changed.
  • Loading branch information
original-brownbear committed Aug 16, 2021
1 parent 0f61ef4 commit 38dea23
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1611,6 +1611,40 @@ public void testOutOfOrderCloneFinalization() throws Exception {
);
}

public void testCorrectlyFinalizeOutOfOrderPartialFailures() throws Exception {
internalCluster().startMasterOnlyNode();
final String dataNode1 = internalCluster().startDataOnlyNode();
final String dataNode2 = internalCluster().startDataOnlyNode();
final String index1 = "index-1";
final String index2 = "index-2";
createIndexWithContent(index1, dataNode1, dataNode2);
createIndexWithContent(index2, dataNode2, dataNode1);

final String repository = "test-repo";
createRepository(repository, "mock");

createFullSnapshot(repository, "snapshot-1");
index(index1, "_doc", "some_doc", org.elasticsearch.core.Map.of("foo", "bar"));
index(index2, "_doc", "some_doc", org.elasticsearch.core.Map.of("foo", "bar"));
blockAndFailDataNode(repository, dataNode1);
blockDataNode(repository, dataNode2);
final ActionFuture<CreateSnapshotResponse> snapshotBlocked = startFullSnapshot(repository, "snapshot-2");
waitForBlock(dataNode1, repository);
waitForBlock(dataNode2, repository);

unblockNode(repository, dataNode1);
assertAcked(clusterAdmin().prepareCloneSnapshot(repository, "snapshot-1", "target-1").setIndices(index1).get());
unblockNode(repository, dataNode2);
snapshotBlocked.get();

assertThat(
clusterAdmin().prepareSnapshotStatus().setSnapshots("target-1").setRepository(repository).get().getSnapshots(),
hasSize(1)
);

createFullSnapshot(repository, "snapshot-3");
}

public void testIndexDeletedWhileSnapshotQueuedAfterClone() throws Exception {
final String master = internalCluster().startMasterOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS);
internalCluster().startDataOnlyNode();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.elasticsearch.repositories;

import org.elasticsearch.cluster.SnapshotsInProgress;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus;

Expand Down Expand Up @@ -212,6 +213,11 @@ public Builder putAll(ShardGenerations shardGenerations) {
return this;
}

public Builder put(IndexId indexId, int shardId, SnapshotsInProgress.ShardSnapshotStatus status) {
// only track generations for successful shard status values
return put(indexId, shardId, status.state().failed() ? null : status.generation());
}

public Builder put(IndexId indexId, int shardId, ShardGeneration generation) {
ShardGeneration existingGeneration = generations.computeIfAbsent(indexId, i -> new HashMap<>()).put(shardId, generation);
assert generation != null || existingGeneration == null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1293,15 +1293,15 @@ public void onNoLongerMaster() {
private static ShardGenerations buildGenerations(SnapshotsInProgress.Entry snapshot, Metadata metadata) {
ShardGenerations.Builder builder = ShardGenerations.builder();
if (snapshot.isClone()) {
snapshot.shardsByRepoShardId().forEach(c -> builder.put(c.key.index(), c.key.shardId(), c.value.generation()));
snapshot.shardsByRepoShardId().forEach(c -> builder.put(c.key.index(), c.key.shardId(), c.value));
} else {
snapshot.shardsByRepoShardId().forEach(c -> {
final Index index = snapshot.indexByName(c.key.indexName());
if (metadata.index(index) == null) {
assert snapshot.partial() : "Index [" + index + "] was deleted during a snapshot but snapshot was not partial.";
return;
}
builder.put(c.key.index(), c.key.shardId(), c.value.generation());
builder.put(c.key.index(), c.key.shardId(), c.value);
});
}
return builder.build();
Expand Down

0 comments on commit 38dea23

Please sign in to comment.