-
Notifications
You must be signed in to change notification settings - Fork 24.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix Issue with Concurrent Snapshot Init + Delete #38518
Changes from 2 commits
6387014
3a95407
28128cd
5aafa44
46bb7a3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -331,7 +331,6 @@ public void onFailure(final Exception e) { | |
public TimeValue timeout() { | ||
return request.masterNodeTimeout(); | ||
} | ||
|
||
}); | ||
} | ||
|
||
|
@@ -394,6 +393,8 @@ private void beginSnapshot(final ClusterState clusterState, | |
|
||
boolean snapshotCreated; | ||
|
||
boolean hadAbortedInitializations; | ||
|
||
@Override | ||
protected void doRun() { | ||
assert initializingSnapshots.contains(snapshot.snapshot()); | ||
|
@@ -433,6 +434,8 @@ public ClusterState execute(ClusterState currentState) { | |
|
||
if (entry.state() == State.ABORTED) { | ||
entries.add(entry); | ||
assert entry.shards().isEmpty(); | ||
hadAbortedInitializations = true; | ||
} else { | ||
// Replace the snapshot that was just initialized | ||
ImmutableOpenMap<ShardId, ShardSnapshotStatus> shards = | ||
|
@@ -491,6 +494,16 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS | |
// completion listener in this method. For the snapshot completion to work properly, the snapshot | ||
// should still exist when listener is registered. | ||
userCreateSnapshotListener.onResponse(snapshot.snapshot()); | ||
|
||
if (hadAbortedInitializations) { | ||
final SnapshotsInProgress snapshotsInProgress = newState.custom(SnapshotsInProgress.TYPE); | ||
if (snapshotsInProgress != null) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should be able to assert `snapshotsInProgress != null` here? |
||
final SnapshotsInProgress.Entry entry = snapshotsInProgress.snapshot(snapshot.snapshot()); | ||
if (entry != null) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should be able to assert `entry != null` here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I guess I was paranoid about potentially throwing an NPE in the cluster state thread :D but yea, |
||
endSnapshot(entry); | ||
} | ||
} | ||
} | ||
} | ||
}); | ||
} | ||
|
@@ -701,8 +714,8 @@ public void applyClusterState(ClusterChangedEvent event) { | |
// 3. Snapshots in any other state that have all their shard tasks completed | ||
snapshotsInProgress.entries().stream().filter( | ||
entry -> entry.state().completed() | ||
|| entry.state() == State.INIT && initializingSnapshots.contains(entry.snapshot()) == false | ||
|| entry.state() != State.INIT && completed(entry.shards().values()) | ||
|| initializingSnapshots.contains(entry.snapshot()) == false | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Before, we were finalising all snapshots that had all their shards completed here. With this change, we will never finalise a snapshot here that is still being initialised. |
||
&& (entry.state() == State.INIT || completed(entry.shards().values())) | ||
).forEach(this::endSnapshot); | ||
} | ||
if (newMaster) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since we no longer finalise this case in `applyClusterState`, we need to end it once the cluster state for the init has been processed, so we set the flag here to mark that case.