-
Notifications
You must be signed in to change notification settings - Fork 24.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Introduce SNAPSHOT_META Threadpool for Fetching Repository Metadata #73172
Changes from all commits
c4e1a43
f513d2a
b4ba77e
28eea50
fb55daa
24cf149
bde33f6
9f465ac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,6 @@ | |
import org.apache.lucene.util.CollectionUtil; | ||
import org.elasticsearch.ElasticsearchException; | ||
import org.elasticsearch.action.ActionListener; | ||
import org.elasticsearch.action.ActionRunnable; | ||
import org.elasticsearch.action.StepListener; | ||
import org.elasticsearch.action.admin.cluster.repositories.get.TransportGetRepositoriesAction; | ||
import org.elasticsearch.action.support.ActionFilters; | ||
|
@@ -34,6 +33,7 @@ | |
import org.elasticsearch.repositories.RepositoriesService; | ||
import org.elasticsearch.repositories.Repository; | ||
import org.elasticsearch.repositories.RepositoryData; | ||
import org.elasticsearch.repositories.RepositoryMissingException; | ||
import org.elasticsearch.snapshots.SnapshotException; | ||
import org.elasticsearch.snapshots.SnapshotId; | ||
import org.elasticsearch.snapshots.SnapshotInfo; | ||
|
@@ -46,12 +46,15 @@ | |
import org.elasticsearch.transport.TransportService; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
import java.util.concurrent.BlockingQueue; | ||
import java.util.concurrent.LinkedBlockingQueue; | ||
import java.util.stream.Collectors; | ||
|
||
import static java.util.Collections.unmodifiableList; | ||
|
@@ -211,8 +214,7 @@ private void loadSnapshotInfos(SnapshotsInProgress snapshotsInProgress, String r | |
} | ||
|
||
if (verbose) { | ||
threadPool.generic().execute(ActionRunnable.supply( | ||
listener, () -> snapshots(snapshotsInProgress, repo, new ArrayList<>(toResolve), ignoreUnavailable, task))); | ||
snapshots(snapshotsInProgress, repo, toResolve, ignoreUnavailable, task, listener); | ||
} else { | ||
final List<SnapshotInfo> snapshotInfos; | ||
if (repositoryData != null) { | ||
|
@@ -235,12 +237,16 @@ private void loadSnapshotInfos(SnapshotsInProgress snapshotsInProgress, String r | |
* @param snapshotIds snapshots for which to fetch snapshot information | ||
* @param ignoreUnavailable if true, snapshots that could not be read will only be logged with a warning, | ||
* if false, they will throw an error | ||
* @return list of snapshots | ||
*/ | ||
private List<SnapshotInfo> snapshots(SnapshotsInProgress snapshotsInProgress, String repositoryName, | ||
List<SnapshotId> snapshotIds, boolean ignoreUnavailable, CancellableTask task) { | ||
private void snapshots(SnapshotsInProgress snapshotsInProgress, | ||
String repositoryName, | ||
Collection<SnapshotId> snapshotIds, | ||
boolean ignoreUnavailable, | ||
CancellableTask task, | ||
ActionListener<List<SnapshotInfo>> listener) { | ||
if (task.isCancelled()) { | ||
throw new TaskCancelledException("task cancelled"); | ||
listener.onFailure(new TaskCancelledException("task cancelled")); | ||
return; | ||
} | ||
final Set<SnapshotInfo> snapshotSet = new HashSet<>(); | ||
final Set<SnapshotId> snapshotIdsToIterate = new HashSet<>(snapshotIds); | ||
|
@@ -252,28 +258,88 @@ private List<SnapshotInfo> snapshots(SnapshotsInProgress snapshotsInProgress, St | |
snapshotSet.add(new SnapshotInfo(entry)); | ||
} | ||
} | ||
// then, look in the repository | ||
final Repository repository = repositoriesService.repository(repositoryName); | ||
for (SnapshotId snapshotId : snapshotIdsToIterate) { | ||
// then, look in the repository if there's any matching snapshots left | ||
final List<SnapshotInfo> snapshotInfos; | ||
if (snapshotIdsToIterate.isEmpty()) { | ||
snapshotInfos = Collections.emptyList(); | ||
} else { | ||
snapshotInfos = Collections.synchronizedList(new ArrayList<>()); | ||
} | ||
final ActionListener<Collection<Void>> allDoneListener = listener.delegateFailure((l, v) -> { | ||
final ArrayList<SnapshotInfo> snapshotList = new ArrayList<>(snapshotInfos); | ||
snapshotList.addAll(snapshotSet); | ||
CollectionUtil.timSort(snapshotList); | ||
listener.onResponse(unmodifiableList(snapshotList)); | ||
}); | ||
if (snapshotIdsToIterate.isEmpty()) { | ||
allDoneListener.onResponse(Collections.emptyList()); | ||
return; | ||
} | ||
// put snapshot info downloads into a task queue instead of submitting them all to the snapshot meta pool at once, so that a | ||
// single request does not completely monopolize the snapshot meta pool | ||
final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT_META).getMax(), snapshotIdsToIterate.size()); | ||
final BlockingQueue<SnapshotId> queue = new LinkedBlockingQueue<>(snapshotIdsToIterate); | ||
final ActionListener<Void> workerDoneListener = new GroupedActionListener<>(allDoneListener, workers).delegateResponse((l, e) -> { | ||
queue.clear(); // Stop fetching the remaining snapshots once we've failed fetching one since the response is an error response | ||
// anyway in this case | ||
l.onFailure(e); | ||
}); | ||
final Repository repository; | ||
try { | ||
repository = repositoriesService.repository(repositoryName); | ||
} catch (RepositoryMissingException e) { | ||
listener.onFailure(e); | ||
return; | ||
} | ||
for (int i = 0; i < workers; i++) { | ||
getOneSnapshotInfo( | ||
ignoreUnavailable, | ||
repository, | ||
queue, | ||
snapshotInfos, | ||
task, | ||
workerDoneListener | ||
); | ||
} | ||
} | ||
|
||
/** | ||
* Tries to poll a {@link SnapshotId} from the given {@code queue}. If it finds one in the queue, | ||
* loads the {@link SnapshotInfo} for it from the repository and adds it to the given {@code snapshotInfos} collection, then invokes itself again to | ||
* try and poll another task from the queue. | ||
* If the queue is empty, resolves the given {@code listener}. | ||
*/ | ||
private void getOneSnapshotInfo(boolean ignoreUnavailable, | ||
Repository repository, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if we should retrieve the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could I guess but it's not going to be a big cleanup/win since this situation is somewhat broken to begin with. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree but I still think it is worth not mixing thrown exceptions and listeners here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh 🤦 now I get your comment. Sorry, I completely misread it for no good reason :( => Fix coming right up. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I pushed fb55daa to address this (and random formatting noise) now :) I went with this instead of looking up the repo in the loop, because the latter would be caught and suppressed by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fb55daa looks good, thanks! And sorry if I wasn't clear at first :) |
||
BlockingQueue<SnapshotId> queue, | ||
Collection<SnapshotInfo> snapshotInfos, | ||
CancellableTask task, | ||
ActionListener<Void> listener) { | ||
final SnapshotId snapshotId = queue.poll(); | ||
if (snapshotId == null) { | ||
listener.onResponse(null); | ||
return; | ||
} | ||
threadPool.executor(ThreadPool.Names.SNAPSHOT_META).execute(() -> { | ||
if (task.isCancelled()) { | ||
throw new TaskCancelledException("task cancelled"); | ||
listener.onFailure(new TaskCancelledException("task cancelled")); | ||
return; | ||
} | ||
try { | ||
snapshotSet.add(repository.getSnapshotInfo(snapshotId)); | ||
snapshotInfos.add(repository.getSnapshotInfo(snapshotId)); | ||
} catch (Exception ex) { | ||
if (ignoreUnavailable) { | ||
logger.warn(() -> new ParameterizedMessage("failed to get snapshot [{}]", snapshotId), ex); | ||
} else { | ||
if (ex instanceof SnapshotException) { | ||
throw ex; | ||
} | ||
throw new SnapshotException(repositoryName, snapshotId, "Snapshot could not be read", ex); | ||
listener.onFailure( | ||
ex instanceof SnapshotException | ||
? ex | ||
: new SnapshotException(repository.getMetadata().name(), snapshotId, "Snapshot could not be read", ex) | ||
); | ||
} | ||
} | ||
} | ||
final ArrayList<SnapshotInfo> snapshotList = new ArrayList<>(snapshotSet); | ||
CollectionUtil.timSort(snapshotList); | ||
return unmodifiableList(snapshotList); | ||
getOneSnapshotInfo(ignoreUnavailable, repository, queue, snapshotInfos, task, listener); | ||
}); | ||
} | ||
|
||
private boolean isAllSnapshots(String[] snapshots) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: can we add a bit of Javadoc?