Skip to content

Commit

Permalink
CCS with minimize_roundtrips performs incremental merges of each Sear…
Browse files Browse the repository at this point in the history
…chResponse (#103134)

* CCS with minimize_roundtrips performs incremental merges of each SearchResponse

To help address the issue of slow-to-respond clusters in a cross-cluster search,
async-search based CCS with minimize_roundtrips=true performs incremental
merges of each SearchResponse as they come in from each cluster (including
the local cluster).

This means, any time the user calls GET _async_search/:id, they will now get
search hits and/or aggregation results from any clusters that have finished so far,
as well as any partial aggs from the local cluster (existing functionality).

The `is_running` field in the async-search response should be used to determine
whether at least one cluster has still not reported back its final results.

The SearchResponses are collected by MutableSearchResponse.
When a user requests an AsyncSearchResponse, if the final response (from onResponse)
has not been received, then it will create a new SearchResponseMerger on the fly
using the Supplier of SearchResponseMerger in the SearchTask. This is non-null only
for CCS MRT=true.
  • Loading branch information
quux00 authored Jan 16, 2024
1 parent 0c98fb2 commit 9cc331c
Show file tree
Hide file tree
Showing 9 changed files with 765 additions and 26 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/103134.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 103134
summary: CCS with `minimize_roundtrips` performs incremental merges of each `SearchResponse`
area: Search
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,15 @@ protected void onFetchResult(int shardIndex) {}
*/
protected void onFetchFailure(int shardIndex, SearchShardTarget shardTarget, Exception exc) {}

/**
* Indicates that a cluster has finished a search operation. Used for CCS minimize_roundtrips=true only.
*
* @param clusterAlias alias of cluster that has finished a search operation and returned a SearchResponse.
* The cluster alias for the local cluster is RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY.
* @param searchResponse SearchResponse from cluster 'clusterAlias'
*/
protected void onClusterResponseMinimizeRoundtrips(String clusterAlias, SearchResponse searchResponse) {}

final void notifyListShards(
List<SearchShard> shards,
List<SearchShard> skippedShards,
Expand Down Expand Up @@ -167,6 +176,14 @@ final void notifyFetchFailure(int shardIndex, SearchShardTarget shardTarget, Exc
}
}

final void notifyClusterResponseMinimizeRoundtrips(String clusterAlias, SearchResponse searchResponse) {
try {
onClusterResponseMinimizeRoundtrips(clusterAlias, searchResponse);
} catch (Exception e) {
logger.warn(() -> "[" + clusterAlias + "] Failed to execute progress listener onResponseMinimizeRoundtrips", e);
}
}

static List<SearchShard> buildSearchShards(List<? extends SearchPhaseResult> results) {
return results.stream()
.filter(Objects::nonNull)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
// TODO it may make sense to integrate the remote clusters responses as a shard response in the initial search phase and ignore hits coming
// from the remote clusters in the fetch phase. This would be identical to the removed QueryAndFetch strategy except that only the remote
// cluster response would have the fetch results.
final class SearchResponseMerger implements Releasable {
public final class SearchResponseMerger implements Releasable {
final int from;
final int size;
final int trackTotalHitsUpTo;
Expand Down Expand Up @@ -98,7 +98,7 @@ final class SearchResponseMerger implements Releasable {
* Merges currently happen at once when all responses are available and {@link #getMergedResponse(Clusters)} )} is called.
* That may change in the future as it's possible to introduce incremental merges as responses come in if necessary.
*/
void add(SearchResponse searchResponse) {
public void add(SearchResponse searchResponse) {
assert searchResponse.getScrollId() == null : "merging scroll results is not supported";
searchResponse.mustIncRef();
searchResponses.add(searchResponse);
Expand All @@ -109,10 +109,13 @@ int numResponses() {
}

/**
* Returns the merged response. To be called once all responses have been added through {@link #add(SearchResponse)}
* so that all responses are merged into a single one.
* Returns the merged response of all SearchResponses received so far. Can be called at any point,
* including when only some clusters have finished, in order to get "incremental" partial results.
* @param clusters The Clusters object for the search to report on the status of each cluster
* involved in the cross-cluster search
* @return merged response
*/
SearchResponse getMergedResponse(Clusters clusters) {
public SearchResponse getMergedResponse(Clusters clusters) {
// if the search is only across remote clusters, none of them are available, and all of them have skip_unavailable set to true,
// we end up calling merge without anything to merge, we just return an empty search response
if (searchResponses.size() == 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ public class SearchTask extends CancellableTask {
// generating description in a lazy way since source can be quite big
private final Supplier<String> descriptionSupplier;
private SearchProgressListener progressListener = SearchProgressListener.NOOP;
private Supplier<SearchResponseMerger> searchResponseMergerSupplier; // used for CCS minimize_roundtrips=true

public SearchTask(
long id,
Expand Down Expand Up @@ -53,4 +54,19 @@ public final SearchProgressListener getProgressListener() {
return progressListener;
}

/**
* @return the Supplier of {@link SearchResponseMerger} attached to this task. Will be null
* for local-only search and cross-cluster searches with minimize_roundtrips=false.
*/
public Supplier<SearchResponseMerger> getSearchResponseMergerSupplier() {
return searchResponseMergerSupplier;
}

/**
* @param supplier Attach a Supplier of {@link SearchResponseMerger} to this task.
* For use with CCS minimize_roundtrips=true
*/
public void setSearchResponseMergerSupplier(Supplier<SearchResponseMerger> supplier) {
this.searchResponseMergerSupplier = supplier;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,7 @@ void executeRequest(
.notifyListShards(Collections.emptyList(), Collections.emptyList(), clusters, false, timeProvider);
}
ccsRemoteReduce(
task,
parentTaskId,
rewritten,
localIndices,
Expand Down Expand Up @@ -496,6 +497,7 @@ public static boolean shouldMinimizeRoundtrips(SearchRequest searchRequest) {
* Handles ccs_minimize_roundtrips=true
*/
static void ccsRemoteReduce(
SearchTask task,
TaskId parentTaskId,
SearchRequest searchRequest,
OriginalIndices localIndices,
Expand Down Expand Up @@ -532,7 +534,6 @@ static void ccsRemoteReduce(
remoteClusterClient.search(ccsSearchRequest, new ActionListener<>() {
@Override
public void onResponse(SearchResponse searchResponse) {
// TODO: in CCS fail fast ticket we may need to fail the query if the cluster is marked as FAILED
// overwrite the existing cluster entry with the updated one
ccsClusterInfoUpdate(searchResponse, clusters, clusterAlias, skipUnavailable);
Map<String, SearchProfileShardResult> profileResults = searchResponse.getProfileResults();
Expand Down Expand Up @@ -580,6 +581,9 @@ public void onFailure(Exception e) {
timeProvider,
aggReduceContextBuilder
);
task.setSearchResponseMergerSupplier(
() -> createSearchResponseMerger(searchRequest.source(), timeProvider, aggReduceContextBuilder)
);
final AtomicReference<Exception> exceptions = new AtomicReference<>();
int totalClusters = remoteIndices.size() + (localIndices == null ? 0 : 1);
final CountDown countDown = new CountDown(totalClusters);
Expand All @@ -602,6 +606,7 @@ public void onFailure(Exception e) {
exceptions,
searchResponseMerger,
clusters,
task.getProgressListener(),
listener
);
Client remoteClusterClient = remoteClusterService.getRemoteClusterClient(
Expand All @@ -619,6 +624,7 @@ public void onFailure(Exception e) {
exceptions,
searchResponseMerger,
clusters,
task.getProgressListener(),
listener
);
SearchRequest ccsLocalSearchRequest = SearchRequest.subSearchRequest(
Expand Down Expand Up @@ -759,6 +765,7 @@ private static ActionListener<SearchResponse> createCCSListener(
AtomicReference<Exception> exceptions,
SearchResponseMerger searchResponseMerger,
SearchResponse.Clusters clusters,
SearchProgressListener progressListener,
ActionListener<SearchResponse> originalListener
) {
return new CCSActionListener<>(
Expand All @@ -771,9 +778,9 @@ private static ActionListener<SearchResponse> createCCSListener(
) {
@Override
void innerOnResponse(SearchResponse searchResponse) {
// TODO: in CCS fail fast ticket we may need to fail the query if the cluster gets marked as FAILED
ccsClusterInfoUpdate(searchResponse, clusters, clusterAlias, skipUnavailable);
searchResponseMerger.add(searchResponse);
progressListener.notifyClusterResponseMinimizeRoundtrips(clusterAlias, searchResponse);
}

@Override
Expand Down Expand Up @@ -1494,7 +1501,6 @@ public final void onFailure(Exception e) {
if (cluster != null) {
ccsClusterInfoUpdate(f, clusters, clusterAlias, true);
}
// skippedClusters.incrementAndGet();
} else {
if (cluster != null) {
ccsClusterInfoUpdate(f, clusters, clusterAlias, false);
Expand Down
Loading

0 comments on commit 9cc331c

Please sign in to comment.