Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix aggregation memory leak for CCS #78404

Merged
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.search.ccs;

import org.elasticsearch.action.ActionFuture;
import org.elasticsearch.action.search.ClearScrollRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.query.MatchAllQueryBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.test.AbstractMultiClustersTestCase;
import org.elasticsearch.test.InternalTestCluster;
import org.elasticsearch.transport.TransportService;
import org.hamcrest.Matchers;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.Matchers.equalTo;

/**
 * Cross-cluster search (CCS) integration test whose real assertion is performed by the test
 * framework's leak detection: it drives searches across a local and a remote cluster in several
 * randomized modes and relies on the framework to fail the test if aggregation memory is leaked.
 */
public class CrossClusterSearchLeakIT extends AbstractMultiClustersTestCase {

// A single remote cluster ("cluster_a") is enough to exercise the CCS code paths.
@Override
protected Collection<String> remoteClusterAlias() {
return List.of("cluster_a");
}

// Do not reuse clusters between tests: this test mutates remote-connection settings
// (seeds / proxy mode) and relies on leak detection at cluster teardown.
@Override
protected boolean reuseClusters() {
return false;
}

/**
 * Indexes a random number (1..200) of documents with unique values {@code "v0".."vN"} into
 * {@code index} under {@code field}, refreshes, and returns the document count.
 */
private int indexDocs(Client client, String field, String index) {
int numDocs = between(1, 200);
for (int i = 0; i < numDocs; i++) {
client.prepareIndex(index).setSource(field, "v" + i).get();
}
// Refresh so the documents are visible to the searches issued below.
client.admin().indices().prepareRefresh(index).get();
return numDocs;
}

/**
 * This test validates that we do not leak any memory when running CCS in various modes, actual validation is done by test framework
 * (leak detection)
 * <ul>
 * <li>proxy vs non-proxy</li>
 * <li>single-phase query-fetch or multi-phase</li>
 * <li>minimize roundtrip vs not</li>
 * <li>scroll vs no scroll</li>
 * </ul>
 */
public void testSearch() throws Exception {
// Local index "demo" with a keyword field and 1-3 shards.
assertAcked(client(LOCAL_CLUSTER).admin().indices().prepareCreate("demo")
.setMapping("f", "type=keyword")
.setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, between(1, 3))));
indexDocs(client(LOCAL_CLUSTER), "ignored", "demo");
final InternalTestCluster remoteCluster = cluster("cluster_a");
int minRemotes = between(2, 5);
remoteCluster.ensureAtLeastNumDataNodes(minRemotes);
// Collect the names of all remote data nodes so we can pick a strict subset as seeds.
List<String> remoteDataNodes = StreamSupport.stream(remoteCluster.clusterService().state().nodes().spliterator(), false)
.filter(DiscoveryNode::canContainData)
.map(DiscoveryNode::getName)
.collect(Collectors.toList());
assertThat(remoteDataNodes.size(), Matchers.greaterThanOrEqualTo(minRemotes));
// Seed with fewer nodes than exist so some shards may live on non-seed nodes.
List<String> seedNodes = randomSubsetOf(between(1, remoteDataNodes.size() - 1), remoteDataNodes);
// Drop any existing remote connections before applying the randomized connection mode.
disconnectFromRemoteClusters();
configureRemoteCluster("cluster_a", seedNodes);
final Settings.Builder allocationFilter = Settings.builder();
if (rarely()) {
allocationFilter.put("index.routing.allocation.include._name", String.join(",", seedNodes));
} else {
// Provoke using proxy connections
allocationFilter.put("index.routing.allocation.exclude._name", String.join(",", seedNodes));
}
// Remote index "prod" constrained by the allocation filter chosen above; no replicas so
// allocation filtering fully determines shard placement.
assertAcked(client("cluster_a").admin().indices().prepareCreate("prod")
.setMapping("f", "type=keyword")
.setSettings(Settings.builder().put(allocationFilter.build())
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, between(1, 3))));
assertFalse(client("cluster_a").admin().cluster().prepareHealth("prod")
.setWaitForYellowStatus().setTimeout(TimeValue.timeValueSeconds(10)).get().isTimedOut());
int docs = indexDocs(client("cluster_a"), "f", "prod");

// Fire off 10 concurrent searches with randomized CCS dimensions (see method javadoc).
List<ActionFuture<SearchResponse>> futures = new ArrayList<>();
for (int i = 0; i < 10; ++i) {
String[] indices = randomBoolean() ? new String[] { "demo", "cluster_a:prod" } : new String[] { "cluster_a:prod" };
final SearchRequest searchRequest = new SearchRequest(indices);
searchRequest.allowPartialSearchResults(false);
// Terms agg sized >= docs so every unique value gets its own bucket; random hit size
// toggles single-phase query-fetch vs multi-phase execution.
searchRequest.source(new SearchSourceBuilder().query(new MatchAllQueryBuilder())
.aggregation(terms("f").field("f").size(docs + between(0, 10))).size(between(0, 1000)));
if (randomBoolean()) {
searchRequest.scroll("30s");
}
searchRequest.setCcsMinimizeRoundtrips(rarely());
futures.add(client(LOCAL_CLUSTER).search(searchRequest));
}

for (ActionFuture<SearchResponse> future : futures) {
SearchResponse searchResponse = future.get();
// Clean up scroll contexts so they do not linger until timeout.
if (searchResponse.getScrollId() != null) {
ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
clearScrollRequest.scrollIds(List.of(searchResponse.getScrollId()));
client(LOCAL_CLUSTER).clearScroll(clearScrollRequest).get();
}

// Sanity check: each unique value "v0".."vN" produced exactly one bucket of one doc.
Terms terms = searchResponse.getAggregations().get("f");
assertThat(terms.getBuckets().size(), equalTo(docs));
for (Terms.Bucket bucket : terms.getBuckets()) {
assertThat(bucket.getDocCount(), equalTo(1L));
}
}
}

/**
 * Randomly connects to the remote cluster either via the default (sniff) configuration or via
 * proxy mode pointed at a single seed node's publish address, to cover both connection styles.
 */
@Override
protected void configureRemoteCluster(String clusterAlias, Collection<String> seedNodes) throws Exception {
if (rarely()) {
super.configureRemoteCluster(clusterAlias, seedNodes);
} else {
final Settings.Builder settings = Settings.builder();
final String seedNode = randomFrom(seedNodes);
final TransportService transportService = cluster(clusterAlias).getInstance(TransportService.class, seedNode);
final String seedAddress = transportService.boundAddress().publishAddress().toString();

settings.put("cluster.remote." + clusterAlias + ".mode", "proxy");
settings.put("cluster.remote." + clusterAlias + ".proxy_address", seedAddress);
client().admin().cluster().prepareUpdateSettings().setPersistentSettings(settings).get();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ public void close() {
// Consumes a per-shard query result. The extra retain call keeps the shard-level aggregations
// alive until the (possibly asynchronous) merge in pendingMerges actually consumes them —
// ordering matters: retain must happen before handing the result to pendingMerges.consume.
public void consumeResult(SearchPhaseResult result, Runnable next) {
// NOTE(review): super is invoked with a no-op continuation; `next` is deliberately deferred
// to pendingMerges.consume below.
super.consumeResult(result, () -> {});
QuerySearchResult querySearchResult = result.queryResult();
querySearchResult.retainAggregationsUntilConsumed();
progressListener.notifyQueryResult(querySearchResult.getShardIndex());
pendingMerges.consume(querySearchResult, next);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,26 @@ public void writeTo(StreamOutput out) throws IOException {
queryResult.writeTo(out);
fetchResult.writeTo(out);
}


// Ref-counting is delegated wholesale to the wrapped query result: this wrapper owns no
// releasable state of its own, so its lifetime is exactly that of queryResult's aggregations.
@Override
public void incRef() {
queryResult.incRef();
}

@Override
public boolean tryIncRef() {
return queryResult.tryIncRef();
}

@Override
public boolean decRef() {
return queryResult.decRef();
}

@Override
public boolean hasReferences() {
return queryResult.hasReferences();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,25 @@ public void writeTo(StreamOutput out) throws IOException {
getSearchShardTarget().writeTo(out);
result.writeTo(out);
}


// Delegate all ref-counting to the wrapped result; this wrapper holds no releasable state
// itself, so retaining/releasing it must retain/release the underlying result.
@Override
public void incRef() {
result.incRef();
}

@Override
public boolean tryIncRef() {
return result.tryIncRef();
}

@Override
public boolean decRef() {
return result.decRef();
}

@Override
public boolean hasReferences() {
return result.hasReferences();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.search.TopDocsAndMaxScore;
import org.elasticsearch.core.AbstractRefCounted;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.RescoreDocIds;
Expand All @@ -32,8 +33,12 @@
import static org.elasticsearch.common.lucene.Lucene.readTopDocs;
import static org.elasticsearch.common.lucene.Lucene.writeTopDocs;

/**
* Notice that the ref counting on this only concerns the aggregations and that other parts of this object is sometimes used in non
* ref-count safe situations
*/
public final class QuerySearchResult extends SearchPhaseResult {

private static final org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getLogger(QuerySearchResult.class);
private int from;
private int size;
private TopDocsAndMaxScore topDocsAndMaxScore;
Expand All @@ -60,6 +65,13 @@ public final class QuerySearchResult extends SearchPhaseResult {

private final boolean isNull;

// Ref counter guarding only the aggregations (see class javadoc): when the count reaches zero
// the aggregations are closed and nulled so their circuit-breaker memory is released exactly once.
private final AbstractRefCounted refCounted = AbstractRefCounted.of(() -> {
if (aggregations != null) {
aggregations.close();
aggregations = null;
}
});

// Default constructor producing a regular (non-null-marker) result.
public QuerySearchResult() {
this(false);
}
Expand Down Expand Up @@ -187,6 +199,11 @@ public boolean hasAggs() {
return hasAggs;
}

// Acquires an extra reference so the aggregations survive until consumeAggs()/releaseAggs()
// performs the matching decRef. NOTE(review): the incRef is conditional on aggregations being
// present — callers must pair it with consumeAggs/releaseAggs (which also no-op the decRef path
// appropriately) rather than with a bare decRef; verify call sites keep this balanced.
public void retainAggregationsUntilConsumed() {
if (aggregations != null) {
incRef();
}
}
/**
* Returns and nulls out the aggregation for this search results. This allows to free up memory once the aggregation is consumed.
* @throws IllegalStateException if the aggregations have already been consumed.
Expand All @@ -200,13 +217,15 @@ public InternalAggregations consumeAggs() {
} finally {
aggregations.close();
aggregations = null;
decRef();
}
}

// Eagerly releases the aggregations without returning them. Nulling the field before decRef
// means the refCounted close callback sees null and cannot double-close.
public void releaseAggs() {
if (aggregations != null) {
aggregations.close();
aggregations = null;
decRef();
}
}

Expand Down Expand Up @@ -405,4 +424,24 @@ public TotalHits getTotalHits() {
// Returns the maximum hit score recorded for this shard-level result.
public float getMaxScore() {
return maxScore;
}

// Ref-counting overrides backed by the refCounted field above; per the class javadoc, the
// count governs only the lifetime of the aggregations, not the rest of this object.
@Override
public void incRef() {
refCounted.incRef();
}

@Override
public boolean tryIncRef() {
return refCounted.tryIncRef();
}

@Override
public boolean decRef() {
return refCounted.decRef();
}

@Override
public boolean hasReferences() {
return refCounted.hasReferences();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,24 @@ public void writeTo(StreamOutput out) throws IOException {
getSearchShardTarget().writeTo(out);
result.writeTo(out);
}

// Delegate ref-counting to the wrapped result so that retaining/releasing this wrapper
// directly controls the lifetime of the underlying result's releasable state.
@Override
public void incRef() {
result.incRef();
}

@Override
public boolean tryIncRef() {
return result.tryIncRef();
}

@Override
public boolean decRef() {
return result.decRef();
}

@Override
public boolean hasReferences() {
return result.hasReferences();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
import org.elasticsearch.test.ESTestCase;

import static java.util.Collections.emptyList;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;

public class QuerySearchResultTests extends ESTestCase {

Expand Down Expand Up @@ -68,8 +70,15 @@ private static QuerySearchResult createTestInstance() throws Exception {

public void testSerialization() throws Exception {
QuerySearchResult querySearchResult = createTestInstance();
assertTrue(querySearchResult.hasReferences());
QuerySearchResult deserialized = copyWriteable(querySearchResult, namedWriteableRegistry,
QuerySearchResult::new, Version.CURRENT);
if (randomBoolean()) {
// double copy to check serializing the serialized version.
deserialized = copyWriteable(querySearchResult, namedWriteableRegistry,
QuerySearchResult::new, Version.CURRENT);
}
assertTrue(deserialized.hasReferences());
assertEquals(querySearchResult.getContextId().getId(), deserialized.getContextId().getId());
assertNull(deserialized.getSearchShardTarget());
assertEquals(querySearchResult.topDocs().maxScore, deserialized.topDocs().maxScore, 0f);
Expand All @@ -91,4 +100,33 @@ public void testNullResponse() throws Exception {
copyWriteable(querySearchResult, namedWriteableRegistry, QuerySearchResult::new, Version.CURRENT);
assertEquals(querySearchResult.isNull(), deserialized.isNull());
}

// Verifies the aggregation ref-counting contract of QuerySearchResult: a fresh or deserialized
// instance starts with a live reference; decRef releases and nulls the aggregations; and
// consumeAggs performs its own internal decRef (so an explicit extra incRef must be balanced
// by an explicit decRef, while a no-aggs instance is released by a plain decRef).
public void testAggregationRefCount() throws Exception {
QuerySearchResult querySearchResult = createTestInstance();
assertTrue(querySearchResult.hasReferences());
boolean hasAggs = querySearchResult.hasAggs();

QuerySearchResult deserialized = copyWriteable(querySearchResult, namedWriteableRegistry,
QuerySearchResult::new, Version.CURRENT);
assertTrue(deserialized.hasReferences());

// Releasing the original must null out its aggregations.
querySearchResult.decRef();
assertFalse(querySearchResult.hasReferences());
assertThat(querySearchResult.aggregations(), is(nullValue()));

// Serialization must preserve whether aggregations are present.
assertThat(deserialized.hasAggs(), is(hasAggs));
boolean inc = randomBoolean();
if (inc && hasAggs) {
deserialized.incRef();
}
if (hasAggs) {
// consumeAggs internally decRefs, dropping the initial reference.
assertNotNull(deserialized.consumeAggs());
}
if (inc || hasAggs == false) {
// Either the extra incRef above, or (with no aggs) the initial reference, is still held.
assertTrue(deserialized.hasReferences());
deserialized.decRef();
}
assertThat(deserialized.aggregations(), is(nullValue()));
assertFalse(deserialized.hasReferences());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ protected void disconnectFromRemoteClusters() throws Exception {
for (String clusterAlias : clusterAliases) {
if (clusterAlias.equals(LOCAL_CLUSTER) == false) {
settings.putNull("cluster.remote." + clusterAlias + ".seeds");
settings.putNull("cluster.remote." + clusterAlias + ".mode");
settings.putNull("cluster.remote." + clusterAlias + ".proxy_address");
}
}
client().admin().cluster().prepareUpdateSettings().setPersistentSettings(settings).get();
Expand Down