Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix aggregation memory leak for CCS #78404

Merged
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.search.ccs;

import org.elasticsearch.action.ActionFuture;
import org.elasticsearch.action.search.ClearScrollRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.query.MatchAllQueryBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.test.AbstractMultiClustersTestCase;
import org.elasticsearch.test.InternalTestCluster;
import org.elasticsearch.transport.TransportService;
import org.hamcrest.Matchers;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.Matchers.equalTo;

/**
 * Cross-cluster search (CCS) integration test whose real assertion is performed by the test
 * framework's leak detection: it drives searches across a local and a remote cluster in several
 * randomized modes and relies on the framework to fail the test if aggregation memory is leaked.
 */
public class CrossClusterSearchLeakIT extends AbstractMultiClustersTestCase {

// A single remote cluster ("cluster_a") is enough to exercise the CCS code paths.
@Override
protected Collection<String> remoteClusterAlias() {
return List.of("cluster_a");
}

// Do not reuse clusters between tests: this test mutates remote-connection settings
// (seeds / proxy mode) and relies on leak detection at cluster teardown.
@Override
protected boolean reuseClusters() {
return false;
}

/**
 * Indexes a random number (1..200) of documents with unique values {@code "v0".."vN"} into
 * {@code index} under {@code field}, refreshes, and returns the document count.
 */
private int indexDocs(Client client, String field, String index) {
int numDocs = between(1, 200);
for (int i = 0; i < numDocs; i++) {
client.prepareIndex(index).setSource(field, "v" + i).get();
}
// Refresh so the documents are visible to the searches issued below.
client.admin().indices().prepareRefresh(index).get();
return numDocs;
}

/**
 * This test validates that we do not leak any memory when running CCS in various modes, actual validation is done by test framework
 * (leak detection)
 * <ul>
 * <li>proxy vs non-proxy</li>
 * <li>single-phase query-fetch or multi-phase</li>
 * <li>minimize roundtrip vs not</li>
 * <li>scroll vs no scroll</li>
 * </ul>
 */
public void testSearch() throws Exception {
// Local index "demo" with a keyword field and 1-3 shards.
assertAcked(client(LOCAL_CLUSTER).admin().indices().prepareCreate("demo")
.setMapping("f", "type=keyword")
.setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, between(1, 3))));
indexDocs(client(LOCAL_CLUSTER), "ignored", "demo");
final InternalTestCluster remoteCluster = cluster("cluster_a");
int minRemotes = between(2, 5);
remoteCluster.ensureAtLeastNumDataNodes(minRemotes);
// Collect the names of all remote data nodes so we can pick a strict subset as seeds.
List<String> remoteDataNodes = StreamSupport.stream(remoteCluster.clusterService().state().nodes().spliterator(), false)
.filter(DiscoveryNode::canContainData)
.map(DiscoveryNode::getName)
.collect(Collectors.toList());
assertThat(remoteDataNodes.size(), Matchers.greaterThanOrEqualTo(minRemotes));
// Seed with fewer nodes than exist so some shards may live on non-seed nodes.
List<String> seedNodes = randomSubsetOf(between(1, remoteDataNodes.size() - 1), remoteDataNodes);
// Drop any existing remote connections before applying the randomized connection mode.
disconnectFromRemoteClusters();
configureRemoteCluster("cluster_a", seedNodes);
final Settings.Builder allocationFilter = Settings.builder();
if (rarely()) {
allocationFilter.put("index.routing.allocation.include._name", String.join(",", seedNodes));
} else {
// Provoke using proxy connections
allocationFilter.put("index.routing.allocation.exclude._name", String.join(",", seedNodes));
}
// Remote index "prod" constrained by the allocation filter chosen above; no replicas so
// allocation filtering fully determines shard placement.
assertAcked(client("cluster_a").admin().indices().prepareCreate("prod")
.setMapping("f", "type=keyword")
.setSettings(Settings.builder().put(allocationFilter.build())
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, between(1, 3))));
assertFalse(client("cluster_a").admin().cluster().prepareHealth("prod")
.setWaitForYellowStatus().setTimeout(TimeValue.timeValueSeconds(10)).get().isTimedOut());
int docs = indexDocs(client("cluster_a"), "f", "prod");

// Fire off 10 concurrent searches with randomized CCS dimensions (see method javadoc).
List<ActionFuture<SearchResponse>> futures = new ArrayList<>();
for (int i = 0; i < 10; ++i) {
String[] indices = randomBoolean() ? new String[] { "demo", "cluster_a:prod" } : new String[] { "cluster_a:prod" };
final SearchRequest searchRequest = new SearchRequest(indices);
searchRequest.allowPartialSearchResults(false);
// Terms agg sized >= docs so every unique value gets its own bucket; random hit size
// toggles single-phase query-fetch vs multi-phase execution.
searchRequest.source(new SearchSourceBuilder().query(new MatchAllQueryBuilder())
.aggregation(terms("f").field("f").size(docs + between(0, 10))).size(between(0, 1000)));
if (randomBoolean()) {
searchRequest.scroll("30s");
}
searchRequest.setCcsMinimizeRoundtrips(rarely());
futures.add(client(LOCAL_CLUSTER).search(searchRequest));
}

for (ActionFuture<SearchResponse> future : futures) {
SearchResponse searchResponse = future.get();
// Clean up scroll contexts so they do not linger until timeout.
if (searchResponse.getScrollId() != null) {
ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
clearScrollRequest.scrollIds(List.of(searchResponse.getScrollId()));
client(LOCAL_CLUSTER).clearScroll(clearScrollRequest).get();
}

// Sanity check: each unique value "v0".."vN" produced exactly one bucket of one doc.
Terms terms = searchResponse.getAggregations().get("f");
assertThat(terms.getBuckets().size(), equalTo(docs));
for (Terms.Bucket bucket : terms.getBuckets()) {
assertThat(bucket.getDocCount(), equalTo(1L));
}
}
}

/**
 * Randomly connects to the remote cluster either via the default (sniff) configuration or via
 * proxy mode pointed at a single seed node's publish address, to cover both connection styles.
 */
@Override
protected void configureRemoteCluster(String clusterAlias, Collection<String> seedNodes) throws Exception {
if (rarely()) {
super.configureRemoteCluster(clusterAlias, seedNodes);
} else {
final Settings.Builder settings = Settings.builder();
final String seedNode = randomFrom(seedNodes);
final TransportService transportService = cluster(clusterAlias).getInstance(TransportService.class, seedNode);
final String seedAddress = transportService.boundAddress().publishAddress().toString();

settings.put("cluster.remote." + clusterAlias + ".mode", "proxy");
settings.put("cluster.remote." + clusterAlias + ".proxy_address", seedAddress);
client().admin().cluster().prepareUpdateSettings().setPersistentSettings(settings).get();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ public void close() {
// Consumes a per-shard query result. The extra retain call keeps the shard-level aggregations
// alive until the (possibly asynchronous) merge in pendingMerges actually consumes them —
// ordering matters: retain must happen before handing the result to pendingMerges.consume.
public void consumeResult(SearchPhaseResult result, Runnable next) {
// NOTE(review): super is invoked with a no-op continuation; `next` is deliberately deferred
// to pendingMerges.consume below.
super.consumeResult(result, () -> {});
QuerySearchResult querySearchResult = result.queryResult();
querySearchResult.retainAggregationsUntilConsumed();
progressListener.notifyQueryResult(querySearchResult.getShardIndex());
pendingMerges.consume(querySearchResult, next);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,26 @@ public void writeTo(StreamOutput out) throws IOException {
queryResult.writeTo(out);
fetchResult.writeTo(out);
}


// Ref-counting is delegated wholesale to the wrapped query result: this wrapper owns no
// releasable state of its own, so its lifetime is exactly that of queryResult's aggregations.
@Override
public void incRef() {
queryResult.incRef();
}

@Override
public boolean tryIncRef() {
return queryResult.tryIncRef();
}

@Override
public boolean decRef() {
return queryResult.decRef();
}

@Override
public boolean hasReferences() {
return queryResult.hasReferences();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,25 @@ public void writeTo(StreamOutput out) throws IOException {
getSearchShardTarget().writeTo(out);
result.writeTo(out);
}


// Delegate all ref-counting to the wrapped result; this wrapper holds no releasable state
// itself, so retaining/releasing it must retain/release the underlying result.
@Override
public void incRef() {
result.incRef();
}

@Override
public boolean tryIncRef() {
return result.tryIncRef();
}

@Override
public boolean decRef() {
return result.decRef();
}

@Override
public boolean hasReferences() {
return result.hasReferences();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.search.TopDocsAndMaxScore;
import org.elasticsearch.core.AbstractRefCounted;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.RescoreDocIds;
Expand All @@ -32,8 +33,12 @@
import static org.elasticsearch.common.lucene.Lucene.readTopDocs;
import static org.elasticsearch.common.lucene.Lucene.writeTopDocs;

/**
* Notice that the ref counting on this only concerns the aggregations and that other parts of this object is sometimes used in non
* ref-count safe situations
*/
public final class QuerySearchResult extends SearchPhaseResult {

private static final org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getLogger(QuerySearchResult.class);
private int from;
private int size;
private TopDocsAndMaxScore topDocsAndMaxScore;
Expand All @@ -60,6 +65,13 @@ public final class QuerySearchResult extends SearchPhaseResult {

private final boolean isNull;

// Ref counter guarding only the aggregations (see class javadoc): when the count reaches zero
// the aggregations are closed and nulled so their circuit-breaker memory is released exactly once.
private final AbstractRefCounted refCounted = AbstractRefCounted.of(() -> {
if (aggregations != null) {
aggregations.close();
aggregations = null;
}
});

// Default constructor producing a regular (non-null-marker) result.
public QuerySearchResult() {
this(false);
}
Expand Down Expand Up @@ -187,6 +199,11 @@ public boolean hasAggs() {
return hasAggs;
}

// Acquires an extra reference so the aggregations survive until consumeAggs()/releaseAggs()
// performs the matching decRef. NOTE(review): the incRef is conditional on aggregations being
// present — callers must pair it with consumeAggs/releaseAggs (which also no-op the decRef path
// appropriately) rather than with a bare decRef; verify call sites keep this balanced.
public void retainAggregationsUntilConsumed() {
if (aggregations != null) {
incRef();
}
}
/**
* Returns and nulls out the aggregation for this search results. This allows to free up memory once the aggregation is consumed.
* @throws IllegalStateException if the aggregations have already been consumed.
Expand All @@ -200,13 +217,15 @@ public InternalAggregations consumeAggs() {
} finally {
aggregations.close();
aggregations = null;
decRef();
}
}

// Eagerly releases the aggregations without returning them. Nulling the field before decRef
// means the refCounted close callback sees null and cannot double-close.
public void releaseAggs() {
if (aggregations != null) {
aggregations.close();
aggregations = null;
decRef();
}
}

Expand Down Expand Up @@ -405,4 +424,24 @@ public TotalHits getTotalHits() {
// Returns the maximum hit score recorded for this shard-level result.
public float getMaxScore() {
return maxScore;
}

// Ref-counting overrides backed by the refCounted field above; per the class javadoc, the
// count governs only the lifetime of the aggregations, not the rest of this object.
@Override
public void incRef() {
refCounted.incRef();
}

@Override
public boolean tryIncRef() {
return refCounted.tryIncRef();
}

@Override
public boolean decRef() {
return refCounted.decRef();
}

@Override
public boolean hasReferences() {
return refCounted.hasReferences();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,24 @@ public void writeTo(StreamOutput out) throws IOException {
getSearchShardTarget().writeTo(out);
result.writeTo(out);
}

// Delegate ref-counting to the wrapped result so that retaining/releasing this wrapper
// directly controls the lifetime of the underlying result's releasable state.
@Override
public void incRef() {
result.incRef();
}

@Override
public boolean tryIncRef() {
return result.tryIncRef();
}

@Override
public boolean decRef() {
return result.decRef();
}

@Override
public boolean hasReferences() {
return result.hasReferences();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
import org.elasticsearch.test.ESTestCase;

import static java.util.Collections.emptyList;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;

public class QuerySearchResultTests extends ESTestCase {

Expand Down Expand Up @@ -68,8 +70,15 @@ private static QuerySearchResult createTestInstance() throws Exception {

public void testSerialization() throws Exception {
QuerySearchResult querySearchResult = createTestInstance();
assertTrue(querySearchResult.hasReferences());
QuerySearchResult deserialized = copyWriteable(querySearchResult, namedWriteableRegistry,
QuerySearchResult::new, Version.CURRENT);
if (randomBoolean()) {
// double copy to check serializing the serialized version.
deserialized = copyWriteable(querySearchResult, namedWriteableRegistry,
QuerySearchResult::new, Version.CURRENT);
}
assertTrue(deserialized.hasReferences());
assertEquals(querySearchResult.getContextId().getId(), deserialized.getContextId().getId());
assertNull(deserialized.getSearchShardTarget());
assertEquals(querySearchResult.topDocs().maxScore, deserialized.topDocs().maxScore, 0f);
Expand All @@ -91,4 +100,33 @@ public void testNullResponse() throws Exception {
copyWriteable(querySearchResult, namedWriteableRegistry, QuerySearchResult::new, Version.CURRENT);
assertEquals(querySearchResult.isNull(), deserialized.isNull());
}

// Verifies the aggregation ref-counting contract of QuerySearchResult: a fresh or deserialized
// instance starts with a live reference; decRef releases and nulls the aggregations; and
// consumeAggs performs its own internal decRef (so an explicit extra incRef must be balanced
// by an explicit decRef, while a no-aggs instance is released by a plain decRef).
public void testAggregationRefCount() throws Exception {
QuerySearchResult querySearchResult = createTestInstance();
assertTrue(querySearchResult.hasReferences());
boolean hasAggs = querySearchResult.hasAggs();

QuerySearchResult deserialized = copyWriteable(querySearchResult, namedWriteableRegistry,
QuerySearchResult::new, Version.CURRENT);
assertTrue(deserialized.hasReferences());

// Releasing the original must null out its aggregations.
querySearchResult.decRef();
assertFalse(querySearchResult.hasReferences());
assertThat(querySearchResult.aggregations(), is(nullValue()));

// Serialization must preserve whether aggregations are present.
assertThat(deserialized.hasAggs(), is(hasAggs));
boolean inc = randomBoolean();
if (inc && hasAggs) {
deserialized.incRef();
}
if (hasAggs) {
// consumeAggs internally decRefs, dropping the initial reference.
assertNotNull(deserialized.consumeAggs());
}
if (inc || hasAggs == false) {
// Either the extra incRef above, or (with no aggs) the initial reference, is still held.
assertTrue(deserialized.hasReferences());
deserialized.decRef();
}
assertThat(deserialized.aggregations(), is(nullValue()));
assertFalse(deserialized.hasReferences());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ protected void disconnectFromRemoteClusters() throws Exception {
for (String clusterAlias : clusterAliases) {
if (clusterAlias.equals(LOCAL_CLUSTER) == false) {
settings.putNull("cluster.remote." + clusterAlias + ".seeds");
settings.putNull("cluster.remote." + clusterAlias + ".mode");
settings.putNull("cluster.remote." + clusterAlias + ".proxy_address");
}
}
client().admin().cluster().prepareUpdateSettings().setPersistentSettings(settings).get();
Expand Down