diff --git a/docs/reference/aggregations/bucket/significantterms-aggregation.asciidoc b/docs/reference/aggregations/bucket/significantterms-aggregation.asciidoc index 0a8a46a0b67b6..724ead6c22912 100644 --- a/docs/reference/aggregations/bucket/significantterms-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/significantterms-aggregation.asciidoc @@ -580,3 +580,39 @@ GET /_search <1> the possible values are `map`, `global_ordinals` Please note that Elasticsearch will ignore this execution hint if it is not applicable. + +==== Collection mode + +Deferring calculation of child aggregations + +The possibility to deffer the calculation of child aggregations is allowed specifying or accepting +the default behavior in the same way it works for terms aggregations +<> strategy. + + +[source,js] +-------------------------------------------------- +GET /_search +{ + "aggs" : { + "tags" : { + "significant_terms" : { + "field" : "tags", + "collect_mode" : "breadth_first" <1> + } + } + } +} +-------------------------------------------------- +// CONSOLE + +<1> the possible values are `breadth_first`, `depth_first` + +Breadth-first should be used only when you expect more buckets to be generated than documents +landing in the buckets. Breadth-first works by caching document data at the bucket level, and +then replaying those documents to child aggregations after the pruning phase. +The memory requirement of a breadth-first aggregation is linear to the number of documents +in each bucket prior to pruning. For many aggregations, the number of documents in each bucket +is very large. Think of a histogram with monthly intervals: you might have thousands or hundreds +of thousands of documents per bucket. This makes breadth-first a bad choice, and is why +depth-first is the default. \ No newline at end of file diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregationBuilder.java index 2f2d51b67bb14..9cc9f5d3928e3 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregationBuilder.java @@ -18,9 +18,11 @@ */ package org.elasticsearch.search.aggregations.bucket.significant; +import org.elasticsearch.Version; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; import org.elasticsearch.common.xcontent.ObjectParser; import org.elasticsearch.common.xcontent.ParseFieldRegistry; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -29,7 +31,6 @@ import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; -import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.AggregatorFactories.Builder; import org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.bucket.MultiBucketAggregationBuilder; @@ -132,7 +133,11 @@ public SignificantTermsAggregationBuilder(StreamInput in) throws IOException { super(in, ValuesSourceType.ANY); bucketCountThresholds = new BucketCountThresholds(in); executionHint = in.readOptionalString(); - collectMode = in.readOptionalWriteable(SubAggCollectionMode::readFromStream); + if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) { + collectMode = in.readOptionalWriteable(SubAggCollectionMode::readFromStream); + } else { + collectMode = SubAggCollectionMode.DEPTH_FIRST; + } filterBuilder = in.readOptionalNamedWriteable(QueryBuilder.class); includeExclude = in.readOptionalWriteable(IncludeExclude::new); significanceHeuristic = in.readNamedWriteable(SignificanceHeuristic.class); @@ -158,7 +163,11 @@ protected AggregationBuilder shallowCopy(Builder factoriesBuilder, Map expectedSize) { - // use breadth_first if the cardinality is bigger than the expected size or unknown (-1) - return SubAggCollectionMode.BREADTH_FIRST; - } - return SubAggCollectionMode.DEPTH_FIRST; - } - /** * Get the maximum global ordinal value for the provided {@link ValuesSource} or -1 * if the values source is not an instance of {@link ValuesSource.Bytes.WithOrdinals}. diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index 2864ffe2fcefc..98723baf10c52 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -191,20 +191,6 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare + "]. It can only be applied to numeric or string fields."); } - // return the SubAggCollectionMode that this aggregation should use based on the expected size - // and the cardinality of the field - static SubAggCollectionMode subAggCollectionMode(int expectedSize, long maxOrd) { - if (expectedSize == Integer.MAX_VALUE) { - // return all buckets - return SubAggCollectionMode.DEPTH_FIRST; - } - if (maxOrd == -1 || maxOrd > expectedSize) { - // use breadth_first if the cardinality is bigger than the expected size or unknown (-1) - return SubAggCollectionMode.BREADTH_FIRST; - } - return SubAggCollectionMode.DEPTH_FIRST; - } - /** * Get the maximum global ordinal value for the provided {@link ValuesSource} or -1 * if the values source is not an instance of {@link ValuesSource.Bytes.WithOrdinals}. diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregatorFactory.java index 37260f9013314..51acbc192c8a9 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregatorFactory.java @@ -50,6 +50,20 @@ public Aggregator createInternal(Aggregator parent, boolean collectsFromSingleBu return doCreateInternal(vs, parent, collectsFromSingleBucket, pipelineAggregators, metaData); } + // return the SubAggCollectionMode that this aggregation should use based on the expected size + // and the cardinality of the field + public static Aggregator.SubAggCollectionMode subAggCollectionMode(int expectedSize, long maxOrd) { + if (expectedSize == Integer.MAX_VALUE) { + // return all buckets + return Aggregator.SubAggCollectionMode.DEPTH_FIRST; + } + if (maxOrd == -1 || maxOrd > expectedSize) { + // use breadth_first if the cardinality is bigger than the expected size or unknown (-1) + return Aggregator.SubAggCollectionMode.BREADTH_FIRST; + } + return Aggregator.SubAggCollectionMode.DEPTH_FIRST; + } + protected abstract Aggregator createUnmapped(Aggregator parent, List pipelineAggregators, Map metaData) throws IOException; diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java index 79d0a0ad17e69..386f86d8101e0 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java @@ -39,6 +39,7 @@ import org.elasticsearch.script.ScriptType; import org.elasticsearch.search.aggregations.Aggregation; import org.elasticsearch.search.aggregations.Aggregations; +import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.bucket.filter.InternalFilter; import org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms; import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory; @@ -513,6 +514,7 @@ public void testScoresEqualForPositiveAndNegative(SignificanceHeuristic heuristi .addAggregation(terms("class").field("class").subAggregation(significantTerms("mySignificantTerms") .field("text") .executionHint(randomExecutionHint()) + .collectMode(randomFrom(Aggregator.SubAggCollectionMode.values())) .significanceHeuristic(heuristic) .minDocCount(1).shardSize(1000).size(1000))); }else @@ -597,6 +599,7 @@ public void testScriptScore() throws ExecutionException, InterruptedException, I .subAggregation(significantTerms("mySignificantTerms") .field(TEXT_FIELD) .executionHint(randomExecutionHint()) + .collectMode(randomFrom(Aggregator.SubAggCollectionMode.values())) .significanceHeuristic(scriptHeuristic) .minDocCount(1).shardSize(2).size(2))); } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java index e91b724eea4a7..c2364b0f44b27 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java @@ -22,6 +22,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.automaton.RegExp; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.BaseAggregationTestCase; import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregationBuilder; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare; @@ -113,6 +114,18 @@ protected SignificantTermsAggregationBuilder createTestAggregatorBuilder() { if (randomBoolean()) { factory.backgroundFilter(QueryBuilders.termsQuery("foo", "bar")); } + if (randomBoolean()) { + int collectMode = randomInt(1); + switch (collectMode) { + case 0: + factory.collectMode(Aggregator.SubAggCollectionMode.BREADTH_FIRST); + break; + case 1: + factory.collectMode(Aggregator.SubAggCollectionMode.DEPTH_FIRST); + break; + } + + } return factory; } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactoryTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactoryTests.java index 0e0319d9fa79a..7c4311cfd169c 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactoryTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactoryTests.java @@ -20,7 +20,6 @@ package org.elasticsearch.search.aggregations.bucket.significant; import org.elasticsearch.search.aggregations.Aggregator; -import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory; import org.elasticsearch.test.ESTestCase; import static org.hamcrest.Matchers.equalTo;