documentation and refactoring

matarrese committed Nov 6, 2018
1 parent 3f60fb6 commit a9f5e3e
Showing 8 changed files with 78 additions and 33 deletions.
@@ -580,3 +580,39 @@ GET /_search
<1> the possible values are `map`, `global_ordinals`

Please note that Elasticsearch will ignore this execution hint if it is not applicable.

==== Collection mode

Calculation of child aggregations can be deferred, either by specifying the `collect_mode`
explicitly or by accepting the default, in the same way as for the terms aggregation's
<<search-aggregations-bucket-significantterms-aggregation,collect mode>> strategy.


[source,js]
--------------------------------------------------
GET /_search
{
"aggs" : {
"tags" : {
"significant_terms" : {
"field" : "tags",
"collect_mode" : "breadth_first" <1>
}
}
}
}
--------------------------------------------------
// CONSOLE

<1> the possible values are `breadth_first`, `depth_first`

Breadth-first should be used only when you expect more buckets to be generated than documents
landing in the buckets. Breadth-first works by caching document data at the bucket level, and
then replaying those documents to child aggregations after the pruning phase.
The memory requirement of a breadth-first aggregation is linear in the number of documents
in each bucket prior to pruning. For many aggregations, the number of documents in each bucket
is very large. Think of a histogram with monthly intervals: you might have thousands or hundreds
of thousands of documents per bucket. This makes breadth-first a bad choice, and is why
depth-first is the default.
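
For contrast, the following sketch shows a request where the default `depth_first` mode is
the better fit, because each bucket is expected to match many documents. The
`sales_per_month` name, the `timestamp` field, and the monthly interval are illustrative
assumptions:

[source,js]
--------------------------------------------------
GET /_search
{
    "aggs" : {
        "sales_per_month" : {
            "date_histogram" : {
                "field" : "timestamp",
                "interval" : "month"
            },
            "aggs" : {
                "tags" : {
                    "significant_terms" : {
                        "field" : "tags",
                        "collect_mode" : "depth_first" <1>
                    }
                }
            }
        }
    }
}
--------------------------------------------------
// NOTCONSOLE

<1> each monthly bucket may match thousands of documents, so caching them all for a
breadth-first replay would be expensive; `depth_first` (the default) collects child
aggregations eagerly and avoids that cost.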
@@ -18,9 +18,11 @@
*/
package org.elasticsearch.search.aggregations.bucket.significant;

import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.ParseFieldRegistry;
import org.elasticsearch.common.xcontent.XContentBuilder;
@@ -29,7 +31,6 @@
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.AggregatorFactories.Builder;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.bucket.MultiBucketAggregationBuilder;
@@ -132,7 +133,11 @@ public SignificantTermsAggregationBuilder(StreamInput in) throws IOException {
super(in, ValuesSourceType.ANY);
bucketCountThresholds = new BucketCountThresholds(in);
executionHint = in.readOptionalString();
collectMode = in.readOptionalWriteable(SubAggCollectionMode::readFromStream);
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
collectMode = in.readOptionalWriteable(SubAggCollectionMode::readFromStream);
} else {
collectMode = SubAggCollectionMode.DEPTH_FIRST;
}
filterBuilder = in.readOptionalNamedWriteable(QueryBuilder.class);
includeExclude = in.readOptionalWriteable(IncludeExclude::new);
significanceHeuristic = in.readNamedWriteable(SignificanceHeuristic.class);
@@ -158,7 +163,11 @@ protected AggregationBuilder shallowCopy(Builder factoriesBuilder, Map<String, O
protected void innerWriteTo(StreamOutput out) throws IOException {
bucketCountThresholds.writeTo(out);
out.writeOptionalString(executionHint);
out.writeOptionalWriteable(collectMode);
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
out.writeOptionalWriteable(collectMode);
} else {
out.writeOptionalWriteable(SubAggCollectionMode.DEPTH_FIRST);
}
out.writeOptionalNamedWriteable(filterBuilder);
out.writeOptionalWriteable(includeExclude);
out.writeNamedWriteable(significanceHeuristic);
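
The constructor and `innerWriteTo` hunks above apply the usual wire backwards-compatibility
pattern: the configured `collectMode` is exchanged only with nodes on
`Version.V_7_0_0_alpha1` or later, and the `DEPTH_FIRST` default is used on older streams.
The following condensed sketch is hypothetical and only isolates that gate, reusing the
stream and version APIs imported above:

import java.io.IOException;

import org.elasticsearch.Version;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;

// Hypothetical helper isolating the version gate used in this commit.
final class CollectModeWire {

    // Reading: streams from 7.0.0-alpha1 or later carry the configured mode;
    // otherwise fall back to the DEPTH_FIRST default.
    static SubAggCollectionMode read(StreamInput in) throws IOException {
        if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
            return in.readOptionalWriteable(SubAggCollectionMode::readFromStream);
        }
        return SubAggCollectionMode.DEPTH_FIRST;
    }

    // Writing: send the configured mode to 7.0.0-alpha1+ nodes; on older
    // streams, pin the optional field to the DEPTH_FIRST default instead.
    static void write(StreamOutput out, SubAggCollectionMode collectMode) throws IOException {
        if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
            out.writeOptionalWriteable(collectMode);
        } else {
            out.writeOptionalWriteable(SubAggCollectionMode.DEPTH_FIRST);
        }
    }
}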
@@ -258,21 +258,6 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare
+ config.fieldContext().field() + "]. It can only be applied to numeric or string fields.");
}


// return the SubAggCollectionMode that this aggregation should use based on the expected size
// and the cardinality of the field
static SubAggCollectionMode subAggCollectionMode(int expectedSize, long maxOrd) {
if (expectedSize == Integer.MAX_VALUE) {
// return all buckets
return SubAggCollectionMode.DEPTH_FIRST;
}
if (maxOrd == -1 || maxOrd > expectedSize) {
// use breadth_first if the cardinality is bigger than the expected size or unknown (-1)
return SubAggCollectionMode.BREADTH_FIRST;
}
return SubAggCollectionMode.DEPTH_FIRST;
}

/**
* Get the maximum global ordinal value for the provided {@link ValuesSource} or -1
* if the values source is not an instance of {@link ValuesSource.Bytes.WithOrdinals}.
@@ -191,20 +191,6 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare
+ "]. It can only be applied to numeric or string fields.");
}

// return the SubAggCollectionMode that this aggregation should use based on the expected size
// and the cardinality of the field
static SubAggCollectionMode subAggCollectionMode(int expectedSize, long maxOrd) {
if (expectedSize == Integer.MAX_VALUE) {
// return all buckets
return SubAggCollectionMode.DEPTH_FIRST;
}
if (maxOrd == -1 || maxOrd > expectedSize) {
// use breadth_first if the cardinality is bigger than the expected size or unknown (-1)
return SubAggCollectionMode.BREADTH_FIRST;
}
return SubAggCollectionMode.DEPTH_FIRST;
}

/**
* Get the maximum global ordinal value for the provided {@link ValuesSource} or -1
* if the values source is not an instance of {@link ValuesSource.Bytes.WithOrdinals}.
@@ -50,6 +50,20 @@ public Aggregator createInternal(Aggregator parent, boolean collectsFromSingleBu
return doCreateInternal(vs, parent, collectsFromSingleBucket, pipelineAggregators, metaData);
}

// return the SubAggCollectionMode that this aggregation should use based on the expected size
// and the cardinality of the field
public static Aggregator.SubAggCollectionMode subAggCollectionMode(int expectedSize, long maxOrd) {
if (expectedSize == Integer.MAX_VALUE) {
// return all buckets
return Aggregator.SubAggCollectionMode.DEPTH_FIRST;
}
if (maxOrd == -1 || maxOrd > expectedSize) {
// use breadth_first if the cardinality is bigger than the expected size or unknown (-1)
return Aggregator.SubAggCollectionMode.BREADTH_FIRST;
}
return Aggregator.SubAggCollectionMode.DEPTH_FIRST;
}

protected abstract Aggregator createUnmapped(Aggregator parent,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException;
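
The mode-selection helper is consolidated here, replacing the two identical copies removed
earlier in this commit. The plain-Java sketch below restates its decision table; the local
`Mode` enum and class name are stand-ins so the snippet compiles without Elasticsearch on
the classpath:

// Plain-Java restatement of the subAggCollectionMode decision table above.
enum Mode { DEPTH_FIRST, BREADTH_FIRST }

final class CollectModeTable {

    static Mode pick(int expectedSize, long maxOrd) {
        if (expectedSize == Integer.MAX_VALUE) {
            // all buckets are returned, so nothing will be pruned
            return Mode.DEPTH_FIRST;
        }
        if (maxOrd == -1 || maxOrd > expectedSize) {
            // cardinality unknown (-1) or higher than the requested size: defer child aggs
            return Mode.BREADTH_FIRST;
        }
        // every distinct value fits in the requested size
        return Mode.DEPTH_FIRST;
    }

    public static void main(String[] args) {
        System.out.println(pick(Integer.MAX_VALUE, 500)); // DEPTH_FIRST
        System.out.println(pick(10, -1));                 // BREADTH_FIRST (cardinality unknown)
        System.out.println(pick(10, 1000));               // BREADTH_FIRST (1000 ords > 10 buckets)
        System.out.println(pick(1000, 10));               // DEPTH_FIRST (all 10 ords fit)
    }
}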

@@ -39,6 +39,7 @@
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.bucket.filter.InternalFilter;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory;
@@ -513,6 +514,7 @@ public void testScoresEqualForPositiveAndNegative(SignificanceHeuristic heuristi
.addAggregation(terms("class").field("class").subAggregation(significantTerms("mySignificantTerms")
.field("text")
.executionHint(randomExecutionHint())
.collectMode(randomFrom(Aggregator.SubAggCollectionMode.values()))
.significanceHeuristic(heuristic)
.minDocCount(1).shardSize(1000).size(1000)));
}else
@@ -597,6 +599,7 @@ public void testScriptScore() throws ExecutionException, InterruptedException, I
.subAggregation(significantTerms("mySignificantTerms")
.field(TEXT_FIELD)
.executionHint(randomExecutionHint())
.collectMode(randomFrom(Aggregator.SubAggCollectionMode.values()))
.significanceHeuristic(scriptHeuristic)
.minDocCount(1).shardSize(2).size(2)));
}
@@ -22,6 +22,7 @@
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.BaseAggregationTestCase;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
@@ -113,6 +114,18 @@ protected SignificantTermsAggregationBuilder createTestAggregatorBuilder() {
if (randomBoolean()) {
factory.backgroundFilter(QueryBuilders.termsQuery("foo", "bar"));
}
if (randomBoolean()) {
int collectMode = randomInt(1);
switch (collectMode) {
case 0:
factory.collectMode(Aggregator.SubAggCollectionMode.BREADTH_FIRST);
break;
case 1:
factory.collectMode(Aggregator.SubAggCollectionMode.DEPTH_FIRST);
break;
}
}
return factory;
}

@@ -20,7 +20,6 @@
package org.elasticsearch.search.aggregations.bucket.significant;

import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory;
import org.elasticsearch.test.ESTestCase;

import static org.hamcrest.Matchers.equalTo;