documentation and refactoring

matarrese committed Nov 6, 2018
1 parent 3f60fb6 commit a9f5e3e
Showing 8 changed files with 78 additions and 33 deletions.
@@ -580,3 +580,39 @@ GET /_search
<1> the possible values are `map`, `global_ordinals`

Please note that Elasticsearch will ignore this execution hint if it is not applicable.

==== Collection mode

Calculation of child aggregations can be deferred, either by specifying the `collect_mode`
explicitly or by accepting the default, in the same way as for the terms aggregation's
<<search-aggregations-bucket-significantterms-aggregation,collect mode>> strategy.


[source,js]
--------------------------------------------------
GET /_search
{
"aggs" : {
"tags" : {
"significant_terms" : {
"field" : "tags",
"collect_mode" : "breadth_first" <1>
}
}
}
}
--------------------------------------------------
// CONSOLE

<1> the possible values are `breadth_first`, `depth_first`

Breadth-first should be used only when you expect more buckets to be generated than documents
landing in the buckets. Breadth-first works by caching document data at the bucket level, and
then replaying those documents to child aggregations after the pruning phase.
The memory requirement of a breadth-first aggregation is linear in the number of documents
in each bucket prior to pruning. For many aggregations, the number of documents in each bucket
is very large. Think of a histogram with monthly intervals: you might have thousands or hundreds
of thousands of documents per bucket. This makes breadth-first a bad choice, and is why
depth-first is the default.
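
For contrast, the following sketch shows a request where the default `depth_first` mode is
the better fit, because each bucket is expected to match many documents. The
`sales_per_month` name, the `timestamp` field, and the monthly interval are illustrative
assumptions:

[source,js]
--------------------------------------------------
GET /_search
{
    "aggs" : {
        "sales_per_month" : {
            "date_histogram" : {
                "field" : "timestamp",
                "interval" : "month"
            },
            "aggs" : {
                "tags" : {
                    "significant_terms" : {
                        "field" : "tags",
                        "collect_mode" : "depth_first" <1>
                    }
                }
            }
        }
    }
}
--------------------------------------------------
// NOTCONSOLE

<1> each monthly bucket may match thousands of documents, so caching them all for a
breadth-first replay would be expensive; `depth_first` (the default) collects child
aggregations eagerly and avoids that cost.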
@@ -18,9 +18,11 @@
*/
package org.elasticsearch.search.aggregations.bucket.significant;

import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.ParseFieldRegistry;
import org.elasticsearch.common.xcontent.XContentBuilder;
@@ -29,7 +31,6 @@
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.AggregatorFactories.Builder;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.bucket.MultiBucketAggregationBuilder;
@@ -132,7 +133,11 @@ public SignificantTermsAggregationBuilder(StreamInput in) throws IOException {
super(in, ValuesSourceType.ANY);
bucketCountThresholds = new BucketCountThresholds(in);
executionHint = in.readOptionalString();
collectMode = in.readOptionalWriteable(SubAggCollectionMode::readFromStream);
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
collectMode = in.readOptionalWriteable(SubAggCollectionMode::readFromStream);
} else {
collectMode = SubAggCollectionMode.DEPTH_FIRST;
}
filterBuilder = in.readOptionalNamedWriteable(QueryBuilder.class);
includeExclude = in.readOptionalWriteable(IncludeExclude::new);
significanceHeuristic = in.readNamedWriteable(SignificanceHeuristic.class);
@@ -158,7 +163,11 @@ protected AggregationBuilder shallowCopy(Builder factoriesBuilder, Map<String, O
protected void innerWriteTo(StreamOutput out) throws IOException {
bucketCountThresholds.writeTo(out);
out.writeOptionalString(executionHint);
out.writeOptionalWriteable(collectMode);
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
out.writeOptionalWriteable(collectMode);
} else {
out.writeOptionalWriteable(SubAggCollectionMode.DEPTH_FIRST);
}
out.writeOptionalNamedWriteable(filterBuilder);
out.writeOptionalWriteable(includeExclude);
out.writeNamedWriteable(significanceHeuristic);
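
The constructor and `innerWriteTo` hunks above apply the usual wire backwards-compatibility
pattern: the configured `collectMode` is exchanged only with nodes on
`Version.V_7_0_0_alpha1` or later, and the `DEPTH_FIRST` default is used on older streams.
The following condensed sketch is hypothetical and only isolates that gate, reusing the
stream and version APIs imported above:

import java.io.IOException;

import org.elasticsearch.Version;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;

// Hypothetical helper isolating the version gate used in this commit.
final class CollectModeWire {

    // Reading: streams from 7.0.0-alpha1 or later carry the configured mode;
    // otherwise fall back to the DEPTH_FIRST default.
    static SubAggCollectionMode read(StreamInput in) throws IOException {
        if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
            return in.readOptionalWriteable(SubAggCollectionMode::readFromStream);
        }
        return SubAggCollectionMode.DEPTH_FIRST;
    }

    // Writing: send the configured mode to 7.0.0-alpha1+ nodes; on older
    // streams, pin the optional field to the DEPTH_FIRST default instead.
    static void write(StreamOutput out, SubAggCollectionMode collectMode) throws IOException {
        if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
            out.writeOptionalWriteable(collectMode);
        } else {
            out.writeOptionalWriteable(SubAggCollectionMode.DEPTH_FIRST);
        }
    }
}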
@@ -258,21 +258,6 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare
+ config.fieldContext().field() + "]. It can only be applied to numeric or string fields.");
}


// return the SubAggCollectionMode that this aggregation should use based on the expected size
// and the cardinality of the field
static SubAggCollectionMode subAggCollectionMode(int expectedSize, long maxOrd) {
if (expectedSize == Integer.MAX_VALUE) {
// return all buckets
return SubAggCollectionMode.DEPTH_FIRST;
}
if (maxOrd == -1 || maxOrd > expectedSize) {
// use breadth_first if the cardinality is bigger than the expected size or unknown (-1)
return SubAggCollectionMode.BREADTH_FIRST;
}
return SubAggCollectionMode.DEPTH_FIRST;
}

/**
* Get the maximum global ordinal value for the provided {@link ValuesSource} or -1
* if the values source is not an instance of {@link ValuesSource.Bytes.WithOrdinals}.
@@ -191,20 +191,6 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare
+ "]. It can only be applied to numeric or string fields.");
}

// return the SubAggCollectionMode that this aggregation should use based on the expected size
// and the cardinality of the field
static SubAggCollectionMode subAggCollectionMode(int expectedSize, long maxOrd) {
if (expectedSize == Integer.MAX_VALUE) {
// return all buckets
return SubAggCollectionMode.DEPTH_FIRST;
}
if (maxOrd == -1 || maxOrd > expectedSize) {
// use breadth_first if the cardinality is bigger than the expected size or unknown (-1)
return SubAggCollectionMode.BREADTH_FIRST;
}
return SubAggCollectionMode.DEPTH_FIRST;
}

/**
* Get the maximum global ordinal value for the provided {@link ValuesSource} or -1
* if the values source is not an instance of {@link ValuesSource.Bytes.WithOrdinals}.
@@ -50,6 +50,20 @@ public Aggregator createInternal(Aggregator parent, boolean collectsFromSingleBu
return doCreateInternal(vs, parent, collectsFromSingleBucket, pipelineAggregators, metaData);
}

// return the SubAggCollectionMode that this aggregation should use based on the expected size
// and the cardinality of the field
public static Aggregator.SubAggCollectionMode subAggCollectionMode(int expectedSize, long maxOrd) {
if (expectedSize == Integer.MAX_VALUE) {
// return all buckets
return Aggregator.SubAggCollectionMode.DEPTH_FIRST;
}
if (maxOrd == -1 || maxOrd > expectedSize) {
// use breadth_first if the cardinality is bigger than the expected size or unknown (-1)
return Aggregator.SubAggCollectionMode.BREADTH_FIRST;
}
return Aggregator.SubAggCollectionMode.DEPTH_FIRST;
}

protected abstract Aggregator createUnmapped(Aggregator parent,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException;
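
The mode-selection helper is consolidated here, replacing the two identical copies removed
earlier in this commit. The plain-Java sketch below restates its decision table; the local
`Mode` enum and class name are stand-ins so the snippet compiles without Elasticsearch on
the classpath:

// Plain-Java restatement of the subAggCollectionMode decision table above.
enum Mode { DEPTH_FIRST, BREADTH_FIRST }

final class CollectModeTable {

    static Mode pick(int expectedSize, long maxOrd) {
        if (expectedSize == Integer.MAX_VALUE) {
            // all buckets are returned, so nothing will be pruned
            return Mode.DEPTH_FIRST;
        }
        if (maxOrd == -1 || maxOrd > expectedSize) {
            // cardinality unknown (-1) or higher than the requested size: defer child aggs
            return Mode.BREADTH_FIRST;
        }
        // every distinct value fits in the requested size
        return Mode.DEPTH_FIRST;
    }

    public static void main(String[] args) {
        System.out.println(pick(Integer.MAX_VALUE, 500)); // DEPTH_FIRST
        System.out.println(pick(10, -1));                 // BREADTH_FIRST (cardinality unknown)
        System.out.println(pick(10, 1000));               // BREADTH_FIRST (1000 ords > 10 buckets)
        System.out.println(pick(1000, 10));               // DEPTH_FIRST (all 10 ords fit)
    }
}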

@@ -39,6 +39,7 @@
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.bucket.filter.InternalFilter;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory;
@@ -513,6 +514,7 @@ public void testScoresEqualForPositiveAndNegative(SignificanceHeuristic heuristi
.addAggregation(terms("class").field("class").subAggregation(significantTerms("mySignificantTerms")
.field("text")
.executionHint(randomExecutionHint())
.collectMode(randomFrom(Aggregator.SubAggCollectionMode.values()))
.significanceHeuristic(heuristic)
.minDocCount(1).shardSize(1000).size(1000)));
}else
@@ -597,6 +599,7 @@ public void testScriptScore() throws ExecutionException, InterruptedException, I
.subAggregation(significantTerms("mySignificantTerms")
.field(TEXT_FIELD)
.executionHint(randomExecutionHint())
.collectMode(randomFrom(Aggregator.SubAggCollectionMode.values()))
.significanceHeuristic(scriptHeuristic)
.minDocCount(1).shardSize(2).size(2)));
}
@@ -22,6 +22,7 @@
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.BaseAggregationTestCase;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
@@ -113,6 +114,18 @@ protected SignificantTermsAggregationBuilder createTestAggregatorBuilder() {
if (randomBoolean()) {
factory.backgroundFilter(QueryBuilders.termsQuery("foo", "bar"));
}
if (randomBoolean()) {
int collectMode = randomInt(1);
switch (collectMode) {
case 0:
factory.collectMode(Aggregator.SubAggCollectionMode.BREADTH_FIRST);
break;
case 1:
factory.collectMode(Aggregator.SubAggCollectionMode.DEPTH_FIRST);
break;
}
}
return factory;
}

@@ -20,7 +20,6 @@
package org.elasticsearch.search.aggregations.bucket.significant;

import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory;
import org.elasticsearch.test.ESTestCase;

import static org.hamcrest.Matchers.equalTo;