From 34bbf51815c85902f1b072a176be12ea254f1775 Mon Sep 17 00:00:00 2001 From: Zachary Tong Date: Thu, 20 Feb 2020 14:38:19 -0500 Subject: [PATCH 1/4] ValuesSource refactoring: Wire up SigTerms aggregation --- .../elasticsearch/search/SearchModule.java | 3 +- .../SignificantTermsAggregationBuilder.java | 5 + .../SignificantTermsAggregatorFactory.java | 187 ++++++++++++------ .../SignificantTermsAggregatorSupplier.java | 51 +++++ .../SignificantTermsAggregatorTests.java | 2 +- 5 files changed, 187 insertions(+), 61 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorSupplier.java diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index 36421b5cb9aac..b7702e8d4b318 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -402,7 +402,8 @@ private void registerAggregations(List plugins) { SignificantTermsAggregationBuilder::parse) .addResultReader(SignificantStringTerms.NAME, SignificantStringTerms::new) .addResultReader(SignificantLongTerms.NAME, SignificantLongTerms::new) - .addResultReader(UnmappedSignificantTerms.NAME, UnmappedSignificantTerms::new)); + .addResultReader(UnmappedSignificantTerms.NAME, UnmappedSignificantTerms::new) + .setAggregatorRegistrar(SignificantTermsAggregationBuilder::registerAggregators)); registerAggregation(new AggregationSpec(SignificantTextAggregationBuilder.NAME, SignificantTextAggregationBuilder::new, SignificantTextAggregationBuilder::parse)); registerAggregation(new AggregationSpec(RangeAggregationBuilder.NAME, RangeAggregationBuilder::new, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregationBuilder.java index 429e22c2b8524..8808545c38905 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregationBuilder.java @@ -42,6 +42,7 @@ import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.aggregations.support.ValuesSourceParserHelper; +import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; import org.elasticsearch.search.aggregations.support.ValuesSourceType; import java.io.IOException; @@ -93,6 +94,10 @@ public static SignificantTermsAggregationBuilder parse(String aggregationName, X return PARSER.parse(parser, new SignificantTermsAggregationBuilder(aggregationName), null); } + public static void registerAggregators(ValuesSourceRegistry valuesSourceRegistry) { + SignificantTermsAggregatorFactory.registerAggregators(valuesSourceRegistry); + } + private IncludeExclude includeExclude = null; private String executionHint = null; private QueryBuilder filterBuilder = null; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java index 052f24bcee15c..b7a57cf8a9f81 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java @@ -50,9 +50,12 @@ import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.AggregatorSupplier; +import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; +import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -75,17 +78,116 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac private final TermsAggregator.BucketCountThresholds bucketCountThresholds; private final SignificanceHeuristic significanceHeuristic; - public SignificantTermsAggregatorFactory(String name, - ValuesSourceConfig config, - IncludeExclude includeExclude, - String executionHint, - QueryBuilder filterBuilder, - TermsAggregator.BucketCountThresholds bucketCountThresholds, - SignificanceHeuristic significanceHeuristic, - QueryShardContext queryShardContext, - AggregatorFactory parent, - AggregatorFactories.Builder subFactoriesBuilder, - Map metaData) throws IOException { + static void registerAggregators(ValuesSourceRegistry valuesSourceRegistry) { + valuesSourceRegistry.register(SignificantTermsAggregationBuilder.NAME, + List.of(CoreValuesSourceType.BYTES, CoreValuesSourceType.IP), + SignificantTermsAggregatorFactory.bytesSupplier()); + + valuesSourceRegistry.register(SignificantTermsAggregationBuilder.NAME, + List.of(CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN, CoreValuesSourceType.NUMERIC), + SignificantTermsAggregatorFactory.numericSupplier()); + } + + /** + * This supplier is used for all the field types that should be aggregated as bytes/strings, + * including those that need global ordinals + */ + private static SignificantTermsAggregatorSupplier bytesSupplier() { + return new SignificantTermsAggregatorSupplier() { + @Override + public Aggregator build(String name, + AggregatorFactories factories, + ValuesSource valuesSource, + DocValueFormat format, + TermsAggregator.BucketCountThresholds bucketCountThresholds, + IncludeExclude includeExclude, + String executionHint, + SearchContext context, + Aggregator parent, + SignificanceHeuristic significanceHeuristic, + SignificantTermsAggregatorFactory sigTermsFactory, + List pipelineAggregators, + Map metaData) throws IOException { + + ExecutionMode execution = null; + if (executionHint != null) { + execution = ExecutionMode.fromString(executionHint, deprecationLogger); + } + if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals == false) { + execution = ExecutionMode.MAP; + } + if (execution == null) { + execution = ExecutionMode.GLOBAL_ORDINALS; + } + + if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) { + throw new IllegalArgumentException("Aggregation [" + name + "] cannot support regular expression style " + + "include/exclude settings as they can only be applied to string fields. Use an array of values for " + + "include/exclude clauses"); + } + + return execution.create(name, factories, valuesSource, format, bucketCountThresholds, includeExclude, context, parent, + significanceHeuristic, sigTermsFactory, pipelineAggregators, metaData); + + } + }; + } + + /** + * This supplier is used for all fields that expect to be aggregated as a numeric value. + * This includes floating points, and formatted types that use numerics internally for storage (date, boolean, etc) + */ + private static SignificantTermsAggregatorSupplier numericSupplier() { + return new SignificantTermsAggregatorSupplier() { + @Override + public Aggregator build(String name, + AggregatorFactories factories, + ValuesSource valuesSource, + DocValueFormat format, + TermsAggregator.BucketCountThresholds bucketCountThresholds, + IncludeExclude includeExclude, + String executionHint, + SearchContext context, + Aggregator parent, + SignificanceHeuristic significanceHeuristic, + SignificantTermsAggregatorFactory sigTermsFactory, + List pipelineAggregators, + Map metaData) throws IOException { + + if ((includeExclude != null) && (includeExclude.isRegexBased())) { + throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + + "settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses " + + "used to filter numeric fields"); + } + + if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) { + throw new UnsupportedOperationException("No support for examining floating point numerics"); + } + + IncludeExclude.LongFilter longFilter = null; + if (includeExclude != null) { + longFilter = includeExclude.convertToLongFilter(format); + } + + return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, format, + bucketCountThresholds, context, parent, significanceHeuristic, sigTermsFactory, longFilter, pipelineAggregators, + metaData); + + } + }; + } + + SignificantTermsAggregatorFactory(String name, + ValuesSourceConfig config, + IncludeExclude includeExclude, + String executionHint, + QueryBuilder filterBuilder, + TermsAggregator.BucketCountThresholds bucketCountThresholds, + SignificanceHeuristic significanceHeuristic, + QueryShardContext queryShardContext, + AggregatorFactory parent, + AggregatorFactories.Builder subFactoriesBuilder, + Map metaData) throws IOException { super(name, config, queryShardContext, parent, subFactoriesBuilder, metaData); if (!config.unmapped()) { @@ -111,7 +213,7 @@ public SignificantTermsAggregatorFactory(String name, /** * Get the number of docs in the superset. */ - public long getSupersetNumDocs() { + long getSupersetNumDocs() { return supersetNumDocs; } @@ -151,12 +253,12 @@ private long getBackgroundFrequency(String value) throws IOException { return queryShardContext.searcher().count(query); } - public long getBackgroundFrequency(BytesRef termBytes) throws IOException { + long getBackgroundFrequency(BytesRef termBytes) throws IOException { String value = config.format().format(termBytes).toString(); return getBackgroundFrequency(value); } - public long getBackgroundFrequency(long termNum) throws IOException { + long getBackgroundFrequency(long termNum) throws IOException { String value = config.format().format(termNum).toString(); return getBackgroundFrequency(value); } @@ -187,6 +289,13 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, return asMultiBucketAggregator(this, searchContext, parent); } + AggregatorSupplier aggregatorSupplier = queryShardContext.getValuesSourceRegistry().getAggregator(config.valueSourceType(), + SignificantTermsAggregationBuilder.NAME); + if (aggregatorSupplier instanceof SignificantTermsAggregatorSupplier == false) { + throw new AggregationExecutionException("Registry miss-match - expected SignificantTermsAggregatorSupplier, found [" + + aggregatorSupplier.getClass().toString() + "]"); + } + numberOfAggregatorsCreated++; BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(this.bucketCountThresholds); if (bucketCountThresholds.getShardSize() == SignificantTermsAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) { @@ -205,52 +314,12 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, bucketCountThresholds.setShardSize(2 * BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize())); } - if (valuesSource instanceof ValuesSource.Bytes) { - ExecutionMode execution = null; - if (executionHint != null) { - execution = ExecutionMode.fromString(executionHint, deprecationLogger); - } - if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals == false) { - execution = ExecutionMode.MAP; - } - if (execution == null) { - execution = ExecutionMode.GLOBAL_ORDINALS; - } - assert execution != null; - - DocValueFormat format = config.format(); - if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) { - throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style " - + "include/exclude settings as they can only be applied to string fields. Use an array of values for " - + "include/exclude clauses"); - } - - return execution.create(name, factories, valuesSource, format, bucketCountThresholds, includeExclude, searchContext, parent, - significanceHeuristic, this, pipelineAggregators, metaData); - } - - if ((includeExclude != null) && (includeExclude.isRegexBased())) { - throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " - + "settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses " - + "used to filter numeric fields"); - } - - if (valuesSource instanceof ValuesSource.Numeric) { - - if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) { - throw new UnsupportedOperationException("No support for examining floating point numerics"); - } - IncludeExclude.LongFilter longFilter = null; - if (includeExclude != null) { - longFilter = includeExclude.convertToLongFilter(config.format()); - } - return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), - bucketCountThresholds, searchContext, parent, significanceHeuristic, this, longFilter, pipelineAggregators, - metaData); - } + SignificantTermsAggregatorSupplier sigTermsAggregatorSupplier = (SignificantTermsAggregatorSupplier) aggregatorSupplier; - throw new AggregationExecutionException("significant_terms aggregation cannot be applied to field [" - + config.fieldContext().field() + "]. It can only be applied to numeric or string fields."); + // TODO we should refactor so that we don't need to use this Factory as a singleton (e.g. stop passing `this` to the aggregators) + return sigTermsAggregatorSupplier.build(name, factories, valuesSource, config.format(), + bucketCountThresholds, includeExclude, executionHint, searchContext, parent, + significanceHeuristic, this, pipelineAggregators, metaData); } public enum ExecutionMode { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorSupplier.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorSupplier.java new file mode 100644 index 0000000000000..d86d92a31fe95 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorSupplier.java @@ -0,0 +1,51 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.bucket.significant; + +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorFactories; +import org.elasticsearch.search.aggregations.BucketOrder; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; +import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.AggregatorSupplier; +import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +interface SignificantTermsAggregatorSupplier extends AggregatorSupplier { + Aggregator build(String name, + AggregatorFactories factories, + ValuesSource valuesSource, + DocValueFormat format, + TermsAggregator.BucketCountThresholds bucketCountThresholds, + IncludeExclude includeExclude, + String executionHint, + SearchContext context, + Aggregator parent, + SignificanceHeuristic significanceHeuristic, + SignificantTermsAggregatorFactory sigTermsFactory, + List pipelineAggregators, + Map metaData) throws IOException; +} diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java index 120e5cc00252e..5c11d2df76d16 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java @@ -294,7 +294,7 @@ public void testRangeField() throws IOException { try (IndexReader reader = DirectoryReader.open(w)) { IndexSearcher indexSearcher = newIndexSearcher(reader); - expectThrows(AggregationExecutionException.class, () -> createAggregator(sigAgg, indexSearcher, fieldType)); + expectThrows(IllegalArgumentException.class, () -> createAggregator(sigAgg, indexSearcher, fieldType)); } } } From 161ba7c92a5a2e0161d6eaac0b870b6d4cd64186 Mon Sep 17 00:00:00 2001 From: Zachary Tong Date: Thu, 20 Feb 2020 15:30:34 -0500 Subject: [PATCH 2/4] Fix yaml test --- .../rest-api-spec/test/search.aggregation/30_sig_terms.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms.yml index dabd4aebf815d..d7759afe4a907 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms.yml @@ -133,7 +133,7 @@ - length: { aggregations.ip_terms.buckets: 0 } - do: - catch: request + catch: /Aggregation \[ip_terms\] cannot support regular expression style include\/exclude settings as they can only be applied to string fields\. Use an array of values for include\/exclude clauses/ search: rest_total_hits_as_int: true body: { "size" : 0, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "exclude" : "127.*" } } } } From cf37c60f39c6932500151180a06126fe6969f6ca Mon Sep 17 00:00:00 2001 From: Zachary Tong Date: Mon, 24 Feb 2020 11:23:13 -0500 Subject: [PATCH 3/4] Tweak exception type --- .../bucket/significant/SignificantTermsAggregatorFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java index b7a57cf8a9f81..8a049c5ae1551 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java @@ -155,7 +155,7 @@ public Aggregator build(String name, Map metaData) throws IOException { if ((includeExclude != null) && (includeExclude.isRegexBased())) { - throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + throw new IllegalArgumentException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses " + "used to filter numeric fields"); } From 6e74e6bf5e9fa5e8267c549b645430dc3c329fb6 Mon Sep 17 00:00:00 2001 From: Zachary Tong Date: Mon, 24 Feb 2020 16:26:53 -0500 Subject: [PATCH 4/4] Checkstyle because I'm bad --- .../bucket/significant/SignificantTermsAggregatorFactory.java | 4 ++-- .../significant/SignificantTermsAggregatorSupplier.java | 1 - .../bucket/significant/SignificantTermsAggregatorTests.java | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java index 8a049c5ae1551..67aeed8312626 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java @@ -156,8 +156,8 @@ public Aggregator build(String name, if ((includeExclude != null) && (includeExclude.isRegexBased())) { throw new IllegalArgumentException("Aggregation [" + name + "] cannot support regular expression style include/exclude " - + "settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses " - + "used to filter numeric fields"); + + "settings as they can only be applied to string fields. Use an array of numeric " + + "values for include/exclude clauses used to filter numeric fields"); } if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorSupplier.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorSupplier.java index d86d92a31fe95..e9c494ffd1b48 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorSupplier.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorSupplier.java @@ -21,7 +21,6 @@ import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; -import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator; diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java index 5c11d2df76d16..b6b269d5bc256 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java @@ -46,7 +46,6 @@ import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory.ExecutionMode; import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude;