diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/20_terms.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/20_terms.yml index 1a3f07e6fa7c7..4b9b03ba17c2e 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/20_terms.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/20_terms.yml @@ -822,8 +822,8 @@ setup: --- "string profiler via global ordinals filters implementation": - skip: - version: " - 7.12.99" - reason: filters implementation first supported with sub-aggregators in 7.13.0 + version: " - 7.99.99" + reason: profile info changed in 8.0.0 to be backported to 7.14.0 - do: indices.create: index: test_3 @@ -879,7 +879,7 @@ setup: - match: { profile.shards.0.aggregations.0.type: StringTermsAggregatorFromFilters } - match: { profile.shards.0.aggregations.0.description: str_terms } - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 0 } - - match: { profile.shards.0.aggregations.0.debug.delegate: FiltersAggregator.FilterByFilter } + - match: { profile.shards.0.aggregations.0.debug.delegate: FilterByFilterAggregator } - match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.0.query: "str:cow" } - match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.1.query: "str:pig" } - match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.2.query: "str:sheep" } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml index 46ee2dc53207f..983db9391abc2 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml @@ -150,9 +150,10 @@ setup: --- "Test filters agg with doc_count": - skip: - version: " - 7.12.99" + version: " - 7.13.99" + reason: profile info changed in 7.14.0 features: default_shards - reason: "name changed in 7.13.0" + - do: search: body: @@ -177,7 +178,7 @@ setup: - match: { aggregations.f.buckets.abc.doc_count: 11 } - match: { aggregations.f.buckets.foo.doc_count: 8 } - match: { aggregations.f.buckets.xyz.doc_count: 5 } - - match: { profile.shards.0.aggregations.0.type: FiltersAggregator.FilterByFilter } + - match: { profile.shards.0.aggregations.0.type: FilterByFilterAggregator } # We can't assert that segments_with_doc_count_field is > 0 because we might # end up with two shards and all of the documents with the _doc_count field # may be on one field. 
We have a test for this in AggregationProfilerIT diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java index 162ceed91a3e8..427b1b8447b0e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java @@ -47,7 +47,6 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; -import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.notNullValue; @ESIntegTestCase.SuiteScopeTestCase @@ -646,21 +645,17 @@ public void testFilterByFilter() throws InterruptedException, IOException { "delegate_debug", matchesMap().entry("average_docs_per_range", equalTo(RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2)) .entry("ranges", 1) - .entry("delegate", "FiltersAggregator.FilterByFilter") + .entry("delegate", "FilterByFilterAggregator") .entry( "delegate_debug", matchesMap().entry("segments_with_deleted_docs", 0) .entry("segments_with_doc_count_field", 0) - .entry("max_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2) - .entry("estimated_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2) - .entry("estimate_cost_time", greaterThanOrEqualTo(0L)) // ~1,276,734 nanos is normal .entry("segments_counted", 0) .entry("segments_collected", greaterThan(0)) .entry( "filters", matchesList().item( - matchesMap().entry("scorers_prepared_while_estimating_cost", greaterThan(0)) - .entry("query", "DocValuesFieldExistsQuery [field=date]") + matchesMap().entry("query", "DocValuesFieldExistsQuery [field=date]") .entry("specialized_for", "docvalues_field_exists") .entry("results_from_metadata", 0) ) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptFieldType.java index e6ff26be950c6..2e19fb53d59dd 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptFieldType.java @@ -53,7 +53,7 @@ public KeywordScriptFieldType(String name) { this(name, StringFieldScript.PARSE_FROM_SOURCE, null, Collections.emptyMap(), (builder, params) -> builder); } - KeywordScriptFieldType( + public KeywordScriptFieldType( String name, StringFieldScript.Factory scriptFactory, Script script, diff --git a/server/src/main/java/org/elasticsearch/index/mapper/LongScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/LongScriptFieldType.java index 66052704efe57..419b08b0b61ad 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/LongScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/LongScriptFieldType.java @@ -46,7 +46,7 @@ public LongScriptFieldType(String name) { this(name, LongFieldScript.PARSE_FROM_SOURCE, null, Collections.emptyMap(), (builder, params) -> builder); } - LongScriptFieldType( + public LongScriptFieldType( String name, LongFieldScript.Factory scriptFactory, Script script, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java 
b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java index 1a0bd498b8e51..1c78fe67703d1 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java @@ -30,7 +30,7 @@ public abstract class AdaptingAggregator extends Aggregator { public AdaptingAggregator( Aggregator parent, AggregatorFactories subAggregators, - CheckedFunction delegate + CheckedFunction delegate ) throws IOException { // Its important we set parent first or else when we build the sub-aggregators they can fail because they'll call this.parent. this.parent = parent; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/DocValuesFieldExistsAdapter.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/DocValuesFieldExistsAdapter.java index 2df824b3d5e50..9500664e30275 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/DocValuesFieldExistsAdapter.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/DocValuesFieldExistsAdapter.java @@ -14,7 +14,6 @@ import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.util.Bits; -import org.elasticsearch.common.CheckedSupplier; import java.io.IOException; import java.util.function.BiConsumer; @@ -43,14 +42,6 @@ long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live) return super.count(ctx, counter, live); } - @Override - long estimateCountCost(LeafReaderContext ctx, CheckedSupplier canUseMetadata) throws IOException { - if (canUseMetadata.get() && canCountFromMetadata(ctx)) { - return 0; - } - return super.estimateCountCost(ctx, canUseMetadata); - } - private boolean canCountFromMetadata(LeafReaderContext ctx) throws IOException { FieldInfo info = ctx.reader().getFieldInfos().fieldInfo(query().getField()); if (info == null) { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FilterByFilterAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FilterByFilterAggregator.java new file mode 100644 index 0000000000000..ed08fe30ec901 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FilterByFilterAggregator.java @@ -0,0 +1,314 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.search.aggregations.bucket.filter; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.LeafCollector; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorable; +import org.apache.lucene.util.Bits; +import org.elasticsearch.common.CheckedSupplier; +import org.elasticsearch.core.CheckedFunction; +import org.elasticsearch.search.aggregations.AdaptingAggregator; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorFactories; +import org.elasticsearch.search.aggregations.CardinalityUpperBound; +import org.elasticsearch.search.aggregations.LeafBucketCollector; +import org.elasticsearch.search.aggregations.support.AggregationContext; +import org.elasticsearch.search.runtime.AbstractScriptFieldQuery; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.function.BiConsumer; + +/** + * Collects results by running each filter against the searcher and doesn't + * build any {@link LeafBucketCollector}s which is generally faster than + * {@link Compatible} but doesn't support when there is a parent aggregator + * or any child aggregators. + */ +public class FilterByFilterAggregator extends FiltersAggregator { + /** + * Builds {@link FilterByFilterAggregator} when the filters are valid and + * it would be faster than a "native" aggregation implementation. The + * interface is designed to allow easy construction of + * {@link AdaptingAggregator}. + */ + public abstract static class AdapterBuilder { + private final String name; + private final List> filters = new ArrayList<>(); + private final boolean keyed; + private final AggregationContext context; + private final Aggregator parent; + private final CardinalityUpperBound cardinality; + private final Map metadata; + private final Query rewrittenTopLevelQuery; + private boolean valid = true; + + public AdapterBuilder( + String name, + boolean keyed, + String otherBucketKey, + AggregationContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + this.name = name; + this.keyed = keyed; + this.context = context; + this.parent = parent; + this.cardinality = cardinality; + this.metadata = metadata; + this.rewrittenTopLevelQuery = context.searcher().rewrite(context.query()); + this.valid = parent == null && otherBucketKey == null; + } + + /** + * Subclasses should override this to adapt the + * {@link FilterByFilterAggregator} into another sort of aggregator + * if required. + */ + protected abstract T adapt(CheckedFunction delegate) + throws IOException; + + public final void add(String key, Query query) throws IOException { + if (valid == false) { + return; + } + if (query instanceof AbstractScriptFieldQuery) { + /* + * We know that runtime fields aren't fast to query at all + * but we expect all other sorts of queries are at least as + * fast as the native aggregator. + */ + valid = false; + return; + } + add(QueryToFilterAdapter.build(context.searcher(), key, query)); + } + + final void add(QueryToFilterAdapter filter) throws IOException { + QueryToFilterAdapter mergedFilter = filter.union(rewrittenTopLevelQuery); + if (mergedFilter.isInefficientUnion()) { + /* + * For now any complex union kicks us out of filter by filter + * mode. 
Its possible that this de-optimizes many "filters" + * aggregations but likely correct when "range", "date_histogram", + * or "terms" are converted to this agg. We investigated a sort + * of "combined" iteration mechanism and its complex *and* slower + * than the native implementations of the aggs above. + */ + valid = false; + return; + } + filters.add(mergedFilter); + } + + /** + * Build the the adapter or {@code null} if the this isn't a valid rewrite. + */ + public final T build() throws IOException { + if (false == valid) { + return null; + } + class AdapterBuild implements CheckedFunction { + private FilterByFilterAggregator agg; + + @Override + public FilterByFilterAggregator apply(AggregatorFactories subAggregators) throws IOException { + agg = new FilterByFilterAggregator(name, subAggregators, filters, keyed, context, parent, cardinality, metadata); + return agg; + } + } + AdapterBuild adapterBuild = new AdapterBuild(); + T result = adapt(adapterBuild); + if (adapterBuild.agg.scoreMode().needsScores()) { + /* + * Filter by filter won't produce the correct results if the + * sub-aggregators need scores because we're not careful with how + * we merge filters. Right now we have to build the whole + * aggregation in order to know if it'll need scores or not. + * This means we'll build the *sub-aggs* too. Oh well. + */ + return null; + } + return result; + } + } + + /** + * Count of segments with "live" docs. This is both deleted docs and + * docs covered by field level security. + */ + private int segmentsWithDeletedDocs; + /** + * Count of segments with documents have consult the {@code doc_count} + * field. + */ + private int segmentsWithDocCountField; + /** + * Count of segments this aggregator performed a document by document + * collection for. We have to collect when there are sub-aggregations + * and it disables some optimizations we can make while just counting. + */ + private int segmentsCollected; + /** + * Count of segments this aggregator counted. We can count when there + * aren't any sub-aggregators and we have some counting optimizations + * that don't apply to document by document collections. + *
<p>
+ * But the "fallback" for counting when we don't have a fancy optimization + * is to perform document by document collection and increment a counter + * on each document. This fallback does not increment the + * {@link #segmentsCollected} counter and does increment + * the {@link #segmentsCounted} counter because those counters are to + * signal which operation we were allowed to perform. The filters + * themselves will have debugging counters measuring if they could + * perform the count from metadata or had to fall back. + */ + private int segmentsCounted; + + /** + * Build the aggregation. Private to force callers to go through the + * {@link AdapterBuilder} which centralizes the logic to decide if this + * aggregator would be faster than the native implementation. + */ + private FilterByFilterAggregator( + String name, + AggregatorFactories factories, + List> filters, + boolean keyed, + AggregationContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + super(name, factories, filters, keyed, null, context, parent, cardinality, metadata); + } + + /** + * Instead of returning a {@link LeafBucketCollector} we do the + * collection ourselves by running the filters directly. This is safe + * because we only use this aggregator if there isn't a {@code parent} + * which would change how we collect buckets and because we take the + * top level query into account when building the filters. + */ + @Override + protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { + assert scoreMode().needsScores() == false; + if (filters().size() == 0) { + return LeafBucketCollector.NO_OP_COLLECTOR; + } + Bits live = ctx.reader().getLiveDocs(); + if (false == docCountProvider.alwaysOne()) { + segmentsWithDocCountField++; + } + if (subAggregators.length == 0) { + // TOOD we'd be better off if we could do sub.isNoop() or something. + /* + * Without sub.isNoop we always end up in the `collectXXX` modes even if + * the sub-aggregators opt out of traditional collection. + */ + segmentsCounted++; + collectCount(ctx, live); + } else { + segmentsCollected++; + collectSubs(ctx, live, sub); + } + return LeafBucketCollector.NO_OP_COLLECTOR; + } + + /** + * Gather a count of the number of documents that match each filter + * without sending any documents to a sub-aggregator. This yields + * the correct response when there aren't any sub-aggregators or they + * all opt out of needing any sort of collection. + */ + private void collectCount(LeafReaderContext ctx, Bits live) throws IOException { + Counter counter = new Counter(docCountProvider); + for (int filterOrd = 0; filterOrd < filters().size(); filterOrd++) { + incrementBucketDocCount(filterOrd, filters().get(filterOrd).count(ctx, counter, live)); + } + } + + /** + * Collect all documents that match all filters and send them to + * the sub-aggregators. This method is only required when there are + * sub-aggregators that haven't opted out of being collected. + *
<p>
+ * This collects each filter one at a time, resetting the + * sub-aggregators between each filter as though they were hitting + * a fresh segment. + *
<p>
+ * It's very tempting to try and collect the + * filters into blocks of matches and then reply the whole block + * into ascending order without the resetting. That'd probably + * work better if the disk was very, very slow and we didn't have + * any kind of disk caching. But with disk caching its about twice + * as fast to collect each filter one by one like this. And it uses + * less memory because there isn't a need to buffer a block of matches. + * And its a hell of a lot less code. + */ + private void collectSubs(LeafReaderContext ctx, Bits live, LeafBucketCollector sub) throws IOException { + class MatchCollector implements LeafCollector { + LeafBucketCollector subCollector = sub; + int filterOrd; + + @Override + public void collect(int docId) throws IOException { + collectBucket(subCollector, docId, filterOrd); + } + + @Override + public void setScorer(Scorable scorer) throws IOException { + } + } + MatchCollector collector = new MatchCollector(); + filters().get(0).collect(ctx, collector, live); + for (int filterOrd = 1; filterOrd < filters().size(); filterOrd++) { + collector.subCollector = collectableSubAggregators.getLeafCollector(ctx); + collector.filterOrd = filterOrd; + filters().get(filterOrd).collect(ctx, collector, live); + } + } + + @Override + public void collectDebugInfo(BiConsumer add) { + super.collectDebugInfo(add); + add.accept("segments_counted", segmentsCounted); + add.accept("segments_collected", segmentsCollected); + add.accept("segments_with_deleted_docs", segmentsWithDeletedDocs); + add.accept("segments_with_doc_count_field", segmentsWithDocCountField); + } + + CheckedSupplier canUseMetadata(LeafReaderContext ctx) { + return new CheckedSupplier() { + Boolean canUse; + + @Override + public Boolean get() throws IOException { + if (canUse == null) { + canUse = canUse(); + } + return canUse; + } + + private boolean canUse() throws IOException { + if (ctx.reader().getLiveDocs() != null) { + return false; + } + docCountProvider.setLeafReaderContext(ctx); + return docCountProvider.alwaysOne(); + } + }; + } +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index 094b9bcd58059..b83b7c5a06676 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -11,14 +11,13 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.Scorable; -import org.apache.lucene.util.Bits; -import org.elasticsearch.common.CheckedSupplier; -import org.elasticsearch.common.xcontent.ParseField; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ParseField; import org.elasticsearch.common.xcontent.ToXContentFragment; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.core.CheckedFunction; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; @@ -43,7 +42,7 @@ /** * Aggregator for {@code filters}. 
There are two known subclasses, - * {@link FilterByFilter} which is fast but only works in some cases and + * {@link FilterByFilterAggregator} which is fast but only works in some cases and * {@link Compatible} which works in all cases. * {@link FiltersAggregator#build} will build the fastest version that * works with the configuration. @@ -119,7 +118,7 @@ public boolean equals(Object obj) { /** * Build an {@link Aggregator} for a {@code filters} aggregation. If there * isn't a parent, there aren't children, and we don't collect "other" - * buckets then this will a faster {@link FilterByFilter} aggregator. + * buckets then this will a faster {@link FilterByFilterAggregator} aggregator. * Otherwise it'll fall back to a slower aggregator that is * {@link Compatible} with parent, children, and "other" buckets. */ @@ -134,28 +133,29 @@ public static FiltersAggregator build( CardinalityUpperBound cardinality, Map metadata ) throws IOException { - if (canUseFilterByFilter(parent, otherBucketKey)) { - FilterByFilter filterByFilter = buildFilterByFilter( + FilterByFilterAggregator.AdapterBuilder filterByFilterBuilder = + new FilterByFilterAggregator.AdapterBuilder( name, - factories, - filters, keyed, otherBucketKey, context, parent, cardinality, metadata - ); - if (false == filterByFilter.scoreMode().needsScores()) { - /* - * Filter by filter won't produce the correct results if the - * sub-aggregators need scores because we're not careful with how - * we merge filters. Right now we have to build the whole - * aggregation in order to know if it'll need scores or not. - */ - // TODO make filter by filter produce the correct result or skip this in canUseFilterbyFilter - return filterByFilter; - } + ) { + @Override + protected FilterByFilterAggregator adapt( + CheckedFunction delegate + ) throws IOException { + return delegate.apply(factories); + } + }; + for (QueryToFilterAdapter f : filters) { + filterByFilterBuilder.add(f); + } + FilterByFilterAggregator filterByFilter = filterByFilterBuilder.build(); + if (filterByFilter != null) { + return filterByFilter; } return new FiltersAggregator.Compatible( name, @@ -170,60 +170,11 @@ public static FiltersAggregator build( ); } - /** - * Can this aggregation be executed using the {@link FilterByFilter}? That - * aggregator is much faster than the fallback {@link Compatible} aggregator. - */ - public static boolean canUseFilterByFilter(Aggregator parent, String otherBucketKey) { - return parent == null && otherBucketKey == null; - } - - /** - * Build an {@link Aggregator} for a {@code filters} aggregation if we - * can collect {@link FilterByFilter}, otherwise return {@code null}. We can - * collect filter by filter if there isn't a parent, there aren't children, - * and we don't collect "other" buckets. Collecting {@link FilterByFilter} - * is generally going to be much faster than the {@link Compatible} aggregator. - *
<p>
- * Important: This doesn't properly handle sub-aggregators - * that need scores so callers must check {@code #scoreMode()} and not use - * this collector if it need scores. - */ - public static FilterByFilter buildFilterByFilter( - String name, - AggregatorFactories factories, - List> filters, - boolean keyed, - String otherBucketKey, - AggregationContext context, - Aggregator parent, - CardinalityUpperBound cardinality, - Map metadata - ) throws IOException { - if (false == canUseFilterByFilter(parent, otherBucketKey)) { - throw new IllegalStateException("Can't execute filter-by-filter"); - } - List> filtersWithTopLevel = new ArrayList<>(filters.size()); - for (QueryToFilterAdapter f : filters) { - filtersWithTopLevel.add(f.union(context.query())); - } - return new FiltersAggregator.FilterByFilter( - name, - factories, - filtersWithTopLevel, - keyed, - context, - parent, - cardinality, - metadata - ); - } - private final List> filters; private final boolean keyed; protected final String otherBucketKey; - private FiltersAggregator(String name, AggregatorFactories factories, List> filters, boolean keyed, + FiltersAggregator(String name, AggregatorFactories factories, List> filters, boolean keyed, String otherBucketKey, AggregationContext context, Aggregator parent, CardinalityUpperBound cardinality, Map metadata) throws IOException { super(name, factories, context, parent, cardinality.multiply(filters.size() + (otherBucketKey == null ? 0 : 1)), metadata); @@ -277,226 +228,13 @@ public void collectDebugInfo(BiConsumer add) { add.accept("filters", filtersDebug); } - /** - * Collects results by running each filter against the searcher and doesn't - * build any {@link LeafBucketCollector}s which is generally faster than - * {@link Compatible} but doesn't support when there is a parent aggregator - * or any child aggregators. - */ - public static class FilterByFilter extends FiltersAggregator { - private final boolean profiling; - private long estimatedCost = -1; - /** - * The maximum allowed estimated cost. Defaults to {@code -1} meaning no - * max but can be set. Used for emitting debug info. - */ - private long maxCost = -1; - private long estimateCostTime; - private int segmentsWithDeletedDocs; - /** - * Count of segments with documents have consult the {@code doc_count} - * field. - */ - private int segmentsWithDocCountField; - private int segmentsCollected; - private int segmentsCounted; - - private FilterByFilter( - String name, - AggregatorFactories factories, - List> filters, - boolean keyed, - AggregationContext context, - Aggregator parent, - CardinalityUpperBound cardinality, - Map metadata - ) throws IOException { - super(name, factories, filters, keyed, null, context, parent, cardinality, metadata); - this.profiling = context.profiling(); - } - - /** - * Estimate the number of documents that this aggregation must visit. We'll - * stop counting once we've passed {@code maxEstimatedCost} if we aren't profiling. - */ - @SuppressWarnings("resource") // We're not in change of anything Closeable - public long estimateCost(long maxCost) throws IOException { - assert scoreMode().needsScores() == false; - // TODO if we have children we should use a different cost estimate - this.maxCost = maxCost; - if (estimatedCost != -1) { - return estimatedCost; - } - long start = profiling ? 
System.nanoTime() : 0; - estimatedCost = 0; - for (LeafReaderContext ctx : searcher().getIndexReader().leaves()) { - CheckedSupplier canUseMetadata = canUseMetadata(ctx); - for (QueryToFilterAdapter filter : filters()) { - estimatedCost += subAggregators().length > 0 - ? filter.estimateCollectCost(ctx) - : filter.estimateCountCost(ctx, canUseMetadata); - if (estimatedCost < 0) { - // We've overflowed so we cap out and stop counting. - estimatedCost = Long.MAX_VALUE; - if (profiling && estimateCostTime == 0) { - estimateCostTime = System.nanoTime() - start; - } - return estimatedCost; - } - if (estimatedCost > maxCost) { - if (profiling) { - /* - * If we're profiling we stop the timer the first - * time we pass the limit but we keep counting so - * we get an accurate estimate. - */ - if (estimateCostTime == 0) { - estimateCostTime = System.nanoTime() - start; - } - } else { - // We're past the limit and not profiling. No use counting further. - return estimatedCost; - } - } - } - } - if (profiling && estimateCostTime == 0) { - estimateCostTime = System.nanoTime() - start; - } - return estimatedCost; - } - - /** - * Instead of returning a {@link LeafBucketCollector} we do the - * collection ourselves by running the filters directly. This is safe - * because we only use this aggregator if there isn't a {@code parent} - * which would change how we collect buckets and because we take the - * top level query into account when building the filters. - */ - @Override - protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { - assert scoreMode().needsScores() == false; - if (filters().size() == 0) { - return LeafBucketCollector.NO_OP_COLLECTOR; - } - Bits live = ctx.reader().getLiveDocs(); - if (false == docCountProvider.alwaysOne()) { - segmentsWithDocCountField++; - } - if (subAggregators.length == 0) { - // TOOD we'd be better off if we could do sub.isNoop() or something. - /* - * Without sub.isNoop we always end up in the `collectXXX` modes even if - * the sub-aggregators opt out of traditional collection. - */ - segmentsCounted++; - collectCount(ctx, live); - } else { - segmentsCollected++; - collectSubs(ctx, live, sub); - } - return LeafBucketCollector.NO_OP_COLLECTOR; - } - - /** - * Gather a count of the number of documents that match each filter - * without sending any documents to a sub-aggregator. This yields - * the correct response when there aren't any sub-aggregators or they - * all opt out of needing any sort of collection. - */ - private void collectCount(LeafReaderContext ctx, Bits live) throws IOException { - Counter counter = new Counter(docCountProvider); - for (int filterOrd = 0; filterOrd < filters().size(); filterOrd++) { - incrementBucketDocCount(filterOrd, filters().get(filterOrd).count(ctx, counter, live)); - } - } - - /** - * Collect all documents that match all filters and send them to - * the sub-aggregators. This method is only required when there are - * sub-aggregators that haven't opted out of being collected. - *
<p>
- * This collects each filter one at a time, resetting the - * sub-aggregators between each filter as though they were hitting - * a fresh segment. - *
<p>
- * It's very tempting to try and collect the - * filters into blocks of matches and then reply the whole block - * into ascending order without the resetting. That'd probably - * work better if the disk was very, very slow and we didn't have - * any kind of disk caching. But with disk caching its about twice - * as fast to collect each filter one by one like this. And it uses - * less memory because there isn't a need to buffer a block of matches. - * And its a hell of a lot less code. - */ - private void collectSubs(LeafReaderContext ctx, Bits live, LeafBucketCollector sub) throws IOException { - class MatchCollector implements LeafCollector { - LeafBucketCollector subCollector = sub; - int filterOrd; - - @Override - public void collect(int docId) throws IOException { - collectBucket(subCollector, docId, filterOrd); - } - - @Override - public void setScorer(Scorable scorer) throws IOException { - } - } - MatchCollector collector = new MatchCollector(); - filters().get(0).collect(ctx, collector, live); - for (int filterOrd = 1; filterOrd < filters().size(); filterOrd++) { - collector.subCollector = collectableSubAggregators.getLeafCollector(ctx); - collector.filterOrd = filterOrd; - filters().get(filterOrd).collect(ctx, collector, live); - } - } - - @Override - public void collectDebugInfo(BiConsumer add) { - super.collectDebugInfo(add); - add.accept("segments_counted", segmentsCounted); - add.accept("segments_collected", segmentsCollected); - add.accept("segments_with_deleted_docs", segmentsWithDeletedDocs); - add.accept("segments_with_doc_count_field", segmentsWithDocCountField); - if (estimatedCost != -1) { - // -1 means we didn't estimate it. - add.accept("estimated_cost", estimatedCost); - add.accept("max_cost", maxCost); - add.accept("estimate_cost_time", estimateCostTime); - } - } - - CheckedSupplier canUseMetadata(LeafReaderContext ctx) { - return new CheckedSupplier() { - Boolean canUse; - - @Override - public Boolean get() throws IOException { - if (canUse == null) { - canUse = canUse(); - } - return canUse; - } - - private boolean canUse() throws IOException { - if (ctx.reader().getLiveDocs() != null) { - return false; - } - docCountProvider.setLeafReaderContext(ctx); - return docCountProvider.alwaysOne(); - } - }; - } - } - /** * Collects results by building a {@link LongPredicate} per filter and testing if * each doc sent to its {@link LeafBucketCollector} is in each filter - * which is generally slower than {@link FilterByFilter} but is compatible + * which is generally slower than {@link FilterByFilterAggregator} but is compatible * with parent and child aggregations. 
*/ - private static class Compatible extends FiltersAggregator { + static class Compatible extends FiltersAggregator { private final int totalNumKeys; Compatible( diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MatchAllQueryToFilterAdapter.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MatchAllQueryToFilterAdapter.java index f3b39fb0c55f5..95fbb8449cd6b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MatchAllQueryToFilterAdapter.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MatchAllQueryToFilterAdapter.java @@ -13,7 +13,6 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.Bits; -import org.elasticsearch.common.CheckedSupplier; import java.io.IOException; import java.util.function.BiConsumer; @@ -48,11 +47,6 @@ long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live) return super.count(ctx, counter, live); } - @Override - long estimateCountCost(LeafReaderContext ctx, CheckedSupplier canUseMetadata) throws IOException { - return canUseMetadata.get() ? 0 : ctx.reader().maxDoc(); - } - @Override void collectDebugInfo(BiConsumer add) { super.collectDebugInfo(add); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MatchNoneQueryToFilterAdapter.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MatchNoneQueryToFilterAdapter.java index b6282debfbd49..59cec2207011b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MatchNoneQueryToFilterAdapter.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MatchNoneQueryToFilterAdapter.java @@ -13,7 +13,6 @@ import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.Bits; -import org.elasticsearch.common.CheckedSupplier; import java.io.IOException; import java.util.function.BiConsumer; @@ -42,11 +41,6 @@ long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live) return 0; } - @Override - long estimateCountCost(LeafReaderContext ctx, CheckedSupplier canUseMetadata) throws IOException { - return 0; - } - @Override void collectDebugInfo(BiConsumer add) { super.collectDebugInfo(add); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java index eb365c2c38bc4..29534bb1d69fa 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java @@ -27,7 +27,6 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; -import org.elasticsearch.common.CheckedSupplier; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -82,18 +81,6 @@ public static QueryToFilterAdapter build(IndexSearcher searcher, String key, * {@link #weight()} to build it when needed. */ private Weight weight; - /** - * Scorer for each segment or {@code null} if we haven't built the scorer. - * Use {@link #bulkScorer(LeafReaderContext, Runnable)} to build the scorer - * when needed. 
- */ - private BulkScorer[] bulkScorers; - /** - * The number of scorers we prepared just to estimate the cost of counting - * documents. For some queries preparing the scorers is very slow so its - * nice to know how many we built. Exposed by profiling. - */ - private int scorersPreparedWhileEstimatingCost; QueryToFilterAdapter(IndexSearcher searcher, String key, Q query) { this.searcher = searcher; @@ -111,6 +98,16 @@ Q query() { return query; } + /** + * Is this an inefficient union of the top level query with the filter? + * If the top level query if complex we can't efficiently merge it with + * the filter. If we can't do that it is likely faster to just run the + * "native" aggregation implementation rather than go filter by filter. + */ + public boolean isInefficientUnion() { + return false; + } + /** * Key for this filter. */ @@ -179,7 +176,11 @@ QueryToFilterAdapter union(Query extraQuery) throws IOException { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(query, BooleanClause.Occur.MUST); builder.add(extraQuery, BooleanClause.Occur.MUST); - return new QueryToFilterAdapter<>(searcher(), key(), builder.build()); + return new QueryToFilterAdapter(searcher(), key(), builder.build()) { + public boolean isInefficientUnion() { + return true; + } + }; } private static Query unwrap(Query query) { @@ -217,7 +218,7 @@ IntPredicate matchingDocIds(LeafReaderContext ctx) throws IOException { * Count the number of documents that match this filter in a leaf. */ long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live) throws IOException { - BulkScorer scorer = bulkScorer(ctx, () -> {}); + BulkScorer scorer = weight().bulkScorer(ctx); if (scorer == null) { // No hits in this segment. return 0; @@ -226,18 +227,11 @@ long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live) return counter.readAndReset(ctx); } - /** - * Estimate the cost of calling {@code #count} in a leaf. - */ - long estimateCountCost(LeafReaderContext ctx, CheckedSupplier canUseMetadata) throws IOException { - return estimateCollectCost(ctx); - } - /** * Collect all documents that match this filter in this leaf. */ void collect(LeafReaderContext ctx, LeafCollector collector, Bits live) throws IOException { - BulkScorer scorer = bulkScorer(ctx, () -> {}); + BulkScorer scorer = weight().bulkScorer(ctx); if (scorer == null) { // No hits in this segment. return; @@ -245,18 +239,6 @@ void collect(LeafReaderContext ctx, LeafCollector collector, Bits live) throws I scorer.score(collector, live); } - /** - * Estimate the cost of calling {@code #count} in a leaf. - */ - long estimateCollectCost(LeafReaderContext ctx) throws IOException { - BulkScorer scorer = bulkScorer(ctx, () -> scorersPreparedWhileEstimatingCost++); - if (scorer == null) { - // There aren't any matches for this filter in this leaf - return 0; - } - return scorer.cost(); // TODO change this to ScorerSupplier.cost - } - /** * Collect profiling information for this filter. Rhymes with * {@link Aggregator#collectDebugInfo(BiConsumer)}. 
@@ -270,18 +252,6 @@ long estimateCollectCost(LeafReaderContext ctx) throws IOException { */ void collectDebugInfo(BiConsumer add) { add.accept("query", query.toString()); - add.accept("scorers_prepared_while_estimating_cost", scorersPreparedWhileEstimatingCost); - } - - private BulkScorer bulkScorer(LeafReaderContext ctx, Runnable onPrepare) throws IOException { - if (bulkScorers == null) { - bulkScorers = new BulkScorer[searcher().getIndexReader().leaves().size()]; - } - if (bulkScorers[ctx.ord] == null) { - onPrepare.run(); - return bulkScorers[ctx.ord] = weight().bulkScorer(ctx); - } - return bulkScorers[ctx.ord]; } private Weight weight() throws IOException { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/TermQueryToFilterAdapter.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/TermQueryToFilterAdapter.java index 9cbd3f72c9a78..bc7ab8f6b8d52 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/TermQueryToFilterAdapter.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/TermQueryToFilterAdapter.java @@ -12,7 +12,6 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.Bits; -import org.elasticsearch.common.CheckedSupplier; import java.io.IOException; import java.util.function.BiConsumer; @@ -36,14 +35,6 @@ long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live) return super.count(ctx, counter, live); } - @Override - long estimateCountCost(LeafReaderContext ctx, CheckedSupplier canUseMetadata) throws IOException { - if (canUseMetadata.get()) { - return 0; - } - return super.estimateCountCost(ctx, canUseMetadata); - } - @Override void collectDebugInfo(BiConsumer add) { super.collectDebugInfo(add); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/GeoDistanceRangeAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/GeoDistanceRangeAggregatorFactory.java index ccc01eccdfb54..1e1314e3da063 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/GeoDistanceRangeAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/GeoDistanceRangeAggregatorFactory.java @@ -63,7 +63,6 @@ public static void registerAggregators(ValuesSourceRegistry.Builder builder) { rangeFactory, ranges, averageDocsPerRange, - null, // null here because we didn't try filters at all keyed, context, parent, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index b8bb7bda6c7b1..f7e6b697d3140 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -10,17 +10,17 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScorerSupplier; -import org.elasticsearch.core.CheckedFunction; -import org.elasticsearch.common.xcontent.ParseField; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.xcontent.ConstructingObjectParser; import 
org.elasticsearch.common.xcontent.ContextParser; import org.elasticsearch.common.xcontent.ObjectParser.ValueType; +import org.elasticsearch.common.xcontent.ParseField; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.core.CheckedFunction; import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; import org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType; import org.elasticsearch.index.mapper.DateFieldMapper.Resolution; @@ -36,7 +36,7 @@ import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; import org.elasticsearch.search.aggregations.NonCollectingAggregator; import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; -import org.elasticsearch.search.aggregations.bucket.filter.QueryToFilterAdapter; +import org.elasticsearch.search.aggregations.bucket.filter.FilterByFilterAggregator; import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator; import org.elasticsearch.search.aggregations.bucket.filter.InternalFilters; import org.elasticsearch.search.aggregations.bucket.range.InternalRange.Factory; @@ -47,7 +47,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -291,23 +290,8 @@ public static Aggregator build( cardinality, metadata ); - Map filtersDebug = null; if (adapted != null) { - long maxEstimatedFiltersCost = context.searcher().getIndexReader().maxDoc(); - long estimatedFiltersCost = adapted.estimateCost(maxEstimatedFiltersCost); - if (estimatedFiltersCost <= maxEstimatedFiltersCost) { - return adapted; - } - /* - * Looks like it'd be more expensive to use the filter-by-filter - * aggregator. Oh well. Snapshot the the filter-by-filter - * aggregator's debug information if we're profiling bececause it - * is useful even if the aggregator isn't. - */ - if (context.profiling()) { - filtersDebug = new HashMap<>(); - adapted.delegate().collectDebugInfo(filtersDebug::put); - } + return adapted; } return buildWithoutAttemptedToAdaptToFilters( name, @@ -317,7 +301,6 @@ public static Aggregator build( rangeFactory, ranges, averageDocsPerRange, - filtersDebug, keyed, context, parent, @@ -350,14 +333,27 @@ public static FromFilters adaptIntoFiltersOrNull( // We don't generate sensible Queries for nanoseconds. 
return null; } - if (false == FiltersAggregator.canUseFilterByFilter(parent, null)) { - return null; - } if (false == context.enableRewriteToFilterByFilter()) { return null; } boolean wholeNumbersOnly = false == ((ValuesSource.Numeric) valuesSourceConfig.getValuesSource()).isFloatingPoint(); - List> filters = new ArrayList<>(ranges.length); + FilterByFilterAggregator.AdapterBuilder> filterByFilterBuilder = new FilterByFilterAggregator.AdapterBuilder< + FromFilters>(name, false, null, context, parent, cardinality, metadata) { + @Override + protected FromFilters adapt(CheckedFunction delegate) + throws IOException { + return new FromFilters<>( + parent, + factories, + delegate, + valuesSourceConfig.format(), + ranges, + keyed, + rangeFactory, + averageDocsPerRange + ); + } + }; for (int i = 0; i < ranges.length; i++) { /* * If the bounds on the ranges are too high then the `double`s @@ -383,41 +379,9 @@ public static FromFilters adaptIntoFiltersOrNull( RangeQueryBuilder builder = new RangeQueryBuilder(valuesSourceConfig.fieldType().name()); builder.from(ranges[i].from == Double.NEGATIVE_INFINITY ? null : format.format(ranges[i].from)).includeLower(true); builder.to(ranges[i].to == Double.POSITIVE_INFINITY ? null : format.format(ranges[i].to)).includeUpper(false); - filters.add(QueryToFilterAdapter.build(context.searcher(), Integer.toString(i), context.buildQuery(builder))); + filterByFilterBuilder.add(Integer.toString(i), context.buildQuery(builder)); } - RangeAggregator.FromFilters fromFilters = new RangeAggregator.FromFilters<>( - parent, - factories, - subAggregators -> { - return FiltersAggregator.buildFilterByFilter( - name, - subAggregators, - filters, - false, - null, - context, - parent, - cardinality, - metadata - ); - }, - valuesSourceConfig.format(), - ranges, - keyed, - rangeFactory, - averageDocsPerRange - ); - if (fromFilters.scoreMode().needsScores()) { - /* - * Filter by filter won't produce the correct results if the - * sub-aggregators need scores because we're not careful with how - * we merge filters. Right now we have to build the whole - * aggregation in order to know if it'll need scores or not. 
- */ - // TODO make filter by filter produce the correct result or skip this in canUseFilterbyFilter - return null; - } - return fromFilters; + return filterByFilterBuilder.build(); } public static Aggregator buildWithoutAttemptedToAdaptToFilters( @@ -428,7 +392,6 @@ public static Aggregator buildWithoutAttemptedToAdaptToFilters( InternalRange.Factory rangeFactory, Range[] ranges, double averageDocsPerRange, - Map filtersDebug, boolean keyed, AggregationContext context, Aggregator parent, @@ -444,7 +407,6 @@ public static Aggregator buildWithoutAttemptedToAdaptToFilters( rangeFactory, ranges, averageDocsPerRange, - filtersDebug, keyed, context, parent, @@ -460,7 +422,6 @@ public static Aggregator buildWithoutAttemptedToAdaptToFilters( rangeFactory, ranges, averageDocsPerRange, - filtersDebug, keyed, context, parent, @@ -475,7 +436,6 @@ public static Aggregator buildWithoutAttemptedToAdaptToFilters( private final boolean keyed; private final InternalRange.Factory rangeFactory; private final double averageDocsPerRange; - private final Map filtersDebug; private RangeAggregator( String name, @@ -485,7 +445,6 @@ private RangeAggregator( InternalRange.Factory rangeFactory, Range[] ranges, double averageDocsPerRange, - Map filtersDebug, boolean keyed, AggregationContext context, Aggregator parent, @@ -500,7 +459,6 @@ private RangeAggregator( this.rangeFactory = rangeFactory; this.ranges = ranges; this.averageDocsPerRange = averageDocsPerRange; - this.filtersDebug = filtersDebug; } @Override @@ -560,9 +518,6 @@ public void collectDebugInfo(BiConsumer add) { super.collectDebugInfo(add); add.accept("ranges", ranges.length); add.accept("average_docs_per_range", averageDocsPerRange); - if (filtersDebug != null) { - add.accept("filters_debug", filtersDebug); - } } public static class Unmapped extends NonCollectingAggregator { @@ -604,7 +559,7 @@ public InternalAggregation buildEmptyAggregation() { protected abstract int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException; - private static class NoOverlap extends RangeAggregator { + static class NoOverlap extends RangeAggregator { NoOverlap( String name, AggregatorFactories factories, @@ -613,7 +568,6 @@ private static class NoOverlap extends RangeAggregator { Factory rangeFactory, Range[] ranges, double averageDocsPerRange, - Map filtersDebug, boolean keyed, AggregationContext context, Aggregator parent, @@ -628,7 +582,6 @@ private static class NoOverlap extends RangeAggregator { rangeFactory, ranges, averageDocsPerRange, - filtersDebug, keyed, context, parent, @@ -665,7 +618,6 @@ private static class Overlap extends RangeAggregator { Factory rangeFactory, Range[] ranges, double averageDocsPerRange, - Map filtersDebug, boolean keyed, AggregationContext context, Aggregator parent, @@ -680,7 +632,6 @@ private static class Overlap extends RangeAggregator { rangeFactory, ranges, averageDocsPerRange, - filtersDebug, keyed, context, parent, @@ -758,7 +709,7 @@ static class FromFilters extends AdaptingAggrega FromFilters( Aggregator parent, AggregatorFactories subAggregators, - CheckedFunction delegate, + CheckedFunction delegate, DocValueFormat format, Range[] ranges, boolean keyed, @@ -773,13 +724,6 @@ static class FromFilters extends AdaptingAggrega this.averageDocsPerRange = averageDocsPerRange; } - /** - * Estimate the number of documents that this aggregation must visit. 
- */ - long estimateCost(long maxEstimatedCost) throws IOException { - return ((FiltersAggregator.FilterByFilter) delegate()).estimateCost(maxEstimatedCost); - } - @Override protected InternalAggregation adapt(InternalAggregation delegateResult) { InternalFilters filters = (InternalFilters) delegateResult; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java index 86722cbc5bad3..510faeb2071cb 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java @@ -23,9 +23,8 @@ import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.InternalOrder; import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation.Bucket; -import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator; +import org.elasticsearch.search.aggregations.bucket.filter.FilterByFilterAggregator; import org.elasticsearch.search.aggregations.bucket.filter.InternalFilters; -import org.elasticsearch.search.aggregations.bucket.filter.QueryToFilterAdapter; import org.elasticsearch.search.aggregations.bucket.terms.GlobalOrdinalsStringTermsAggregator.OrdBucket; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds; import org.elasticsearch.search.aggregations.support.AggregationContext; @@ -68,11 +67,34 @@ static StringTermsAggregatorFromFilters adaptIntoFiltersOrNull( if (false == valuesSourceConfig.alignesWithSearchIndex()) { return null; } - if (false == FiltersAggregator.canUseFilterByFilter(parent, null)) { - return null; - } - List> filters = new ArrayList<>(); TermsEnum terms = values.termsEnum(); + FilterByFilterAggregator.AdapterBuilder filterByFilterBuilder = + new FilterByFilterAggregator.AdapterBuilder( + name, + false, + null, + context, + parent, + cardinality, + metadata + ) { + @Override + protected StringTermsAggregatorFromFilters adapt( + CheckedFunction delegate + ) throws IOException { + return new StringTermsAggregatorFromFilters( + parent, + factories, + delegate, + showTermDocCountError, + valuesSourceConfig.format(), + order, + bucketCountThresholds, + terms + ); + } + }; + String field = valuesSourceConfig.fieldContext().field(); for (long ord = 0; ord < values.getValueCount(); ord++) { if (acceptedOrds.test(ord) == false) { continue; @@ -86,42 +108,10 @@ static StringTermsAggregatorFromFilters adaptIntoFiltersOrNull( * the segment ordinal to the global ordinal. You could * search the mapping to get it but, like I said, tricky. 
*/ - TermQueryBuilder b = new TermQueryBuilder( - valuesSourceConfig.fieldContext().field(), - valuesSourceConfig.format().format(terms.term()) - ); - filters.add(QueryToFilterAdapter.build(context.searcher(), Long.toString(ord), context.buildQuery(b))); - } - StringTermsAggregatorFromFilters adapted = new StringTermsAggregatorFromFilters( - parent, - factories, - subAggs -> FiltersAggregator.buildFilterByFilter( - name, - subAggs, - filters, - false, - null, - context, - parent, - cardinality, - metadata - ), - showTermDocCountError, - valuesSourceConfig.format(), - order, - bucketCountThresholds, - terms - ); - if (adapted.scoreMode().needsScores()) { /* - * Filter by filter won't produce the correct results if the - * sub-aggregators need scores because we're not careful with how - * we merge filters. Right now we have to build the whole - * aggregation in order to know if it'll need scores or not. - */ - // TODO make filter by filter produce the correct result or skip this in canUseFilterbyFilter - return null; + TermQueryBuilder builder = new TermQueryBuilder(field, valuesSourceConfig.format().format(terms.term())); + filterByFilterBuilder.add(Long.toString(ord), context.buildQuery(builder)); } - return adapted; + return filterByFilterBuilder.build(); } private final boolean showTermDocCountError; @@ -133,7 +123,7 @@ static StringTermsAggregatorFromFilters adaptIntoFiltersOrNull( public StringTermsAggregatorFromFilters( Aggregator parent, AggregatorFactories subAggregators, - CheckedFunction delegate, + CheckedFunction delegate, boolean showTermDocCountError, DocValueFormat format, BucketOrder order, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java index 7a6fbe7526404..3d60e6c5c6ad2 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java @@ -32,7 +32,7 @@ import org.elasticsearch.script.ScriptContext; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.MultiBucketConsumerService.MultiBucketConsumer; -import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator.FilterByFilter; +import org.elasticsearch.search.aggregations.bucket.filter.FilterByFilterAggregator; import org.elasticsearch.search.internal.SubSearchContext; import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.search.profile.aggregation.AggregationProfiler; @@ -251,8 +251,8 @@ public final AggregationUsageService getUsageService() { /** * Are aggregations allowed to try to rewrite themselves into - * {@link FilterByFilter} aggregations? Often - * {@linkplain FilterByFilter} is faster to execute, but it isn't + * {@link FilterByFilterAggregator} aggregations? Often + * {@linkplain FilterByFilterAggregator} is faster to execute, but it isn't * always. For now this just hooks into a cluster level setting * so users can disable the behavior when the existing heuristics * don't detect cases where its slower. 
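Aside for readers of this refactor: every call site above funnels through the new FilterByFilterAggregator.AdapterBuilder. The rendering of this diff dropped the generic type parameters, so the sketch below restates the pattern from FiltersAggregator#build with the generics reconstructed from context; it assumes the method's local variables (name, keyed, otherBucketKey, context, parent, cardinality, metadata, factories, filters) are in scope and the exact signatures should be treated as approximate rather than authoritative.

FilterByFilterAggregator.AdapterBuilder<FilterByFilterAggregator> builder =
    new FilterByFilterAggregator.AdapterBuilder<FilterByFilterAggregator>(
        name, keyed, otherBucketKey, context, parent, cardinality, metadata
    ) {
        @Override
        protected FilterByFilterAggregator adapt(
            CheckedFunction<AggregatorFactories, FilterByFilterAggregator, IOException> delegate
        ) throws IOException {
            // The plain "filters" agg needs no extra adaptation; range and terms instead
            // wrap the delegate in FromFilters / StringTermsAggregatorFromFilters here.
            return delegate.apply(factories);
        }
    };
for (QueryToFilterAdapter<?> filter : filters) {
    builder.add(filter); // each add() can mark the rewrite invalid, e.g. an inefficient union with the top-level query
}
FilterByFilterAggregator fast = builder.build(); // null when the rewrite isn't valid or the sub-aggs need scores
if (fast != null) {
    return fast;
}
return new FiltersAggregator.Compatible(name, factories, filters, keyed, otherBucketKey, context, parent, cardinality, metadata);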
diff --git a/server/src/main/java/org/elasticsearch/search/runtime/AbstractScriptFieldQuery.java b/server/src/main/java/org/elasticsearch/search/runtime/AbstractScriptFieldQuery.java index 9acf2b6c9cc83..bcfece28b063c 100644 --- a/server/src/main/java/org/elasticsearch/search/runtime/AbstractScriptFieldQuery.java +++ b/server/src/main/java/org/elasticsearch/search/runtime/AbstractScriptFieldQuery.java @@ -29,7 +29,7 @@ /** * Abstract base class for building queries based on script fields. */ -abstract class AbstractScriptFieldQuery extends Query { +public abstract class AbstractScriptFieldQuery extends Query { /** * We don't have the infrastructure to estimate the match cost of a script * so we just use a big number. diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java index 782757f21295b..87113a671016d 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java @@ -13,6 +13,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.DirectoryReader; @@ -20,18 +21,21 @@ import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; -import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.time.DateFormatter; +import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.index.cache.bitset.BitsetFilterCache; import org.elasticsearch.index.mapper.CustomTermFreqField; import org.elasticsearch.index.mapper.DateFieldMapper; @@ -83,13 +87,10 @@ import static io.github.nik9000.mapmatcher.ListMatcher.matchesList; import static io.github.nik9000.mapmatcher.MapMatcher.assertMap; import static io.github.nik9000.mapmatcher.MapMatcher.matchesMap; -import static org.hamcrest.Matchers.both; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; -import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; -import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.nullValue; import static org.mockito.Mockito.mock; @@ -375,7 +376,7 @@ public void testWithMergedPointRangeQueries() throws IOException { }, ft); } - public void testFilterByFilterCost() throws IOException { + public void testRangeFilter() throws IOException { 
MappedFieldType ft = new DateFieldMapper.DateFieldType( "test", true, @@ -391,45 +392,30 @@ public void testFilterByFilterCost() throws IOException { "test", new KeyedFilter("q1", new RangeQueryBuilder("test").from("2020-01-01").to("2020-03-01").includeUpper(false)) ); - withAggregator( - builder, - new MatchAllDocsQuery(), - iw -> { - iw.addDocument( - org.elasticsearch.core.List.of( - new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2010-01-02")) - ) - ); - iw.addDocument( - org.elasticsearch.core.List.of( - new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-02")) - ) - ); - }, - (searcher, agg) -> { - assertThat(agg, instanceOf(FiltersAggregator.FilterByFilter.class)); - FiltersAggregator.FilterByFilter filterByFilter = (FiltersAggregator.FilterByFilter) agg; - int maxDoc = searcher.getIndexReader().maxDoc(); - assertThat(filterByFilter.estimateCost(maxDoc), equalTo(1L)); - Map debug = new HashMap<>(); - filterByFilter.collectDebugInfo(debug::put); - assertMap( - debug, - matchesMap().entry("segments_with_deleted_docs", 0) - .entry("estimated_cost", 1L) - .entry("max_cost", (long) maxDoc) - .entry("estimate_cost_time", 0L) - .entry("segments_with_doc_count_field", 0) - .entry("segments_counted", 0) + debugTestCase(builder, new MatchAllDocsQuery(), iw -> { + iw.addDocument( + org.elasticsearch.core.List.of(new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2010-01-02"))) + ); + iw.addDocument( + org.elasticsearch.core.List.of(new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-02"))) + ); + }, (InternalFilters filters, Class impl, Map> debug) -> { + assertThat(filters.getBuckets(), hasSize(1)); + assertThat(filters.getBucketByKey("q1").getDocCount(), equalTo(1L)); + + assertThat(impl, equalTo(FilterByFilterAggregator.class)); + assertMap( + debug, + matchesMap().entry( + "test", + matchesMap().entry("segments_with_doc_count_field", 0) + .entry("segments_with_deleted_docs", 0) .entry("segments_collected", 0) - .entry( - "filters", - matchesList().item(matchesMap().extraOk().entry("scorers_prepared_while_estimating_cost", greaterThan(0))) - ) - ); - }, - ft - ); + .entry("segments_counted", 1) + .entry("filters", matchesList().item(matchesMap().entry("query", "test:[1577836800000 TO 1583020799999]"))) + ) + ); + }, ft); } /** @@ -472,20 +458,33 @@ public void testMatchAll() throws IOException { iw.addDocument(org.elasticsearch.core.List.of()); } }; - withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> { - assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class)); - // The estimated cost is 0 because we're going to read from metadata - assertThat(((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE), equalTo(0L)); - Map debug = collectAndGetFilterDebugInfo(searcher, aggregator); - assertMap(debug, matchesMap().extraOk().entry("specialized_for", "match_all").entry("results_from_metadata", greaterThan(0))); - }); - testCase( + debugTestCase( builder, new MatchAllDocsQuery(), buildIndex, - (InternalFilters result) -> { - assertThat(result.getBuckets(), hasSize(1)); - assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(10L)); + (InternalFilters filters, Class impl, Map> debug) -> { + assertThat(filters.getBuckets(), hasSize(1)); + assertThat(filters.getBucketByKey("q1").getDocCount(), equalTo(10L)); + + assertThat(impl, equalTo(FilterByFilterAggregator.class)); + assertMap( + debug, + 
matchesMap().entry( + "test", + matchesMap().entry("segments_counted", 1) + .entry("segments_collected", 0) + .entry("segments_with_doc_count_field", 0) + .entry("segments_with_deleted_docs", 0) + .entry( + "filters", + matchesList().item( + matchesMap().entry("query", "*:*") + .entry("specialized_for", "match_all") + .entry("results_from_metadata", 1) + ) + ) + ) + ); } ); } @@ -501,20 +500,33 @@ public void testMatchAllWithDocCount() throws IOException { ); } }; - withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> { - assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class)); - // The estimated cost is 0 because we're going to read from metadata - assertThat(((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE), equalTo(10L)); - Map debug = collectAndGetFilterDebugInfo(searcher, aggregator); - assertMap(debug, matchesMap().extraOk().entry("specialized_for", "match_all").entry("results_from_metadata", 0)); - }); - testCase( + debugTestCase( builder, new MatchAllDocsQuery(), buildIndex, - (InternalFilters result) -> { - assertThat(result.getBuckets(), hasSize(1)); - assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(55L)); + (InternalFilters filters, Class impl, Map> debug) -> { + assertThat(filters.getBuckets(), hasSize(1)); + assertThat(filters.getBucketByKey("q1").getDocCount(), equalTo(55L)); + + assertThat(impl, equalTo(FilterByFilterAggregator.class)); + assertMap( + debug, + matchesMap().entry( + "test", + matchesMap().entry("segments_counted", 1) + .entry("segments_collected", 0) + .entry("segments_with_doc_count_field", 1) + .entry("segments_with_deleted_docs", 0) + .entry( + "filters", + matchesList().item( + matchesMap().entry("query", "*:*") + .entry("specialized_for", "match_all") + .entry("results_from_metadata", 0) + ) + ) + ) + ); } ); } @@ -548,12 +560,11 @@ public void onCache(ShardId shardId, Accountable accountable) {} ); IndexSearcher searcher = newIndexSearcher(limitedReader); AggregationContext context = createAggregationContext(searcher, new MatchAllDocsQuery()); - FiltersAggregator.FilterByFilter aggregator = createAggregator(builder, context); - // The estimated cost is 0 because we're going to read from metadata - assertThat(((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE), equalTo(10L)); + FilterByFilterAggregator aggregator = createAggregator(builder, context); aggregator.preCollection(); searcher.search(context.query(), aggregator); aggregator.postCollection(); + InternalAggregation result = aggregator.buildTopLevel(); result = result.reduce( org.elasticsearch.core.List.of(result), @@ -567,39 +578,150 @@ public void onCache(ShardId shardId, Accountable accountable) {} InternalFilters filters = (InternalFilters) result; assertThat(filters.getBuckets(), hasSize(1)); assertThat(filters.getBucketByKey("q1").getDocCount(), equalTo(5L)); + Map debug = new HashMap<>(); - ((FiltersAggregator.FilterByFilter) aggregator).filters().get(0).collectDebugInfo(debug::put); - assertMap(debug, matchesMap().extraOk().entry("specialized_for", "match_all").entry("results_from_metadata", 0)); + aggregator.collectDebugInfo(debug::put); + assertMap( + debug, + matchesMap().entry("segments_counted", 1) + .entry("segments_collected", 0) + .entry("segments_with_doc_count_field", 0) + .entry("segments_with_deleted_docs", 0) + .entry( + "filters", + matchesList().item( + matchesMap().entry("query", "*:*") + .entry("specialized_for", "match_all") + 
.entry("results_from_metadata", 0) + ) + ) + ); } } } - public void testMatchNone() throws IOException { + public void testComplexUnionDisabledFilterByFilter() throws IOException { + MappedFieldType dft = new DateFieldMapper.DateFieldType( + "date", + true, + false, + false, + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, + Resolution.MILLISECONDS, + null, + null, + Collections.emptyMap() + ); + MappedFieldType kft = new KeywordFieldType("kwd"); + AggregationBuilder builder = new FiltersAggregationBuilder( + "test", + new KeyedFilter("q1", new RangeQueryBuilder("date").from("2020-01-01").to("2020-03-01").includeUpper(false)) + ); + debugTestCase(builder, new TermQuery(new Term("kwd", "a")), iw -> { + iw.addDocument( + org.elasticsearch.core.List.of( + new LongPoint("date", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2010-01-02")), + new Field("kwd", "a", KeywordFieldMapper.Defaults.FIELD_TYPE), + new SortedDocValuesField("kwd", new BytesRef("a")) + ) + ); + iw.addDocument( + org.elasticsearch.core.List.of( + new LongPoint("date", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-02")), + new Field("kwd", "a", KeywordFieldMapper.Defaults.FIELD_TYPE), + new SortedDocValuesField("kwd", new BytesRef("a")) + ) + ); + }, (InternalFilters filters, Class impl, Map> debug) -> { + assertThat(filters.getBuckets(), hasSize(1)); + assertThat(filters.getBucketByKey("q1").getDocCount(), equalTo(1L)); + + assertThat(impl, equalTo(FiltersAggregator.Compatible.class)); + assertMap( + debug, + matchesMap().entry( + "test", + matchesMap().entry("filters", matchesList().item(matchesMap().entry("query", "date:[1577836800000 TO 1583020799999]"))) + ) + ); + }, dft, kft); + } + + public void testMatchNoneFilter() throws IOException { AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", new RangeQueryBuilder("missing").gte(0))); CheckedConsumer buildIndex = iw -> { for (int i = 0; i < 10; i++) { iw.addDocument(org.elasticsearch.core.List.of(new LongPoint("t", i))); } }; - withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> { - assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class)); - // The estimated cost is 0 because we're going to read from metadata - assertThat(((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE), equalTo(0L)); - Map debug = collectAndGetFilterDebugInfo(searcher, aggregator); - assertMap(debug, matchesMap().extraOk().entry("specialized_for", "match_none")); - }); - testCase( + debugTestCase( builder, new MatchAllDocsQuery(), buildIndex, - (InternalFilters result) -> { - assertThat(result.getBuckets(), hasSize(1)); - assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(0L)); + (InternalFilters filters, Class impl, Map> debug) -> { + assertThat(filters.getBuckets(), hasSize(1)); + assertThat(filters.getBucketByKey("q1").getDocCount(), equalTo(0L)); + + assertThat(impl, equalTo(FilterByFilterAggregator.class)); + assertMap( + debug, + matchesMap().entry( + "test", + matchesMap().entry("segments_with_doc_count_field", 0) + .entry("segments_with_deleted_docs", 0) + .entry("segments_collected", 0) + .entry("segments_counted", 1) + .entry( + "filters", + matchesList().item( + matchesMap().entry("query", "MatchNoDocsQuery(\"User requested \"match_none\" query.\")") + .entry("specialized_for", "match_none") + ) + ) + ) + ); + } + ); + } + + public void testMatchNoneTopLevel() throws IOException { + AggregationBuilder builder = new 
FiltersAggregationBuilder("test", new KeyedFilter("q1", new RangeQueryBuilder("t").gte(0))); + CheckedConsumer buildIndex = iw -> { + for (int i = 0; i < 10; i++) { + iw.addDocument(org.elasticsearch.core.List.of(new LongPoint("t", i))); + } + }; + debugTestCase( + builder, + new MatchNoDocsQuery(), + buildIndex, + (InternalFilters filters, Class impl, Map> debug) -> { + assertThat(filters.getBuckets(), hasSize(1)); + assertThat(filters.getBucketByKey("q1").getDocCount(), equalTo(0L)); + + assertThat(impl, equalTo(FilterByFilterAggregator.class)); + assertMap( + debug, + matchesMap().entry( + "test", + matchesMap().entry("segments_with_doc_count_field", 0) + .entry("segments_with_deleted_docs", 0) + .entry("segments_collected", 0) + .entry("segments_counted", 1) + .entry( + "filters", + matchesList().item( + matchesMap().entry("query", "MatchNoDocsQuery(\"User requested \"match_none\" query.\")") + .entry("specialized_for", "match_none") + ) + ) + ) + ); } ); } - public void testTermQuery() throws IOException { + public void testTermFilter() throws IOException { KeywordFieldMapper.KeywordFieldType ft = new KeywordFieldMapper.KeywordFieldType("f", true, false, Collections.emptyMap()); AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", new MatchQueryBuilder("f", "0"))); CheckedConsumer buildIndex = iw -> { @@ -608,23 +730,77 @@ public void testTermQuery() throws IOException { iw.addDocument(org.elasticsearch.core.List.of(new Field("f", bytes, KeywordFieldMapper.Defaults.FIELD_TYPE))); } }; - withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> { - assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class)); - // The estimated cost is 0 because we're going to read from metadata - assertThat(((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE), equalTo(0L)); - Map debug = collectAndGetFilterDebugInfo(searcher, aggregator); - assertMap( - debug, - matchesMap().entry("specialized_for", "term") - .entry("query", "f:0") - .entry("results_from_metadata", greaterThan(0)) - .entry("scorers_prepared_while_estimating_cost", equalTo(0)) - ); - }, ft); - testCase(builder, new MatchAllDocsQuery(), buildIndex, (InternalFilters result) -> { - assertThat(result.getBuckets(), hasSize(1)); - assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(4L)); - }, ft); + debugTestCase( + builder, + new MatchAllDocsQuery(), + buildIndex, + (InternalFilters filters, Class impl, Map> debug) -> { + assertThat(filters.getBuckets(), hasSize(1)); + assertThat(filters.getBucketByKey("q1").getDocCount(), equalTo(4L)); + + assertThat(impl, equalTo(FilterByFilterAggregator.class)); + assertMap( + debug, + matchesMap().entry( + "test", + matchesMap().entry("segments_with_doc_count_field", 0) + .entry("segments_with_deleted_docs", 0) + .entry("segments_collected", 0) + .entry("segments_counted", 1) + .entry( + "filters", + matchesList().item( + matchesMap().entry("query", "f:0") + .entry("specialized_for", "term") + .entry("results_from_metadata", greaterThan(0)) + ) + ) + ) + ); + }, + ft + ); + } + + public void testTermTopLevel() throws IOException { + KeywordFieldMapper.KeywordFieldType ft = new KeywordFieldMapper.KeywordFieldType("f", true, false, Collections.emptyMap()); + AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", new MatchAllQueryBuilder())); + CheckedConsumer buildIndex = iw -> { + for (int i = 0; i < 10; i++) { + BytesRef bytes = new 
BytesRef(Integer.toString(i % 3)); + iw.addDocument(org.elasticsearch.core.List.of(new Field("f", bytes, KeywordFieldMapper.Defaults.FIELD_TYPE))); + } + }; + debugTestCase( + builder, + new TermQuery(new Term("f", "0")), + buildIndex, + (InternalFilters filters, Class impl, Map> debug) -> { + assertThat(filters.getBuckets(), hasSize(1)); + assertThat(filters.getBucketByKey("q1").getDocCount(), equalTo(4L)); + + assertThat(impl, equalTo(FilterByFilterAggregator.class)); + assertMap( + debug, + matchesMap().entry( + "test", + matchesMap().entry("segments_with_doc_count_field", 0) + .entry("segments_with_deleted_docs", 0) + .entry("segments_collected", 0) + .entry("segments_counted", 1) + .entry( + "filters", + matchesList().item( + matchesMap().entry("query", "f:0") + .entry("specialized_for", "term") + .entry("results_from_metadata", greaterThan(0)) + ) + ) + ) + ); + }, + ft + ); } public void testSubAggs() throws IOException { @@ -670,36 +846,47 @@ public void testSubAggs() throws IOException { * assertion errors while executing. */ Collections.shuffle(docs, random()); - testCase(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), result -> { - InternalFilters filters = (InternalFilters) result; - assertThat(filters.getBuckets(), hasSize(2)); - - InternalFilters.InternalBucket b = filters.getBucketByKey("q1"); - assertThat(b.getDocCount(), equalTo(1L)); - InternalMax max = b.getAggregations().get("m"); - assertThat(max.getValue(), equalTo(100.0)); - InternalSum sum = b.getAggregations().get("s"); - assertThat(sum.getValue(), equalTo(100.0)); - - b = filters.getBucketByKey("q2"); - assertThat(b.getDocCount(), equalTo(2L)); - max = b.getAggregations().get("m"); - assertThat(max.getValue(), equalTo(10.0)); - sum = b.getAggregations().get("s"); - assertThat(sum.getValue(), equalTo(15.0)); - }, dateFt, intFt); - withAggregator(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), (searcher, aggregator) -> { - assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class)); - FiltersAggregator.FilterByFilter filterByFilter = (FiltersAggregator.FilterByFilter) aggregator; - int maxDoc = searcher.getIndexReader().maxDoc(); - assertThat(filterByFilter.estimateCost(maxDoc), equalTo(3L)); - Map debug = new HashMap<>(); - filterByFilter.filters().get(0).collectDebugInfo(debug::put); - assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1)); - debug = new HashMap<>(); - filterByFilter.filters().get(1).collectDebugInfo(debug::put); - assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1)); - }, dateFt, intFt); + debugTestCase( + builder, + new MatchAllDocsQuery(), + iw -> iw.addDocuments(docs), + (InternalFilters filters, Class impl, Map> debug) -> { + assertThat(filters.getBuckets(), hasSize(2)); + + InternalFilters.InternalBucket b = filters.getBucketByKey("q1"); + assertThat(b.getDocCount(), equalTo(1L)); + InternalMax max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(100.0)); + InternalSum sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(100.0)); + + b = filters.getBucketByKey("q2"); + assertThat(b.getDocCount(), equalTo(2L)); + max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(10.0)); + sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(15.0)); + + assertThat(impl, equalTo(FilterByFilterAggregator.class)); + assertMap( + debug, + matchesMap().entry( + "test", + 
matchesMap().entry("segments_with_doc_count_field", 0) + .entry("segments_with_deleted_docs", 0) + .entry("segments_collected", 1) + .entry("segments_counted", 0) + .entry( + "filters", + matchesList().item(matchesMap().entry("query", "test:[1262304000000 TO 1267401599999]")) + .item(matchesMap().entry("query", "test:[1577836800000 TO 1583020799999]")) + ) + ).entry("test.s", matchesMap()).entry("test.m", matchesMap()) + ); + }, + dateFt, + intFt + ); } public void testSubAggsManyDocs() throws IOException { @@ -737,36 +924,43 @@ public void testSubAggsManyDocs() throws IOException { * assertion errors while executing. */ Collections.shuffle(docs, random()); - testCase(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), result -> { - InternalFilters filters = (InternalFilters) result; - assertThat(filters.getBuckets(), hasSize(2)); - - InternalFilters.InternalBucket b = filters.getBucketByKey("q1"); - assertThat(b.getDocCount(), equalTo(3334L)); - InternalMax max = b.getAggregations().get("m"); - assertThat(max.getValue(), equalTo(9999.0)); - InternalSum sum = b.getAggregations().get("s"); - assertThat(sum.getValue(), equalTo(16668333.0)); - - b = filters.getBucketByKey("q2"); - assertThat(b.getDocCount(), equalTo(6666L)); - max = b.getAggregations().get("m"); - assertThat(max.getValue(), equalTo(9998.0)); - sum = b.getAggregations().get("s"); - assertThat(sum.getValue(), equalTo(33326667.0)); - }, dateFt, intFt); - withAggregator(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), (searcher, aggregator) -> { - assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class)); - FiltersAggregator.FilterByFilter filterByFilter = (FiltersAggregator.FilterByFilter) aggregator; - int maxDoc = searcher.getIndexReader().maxDoc(); - assertThat(filterByFilter.estimateCost(maxDoc), both(greaterThanOrEqualTo(10000L)).and(lessThan(20000L))); - Map debug = new HashMap<>(); - filterByFilter.filters().get(0).collectDebugInfo(debug::put); - assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1)); - debug = new HashMap<>(); - filterByFilter.filters().get(1).collectDebugInfo(debug::put); - assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1)); - }, dateFt, intFt); + debugTestCase( + builder, + new MatchAllDocsQuery(), + iw -> iw.addDocuments(docs), + (InternalFilters filters, Class impl, Map> debug) -> { + assertThat(filters.getBuckets(), hasSize(2)); + + InternalFilters.InternalBucket b = filters.getBucketByKey("q1"); + assertThat(b.getDocCount(), equalTo(3334L)); + InternalMax max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(9999.0)); + InternalSum sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(16668333.0)); + + b = filters.getBucketByKey("q2"); + assertThat(b.getDocCount(), equalTo(6666L)); + max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(9998.0)); + sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(33326667.0)); + + assertThat(impl, equalTo(FilterByFilterAggregator.class)); + assertMap( + debug, + matchesMap().entry( + "test", + matchesMap().entry("segments_with_doc_count_field", 0) + .entry("segments_with_deleted_docs", 0) + .entry("segments_collected", 1) + .entry("segments_counted", 0) + .entry("filters", hasSize(2)) + ).entry("test.s", matchesMap()).entry("test.m", matchesMap()) + ); + }, + dateFt, + intFt + ); } public void testSubAggsManyFilters() throws IOException { @@ -812,35 +1006,42 @@ 
public void testSubAggsManyFilters() throws IOException { * assertion errors while executing. */ Collections.shuffle(docs, random()); - testCase(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), result -> { - InternalFilters filters = (InternalFilters) result; - assertThat(filters.getBuckets(), hasSize(buckets.size())); - - InternalFilters.InternalBucket b = filters.getBucketByKey("2010-01-01 to 2010-01-31"); - assertThat(b.getDocCount(), equalTo(3334L)); - InternalMax max = b.getAggregations().get("m"); - assertThat(max.getValue(), equalTo(9999.0)); - InternalSum sum = b.getAggregations().get("s"); - assertThat(sum.getValue(), equalTo(16668333.0)); - - b = filters.getBucketByKey("2019-12-10 to 2020-01-09"); - assertThat(b.getDocCount(), equalTo(6666L)); - max = b.getAggregations().get("m"); - assertThat(max.getValue(), equalTo(9998.0)); - sum = b.getAggregations().get("s"); - assertThat(sum.getValue(), equalTo(33326667.0)); - }, dateFt, intFt); - withAggregator(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), (searcher, aggregator) -> { - assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class)); - FiltersAggregator.FilterByFilter filterByFilter = (FiltersAggregator.FilterByFilter) aggregator; - int maxDoc = searcher.getIndexReader().maxDoc(); - assertThat(filterByFilter.estimateCost(maxDoc), both(greaterThanOrEqualTo(10000L)).and(lessThan(20000L))); - for (int b = 0; b < buckets.size(); b++) { - Map debug = new HashMap<>(); - filterByFilter.filters().get(0).collectDebugInfo(debug::put); - assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1)); - } - }, dateFt, intFt); + debugTestCase( + builder, + new MatchAllDocsQuery(), + iw -> iw.addDocuments(docs), + (InternalFilters filters, Class impl, Map> debug) -> { + assertThat(filters.getBuckets(), hasSize(buckets.size())); + InternalFilters.InternalBucket b = filters.getBucketByKey("2010-01-01 to 2010-01-31"); + assertThat(b.getDocCount(), equalTo(3334L)); + InternalMax max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(9999.0)); + InternalSum sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(16668333.0)); + + b = filters.getBucketByKey("2019-12-10 to 2020-01-09"); + assertThat(b.getDocCount(), equalTo(6666L)); + max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(9998.0)); + sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(33326667.0)); + + assertThat(impl, equalTo(FilterByFilterAggregator.class)); + assertMap( + debug, + matchesMap().entry( + "test", + matchesMap().entry("segments_with_doc_count_field", 0) + .entry("segments_with_deleted_docs", 0) + .entry("segments_collected", 1) + .entry("segments_counted", 0) + .entry("filters", hasSize(buckets.size())) + ).entry("test.s", matchesMap()).entry("test.m", matchesMap()) + ); + }, + dateFt, + intFt + ); } public void testDocValuesFieldExistsForDate() throws IOException { @@ -952,7 +1153,7 @@ private void docValuesFieldExistsTestCase( assertThat(result.getBuckets(), hasSize(1)); assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(10L)); - assertThat(impl, equalTo(FiltersAggregator.FilterByFilter.class)); + assertThat(impl, equalTo(FilterByFilterAggregator.class)); MapMatcher expectedFilterDebug = matchesMap().extraOk() .entry("specialized_for", "docvalues_field_exists") .entry("results_from_metadata", canUseMetadata ? 
greaterThan(0) : equalTo(0)); @@ -964,22 +1165,6 @@ private void docValuesFieldExistsTestCase( fieldType, fnft ); - withAggregator(builder, new MatchAllDocsQuery(), iw -> { - for (int i = 0; i < 10; i++) { - iw.addDocument(buildDocWithField.apply(i)); - } - for (int i = 0; i < 10; i++) { - iw.addDocument(org.elasticsearch.core.List.of()); - } - }, (searcher, aggregator) -> { - long estimatedCost = ((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE); - Map debug = new HashMap<>(); - aggregator.collectDebugInfo(debug::put); - List filtersDebug = (List) debug.get("filters"); - Map filterDebug = (Map) filtersDebug.get(0); - assertThat(estimatedCost, canUseMetadata ? equalTo(0L) : greaterThan(0L)); - assertThat((int) filterDebug.get("scorers_prepared_while_estimating_cost"), canUseMetadata ? equalTo(0) : greaterThan(0)); - }, fieldType, fnft); } private void docValuesFieldExistsNoDataTestCase( @@ -995,15 +1180,12 @@ private void docValuesFieldExistsNoDataTestCase( // Exists queries convert to MatchNone if this isn't defined FieldNamesFieldMapper.FieldNamesFieldType fnft = new FieldNamesFieldMapper.FieldNamesFieldType(true); withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> { - assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class)); - long estimatedCost = ((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE); - assertThat(estimatedCost, equalTo(0L)); + assertThat(aggregator, instanceOf(FilterByFilterAggregator.class)); Map debug = collectAndGetFilterDebugInfo(searcher, aggregator); assertMap(debug, matchesMap().extraOk() .entry("specialized_for", "docvalues_field_exists") - .entry("results_from_metadata", greaterThan(0)) - .entry("scorers_prepared_while_estimating_cost", equalTo(0))); + .entry("results_from_metadata", greaterThan(0))); }, fieldType, fnft); testCase(builder, new MatchAllDocsQuery(), buildIndex, (InternalFilters result) -> { assertThat(result.getBuckets(), hasSize(1)); @@ -1023,7 +1205,7 @@ private Map collectAndGetFilterDebugInfo(IndexSearcher searcher, assertTrue(leafCollector.isNoop()); } Map debug = new HashMap<>(); - ((FiltersAggregator.FilterByFilter) aggregator).filters().get(0).collectDebugInfo(debug::put); + ((FilterByFilterAggregator) aggregator).filters().get(0).collectDebugInfo(debug::put); return debug; } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java index f4e21c3a6ea33..420ba7661ad5d 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java @@ -14,16 +14,15 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; -import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.common.time.DateFormatters; +import 
org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.index.mapper.BooleanFieldMapper; import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.FieldNamesFieldMapper; @@ -31,7 +30,6 @@ import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.InternalAggregation.ReduceContext; -import org.elasticsearch.search.aggregations.LeafBucketCollector; import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator; import org.elasticsearch.search.aggregations.bucket.terms.StringTerms; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; @@ -44,15 +42,17 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.function.Consumer; import java.util.stream.IntStream; +import static io.github.nik9000.mapmatcher.ListMatcher.matchesList; +import static io.github.nik9000.mapmatcher.MapMatcher.assertMap; +import static io.github.nik9000.mapmatcher.MapMatcher.matchesMap; import static java.util.stream.Collectors.toList; import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.hasEntry; +import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.not; @@ -1258,27 +1258,41 @@ public void testOneBucketOptimized() throws IOException { DateFieldMapper.DateFieldType ft = new DateFieldMapper.DateFieldType("f"); // Exists queries convert to MatchNone if this isn't defined FieldNamesFieldMapper.FieldNamesFieldType fnft = new FieldNamesFieldMapper.FieldNamesFieldType(true); - withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> { - assertThat(aggregator, instanceOf(DateHistogramAggregator.FromDateRange.class)); - aggregator.preCollection(); - for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) { - LeafBucketCollector leafCollector = aggregator.getLeafCollector(ctx); - assertTrue(leafCollector.isNoop()); - } - Map debug = new HashMap<>(); - aggregator.collectDebugInfo(debug::put); - assertThat(debug, hasEntry("delegate", "RangeAggregator.FromFilters")); - Map delegateDebug = (Map) debug.get("delegate_debug"); - assertThat(delegateDebug, hasEntry("delegate", "FiltersAggregator.FilterByFilter")); - assertThat(delegateDebug, hasEntry("ranges", 1)); - delegateDebug = (Map) delegateDebug.get("delegate_debug"); - assertThat(delegateDebug, hasEntry("estimated_cost", 0L)); - }, ft, fnft); - testCase(builder, new MatchAllDocsQuery(), buildIndex, (InternalDateHistogram result) -> { - assertThat(result.getBuckets(), hasSize(1)); - assertThat(result.getBuckets().get(0).getKeyAsString(), equalTo("2020-01-01T00:00:00.000Z")); - assertThat(result.getBuckets().get(0).getDocCount(), equalTo(5000L)); - }, ft, fnft); + debugTestCase( + builder, + new MatchAllDocsQuery(), + buildIndex, + (InternalDateHistogram result, Class impl, Map> debug) -> { + assertThat(result.getBuckets(), hasSize(1)); + assertThat(result.getBuckets().get(0).getKeyAsString(), equalTo("2020-01-01T00:00:00.000Z")); + assertThat(result.getBuckets().get(0).getDocCount(), equalTo(5000L)); + + assertThat(impl, equalTo(DateHistogramAggregator.FromDateRange.class)); + assertMap(debug, matchesMap() + .entry("d", matchesMap() + .entry("delegate", "RangeAggregator.FromFilters") + 
.entry("delegate_debug", matchesMap() + .entry("ranges", 1) + .entry("average_docs_per_range", 5010.0) + .entry("delegate", "FilterByFilterAggregator") + .entry("delegate_debug", matchesMap() + .entry("segments_with_doc_count_field", 0) + .entry("segments_with_deleted_docs", 0) + .entry("segments_counted", greaterThan(0)) + .entry("segments_collected", 0) + .entry("filters", matchesList().item(matchesMap() + .entry("query", "DocValuesFieldExistsQuery [field=f]") + .entry("specialized_for", "docvalues_field_exists") + .entry("results_from_metadata", greaterThan(0))) + ) + ) + ) + ) + ); + }, + ft, + fnft + ); } private void aggregationImplementationChoiceTestCase( diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java index e80ac00bcdf6e..98e290c9f5c11 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java @@ -25,9 +25,11 @@ import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.DateFieldMapper.Resolution; import org.elasticsearch.index.mapper.KeywordFieldMapper; +import org.elasticsearch.index.mapper.LongScriptFieldType; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType; +import org.elasticsearch.script.LongFieldScript; import org.elasticsearch.script.Script; import org.elasticsearch.script.StringFieldScript; import org.elasticsearch.search.aggregations.Aggregator; @@ -46,10 +48,12 @@ import java.util.concurrent.TimeUnit; import java.util.function.Consumer; +import static io.github.nik9000.mapmatcher.MapMatcher.assertMap; +import static io.github.nik9000.mapmatcher.MapMatcher.matchesMap; import static java.util.Collections.singleton; import static java.util.stream.Collectors.toList; +import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.hasEntry; import static org.hamcrest.Matchers.hasSize; public class RangeAggregatorTests extends AggregatorTestCase { @@ -477,14 +481,13 @@ public void testOverlappingRanges() throws IOException { } /** - * If the top level query is a runtime field we should still use - * {@link RangeAggregator.FromFilters} because we expect it'll still be faster - * that the normal aggregator, even though running the script for the runtime - * field is quite a bit more expensive than a regular query. The thing is, we - * won't be executing the script more times than we would if it were just at - * the top level. + * If the top level query is a runtime field we use the standard aggregator + * because it's marginally faster. You'd expect it to be a *ton* faster but + * usually the ranges drive the iteration and they are still fairly fast. + * But the union operation overhead that comes with combining the range with + * the top level query tends to slow us down more than the standard aggregator. 
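Most of the rewritten assertions in this patch lean on the nik9000 map-matcher library these tests already import. Outside the diff context, the idiom reduces to the sketch below; the debug values here are made up for illustration, but the matcher calls (matchesMap, matchesList, entry, item, extraOk, assertMap) are the same ones used in the hunks.

    import static io.github.nik9000.mapmatcher.ListMatcher.matchesList;
    import static io.github.nik9000.mapmatcher.MapMatcher.assertMap;
    import static io.github.nik9000.mapmatcher.MapMatcher.matchesMap;

    import java.util.List;
    import java.util.Map;

    // Toy illustration of the assertion style used throughout this diff: the
    // expected debug structure is declared entry by entry, extraOk() tolerates
    // additional keys, and assertMap reports a readable diff on mismatch.
    public class DebugMapAssertionSketch {
        public static void main(String[] args) {
            Map<String, Object> debug = Map.of(
                "segments_counted", 1,
                "segments_collected", 0,
                "filters", List.of(Map.of("query", "f:0", "specialized_for", "term"))
            );
            assertMap(
                debug,
                matchesMap().entry("segments_counted", 1)
                    .entry("segments_collected", 0)
                    .entry("filters", matchesList().item(matchesMap().extraOk().entry("query", "f:0")))
            );
            System.out.println("debug map matched");
        }
    }
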
*/ - public void testRuntimeFieldTopLevelQueryStillOptimized() throws IOException { + public void testRuntimeFieldTopLevelQueryNotOptimized() throws IOException { long totalDocs = (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 4; SearchLookup lookup = new SearchLookup(s -> null, (ft, l) -> null); StringFieldScript.LeafFactory scriptFactory = ctx -> new StringFieldScript( @@ -517,14 +520,70 @@ public void execute() { r.getBuckets().stream().map(InternalRange.Bucket::getDocCount).collect(toList()), equalTo(org.elasticsearch.core.List.of(totalDocs, 0L, 0L)) ); - assertThat(impl, equalTo(RangeAggregator.FromFilters.class)); - Map topLevelDebug = (Map) debug.get("r"); - Map delegateDebug = (Map) topLevelDebug.get("delegate_debug"); - assertThat(delegateDebug, hasEntry("estimated_cost", totalDocs)); - assertThat(delegateDebug, hasEntry("max_cost", totalDocs)); + assertThat(impl, equalTo(RangeAggregator.NoOverlap.class)); + assertMap(debug, matchesMap().entry("r", matchesMap().entry("ranges", 3).entry("average_docs_per_range", closeTo(6667, 1)))); }, new NumberFieldMapper.NumberFieldType(NUMBER_FIELD_NAME, NumberFieldMapper.NumberType.INTEGER)); } + /** + * If the field we're getting the range of is a runtime field it'd be super + * slow to run a bunch of range queries on it so we disable the optimization. + */ + public void testRuntimeFieldRangesNotOptimized() throws IOException { + long totalDocs = (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 4; + LongFieldScript.Factory scriptFactory = (fieldName, params, l) -> ctx -> new LongFieldScript( + fieldName, + org.elasticsearch.core.Map.of(), + l, + ctx + ) { + @Override + public void execute() { + emit((long) getDoc().get(NUMBER_FIELD_NAME).get(0)); + } + }; + MappedFieldType dummyFt = new LongScriptFieldType( + "dummy", + scriptFactory, + new Script("test"), + org.elasticsearch.core.Map.of(), + null + ); + MappedFieldType numberFt = new NumberFieldMapper.NumberFieldType(NUMBER_FIELD_NAME, NumberFieldMapper.NumberType.INTEGER); + debugTestCase( + new RangeAggregationBuilder("r").field("dummy").addRange(0, 1).addRange(1, 2).addRange(2, 3), + new MatchAllDocsQuery(), + iw -> { + for (int d = 0; d < totalDocs; d++) { + iw.addDocument( + org.elasticsearch.core.List.of( + new IntPoint(NUMBER_FIELD_NAME, 0), + new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 0) + ) + ); + } + }, + (InternalRange r, Class impl, Map> debug) -> { + assertThat( + r.getBuckets().stream().map(InternalRange.Bucket::getKey).collect(toList()), + equalTo(org.elasticsearch.core.List.of("0.0-1.0", "1.0-2.0", "2.0-3.0")) + ); + assertThat( + r.getBuckets().stream().map(InternalRange.Bucket::getDocCount).collect(toList()), + equalTo(org.elasticsearch.core.List.of(totalDocs, 0L, 0L)) + ); + + assertThat(impl, equalTo(RangeAggregator.NoOverlap.class)); + assertMap( + debug, + matchesMap().entry("r", matchesMap().entry("ranges", 3).entry("average_docs_per_range", closeTo(6667, 1))) + ); + }, + dummyFt, + numberFt + ); + } + private void testCase(Query query, CheckedConsumer buildIndex, Consumer> verify) throws IOException { diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index 69ac082cd5440..fc2593355c9fb 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ 
b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -53,6 +53,7 @@ import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordField; import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType; +import org.elasticsearch.index.mapper.KeywordScriptFieldType; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.index.mapper.NumberFieldMapper.NumberFieldType; @@ -139,14 +140,13 @@ import static org.elasticsearch.search.aggregations.AggregationBuilders.terms; import static org.elasticsearch.search.aggregations.PipelineAggregatorBuilders.bucketScript; import static org.hamcrest.Matchers.closeTo; +import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasEntry; -import static org.hamcrest.Matchers.hasKey; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; -import static org.hamcrest.Matchers.not; public class TermsAggregatorTests extends AggregatorTestCase { @@ -1942,28 +1942,53 @@ public void testWithFilterAndPreciseSize() throws IOException { .add(new TermQuery(new Term("k", "b")), Occur.SHOULD) .add(new TermQuery(new Term("k", "c")), Occur.SHOULD) .build(); - testCase(builder, topLevel, buildIndex, (StringTerms terms) -> { - assertThat( - terms.getBuckets().stream().map(StringTerms.Bucket::getKey).collect(toList()), - equalTo(org.elasticsearch.core.List.of("b", "c")) - ); - }, kft); - withAggregator(builder, topLevel, buildIndex, (searcher, terms) -> { - Map info = new HashMap<>(); - terms.collectDebugInfo(info::put); - assertThat(info, hasEntry("delegate", "FiltersAggregator.FilterByFilter")); - }, kft); + debugTestCase(builder, topLevel, buildIndex, + (StringTerms terms, Class impl, Map> debug) -> { + assertThat( + terms.getBuckets().stream().map(StringTerms.Bucket::getKey).collect(toList()), + equalTo(org.elasticsearch.core.List.of("b", "c")) + ); + assertThat( + terms.getBuckets().stream().map(StringTerms.Bucket::getDocCount).collect(toList()), + equalTo(org.elasticsearch.core.List.of(1L, 1L)) + ); + /* + * The bug used to happen with the filter by filter aggregator + * but at this point we don't use it here. We really *could* + * because the top level query is a pure disjunction of + * two terms that we're querying, but we don't have to code + * to figure that out at this point. + */ + + assertTrue( + "impl " + impl, + impl.equals(GlobalOrdinalsStringTermsAggregator.class) + || impl.equals(GlobalOrdinalsStringTermsAggregator.LowCardinality.class) + ); + assertMap( + debug, + matchesMap().entry( + "k", + matchesMap().extraOk() + .entry("result_strategy", "terms") + .entry("has_filter", false) + .entry("segments_with_multi_valued_ords", 0) + .entry("segments_with_single_valued_ords", greaterThan(0)) + .entry("collection_strategy", either(equalTo("remap using single bucket ords")).or(equalTo("dense"))) + ) + ); + }, + kft); } /** - * If the top level query is a runtime field we should still use - * {@link StringTermsAggregatorFromFilters} because we expect it'll still - * be faster that the normal aggregator, even though running the script - * for the runtime field is quite a bit more expensive than a regular - * query. 
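The rewritten testWithFilterAndPreciseSize above accepts either of two collection strategies, since the choice depends on segment layout. For readers unfamiliar with it, this is the plain hamcrest either/or idiom the assertion relies on, shown with placeholder values outside the diff context.

    import static org.hamcrest.MatcherAssert.assertThat;
    import static org.hamcrest.Matchers.either;
    import static org.hamcrest.Matchers.equalTo;

    // Placeholder value; the point is the combinable matcher, which passes when
    // the actual value satisfies either side of the disjunction.
    public class EitherMatcherSketch {
        public static void main(String[] args) {
            String collectionStrategy = "dense";
            assertThat(
                collectionStrategy,
                either(equalTo("remap using single bucket ords")).or(equalTo("dense"))
            );
            System.out.println("accepted: " + collectionStrategy);
        }
    }
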
The thing is, we won't be executing the script more times than - * we would if it were just at the top level. + * If the top level query is a runtime field we use the standard aggregator + * because it's marginally faster. You'd expect it to be a *ton* faster but + * usually the terms drive the iteration and they are still fairly fast. + * But the union operation overhead that comes with combining the range with + * the top level query tends to slow us down more than the standard aggregator. */ - public void testRuntimeFieldTopLevelQueryStillOptimized() throws IOException { + public void testRuntimeFieldTopLevelNotOptimized() throws IOException { long totalDocs = 500; SearchLookup lookup = new SearchLookup(s -> null, (ft, l) -> null); StringFieldScript.LeafFactory scriptFactory = ctx -> new StringFieldScript( @@ -2000,16 +2025,93 @@ public void execute() { r.getBuckets().stream().map(StringTerms.Bucket::getDocCount).collect(toList()), equalTo(org.elasticsearch.core.List.of(167L, 167L, 166L)) ); - assertThat(impl, equalTo(StringTermsAggregatorFromFilters.class)); - Map topLevelDebug = (Map) debug.get("t"); - Map delegateDebug = (Map) topLevelDebug.get("delegate_debug"); - // We don't estimate the cost here so these shouldn't show up - assertThat(delegateDebug, not(hasKey("estimated_cost"))); - assertThat(delegateDebug, not(hasKey("max_cost"))); - assertThat((int) delegateDebug.get("segments_counted"), greaterThan(0)); + assertThat( + r.getBuckets().stream().map(StringTerms.Bucket::getDocCount).collect(toList()), + equalTo(org.elasticsearch.core.List.of(167L, 167L, 166L)) + ); + + assertTrue( + "impl " + impl, + impl.equals(GlobalOrdinalsStringTermsAggregator.class) + || impl.equals(GlobalOrdinalsStringTermsAggregator.LowCardinality.class) + ); + assertMap( + debug, + matchesMap().entry( + "t", + matchesMap().extraOk() + .entry("result_strategy", "terms") + .entry("has_filter", false) + .entry("segments_with_multi_valued_ords", 0) + .entry("segments_with_single_valued_ords", greaterThan(0)) + .entry("collection_strategy", either(equalTo("remap using single bucket ords")).or(equalTo("dense"))) + ) + ); }, new KeywordFieldType("k", true, true, Collections.emptyMap())); } + /** + * If the field we're collecting the terms for is a runtime field we can't + * perform any of the optimization because we can't make a list to query + * for. Even if we could running the queries would be slow. 
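For readers unfamiliar with the runtime-field plumbing these tests exercise, the script factory built in the hunk below boils down to the following shape. It is restated from the diff itself (Factory, LeafFactory, execute, emit, and getDoc are the real StringFieldScript API as used there); only the helper name copyOfKeywordField is mine. The script copies the indexed "k" value into the scripted "dummy" field, one execution per document, which is exactly why building per-term filters against it would be slow.

    import org.elasticsearch.script.StringFieldScript;

    import java.util.Map;

    // Restatement of the factory the test below constructs: Factory -> LeafFactory
    // -> per-segment StringFieldScript whose execute() emits one value per document.
    class DummyKeywordScriptSketch {
        static StringFieldScript.Factory copyOfKeywordField(String sourceField) {
            return (fieldName, params, lookup) -> ctx -> new StringFieldScript(fieldName, Map.of(), lookup, ctx) {
                @Override
                public void execute() {
                    emit((String) getDoc().get(sourceField).get(0));
                }
            };
        }
    }
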
+ */ + public void testRuntimeFieldTermsNotOptimized() throws IOException { + long totalDocs = 500; + StringFieldScript.Factory scriptFactory = (fieldName, params, lookup) -> ctx -> new StringFieldScript( + fieldName, + org.elasticsearch.core.Map.of(), + lookup, + ctx + ) { + @Override + public void execute() { + emit((String) getDoc().get("k").get(0)); + } + }; + BytesRef[] values = new BytesRef[] { + new BytesRef("stuff"), new BytesRef("more_stuff"), new BytesRef("other_stuff"), + }; + MappedFieldType keywordFt = new KeywordFieldType("k", true, true, Collections.emptyMap()); + MappedFieldType dummyFt = new KeywordScriptFieldType( + "dummy", + scriptFactory, + new Script("test"), + org.elasticsearch.core.Map.of(), + null + ); + debugTestCase(new TermsAggregationBuilder("t").field("dummy"), new MatchAllDocsQuery(), iw -> { + for (int d = 0; d < totalDocs; d++) { + BytesRef value = values[d % values.length]; + iw.addDocument( + org.elasticsearch.core.List.of( + new Field("k", value, KeywordFieldMapper.Defaults.FIELD_TYPE), + new SortedSetDocValuesField("k", value) + ) + ); + } + }, (StringTerms r, Class impl, Map> debug) -> { + assertThat( + r.getBuckets().stream().map(StringTerms.Bucket::getKey).collect(toList()), + equalTo(org.elasticsearch.core.List.of("more_stuff", "stuff", "other_stuff")) + ); + assertThat( + r.getBuckets().stream().map(StringTerms.Bucket::getDocCount).collect(toList()), + equalTo(org.elasticsearch.core.List.of(167L, 167L, 166L)) + ); + + assertEquals(impl, MapStringTermsAggregator.class); + assertMap( + debug, + matchesMap().entry( + "t", + matchesMap().extraOk() + .entry("result_strategy", "terms") + .entry("collection_strategy", "from Field [dummy] of type [keyword]") + ) + ); + }, keywordFt, dummyFt); + } + private final SeqNoFieldMapper.SequenceIDFields sequenceIDFields = SeqNoFieldMapper.SequenceIDFields.emptySeqID(); private List generateDocsWithNested(String id, int value, int[] nestedValues) { List documents = new ArrayList<>();
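All of the debug maps asserted above are produced through Aggregator#collectDebugInfo, which the tests drain into a plain map via a method reference (aggregator.collectDebugInfo(debug::put)). Below is a toy analogue of that producer/consumer handshake; the class name and the entries are made up, but the BiConsumer shape mirrors how the tests in this diff capture the profiler output.

    import java.util.HashMap;
    import java.util.Map;
    import java.util.function.BiConsumer;

    // Analogue of how the tests gather profiler debug output: the aggregator-side
    // code pushes name/value pairs into whatever BiConsumer the caller supplies,
    // and the tests pass debug::put to capture them in a map for assertions.
    public class CollectDebugInfoSketch {
        static void collectDebugInfo(BiConsumer<String, Object> add) {
            add.accept("segments_counted", 1);
            add.accept("segments_collected", 0);
            add.accept("segments_with_deleted_docs", 0);
        }

        public static void main(String[] args) {
            Map<String, Object> debug = new HashMap<>();
            collectDebugInfo(debug::put);
            System.out.println(debug); // three captured entries, iteration order not guaranteed
        }
    }
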