Speed up terms agg when alone (#69377)

This speeds up the `terms` agg in a very specific case: 1. It has no child aggregations. 2. It has no parent aggregations. 3. There are no deleted documents. 4. You are not using document level security. 5. There is no top level query. 6. The field has global ordinals. 7. There are fewer than one thousand distinct terms. That is a lot of restrictions! But the speedup is pretty substantial because in those cases we can serve the entire aggregation using metadata that Lucene precomputes while it builds the index. In a real Rally track we have, we get a 92% speed improvement, but the index isn't *that* big: ``` | 90th percentile service time | keyword-terms-low-cardinality | 446.031 | 36.7677 | -409.263 | ms | ``` In a Rally track with a larger index I ran some tests by hand and the aggregation went from 2200ms to 8ms. Even though there are 7 restrictions on this, I expect it to come into play enough to matter. Restriction 6 just means you are aggregating on a `keyword` field. Or an `ip`. And it's fairly common for `keyword`s to have fewer than a thousand distinct values. Certainly not everywhere, but some places. I expect "cold tier" indices are very, very likely not to have deleted documents at all. And the optimization works segment by segment - so it'll save some time on each segment without deleted documents. But more time if the entire index doesn't have any. The optimization builds on #68871, which translates `terms` aggregations against low cardinality fields with global ordinals into a `filters` aggregation. This teaches the `filters` aggregation to recognize when it can get its results from the index metadata. Rather, it creates the infrastructure to make that fairly simple and applies it in the case of the queries generated by the terms aggregation.
elastic · Feb 25, 2021 · 4ffdad3 · 4ffdad3
1 parent 2fd6337
commit 4ffdad3
Show file tree

Hide file tree

Showing 11 changed files with 759 additions and 223 deletions.
diff --git a/...api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml b/...api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml
@@ -178,4 +178,4 @@ setup:
   - match: { aggregations.f.buckets.foo.doc_count: 8 }
   - match: { aggregations.f.buckets.xyz.doc_count: 5 }
   - match: { profile.shards.0.aggregations.0.type: FiltersAggregator.FilterByFilter }
-  - gte: { profile.shards.0.aggregations.0.debug.segments_with_doc_count: 1 }
+  - gte: { profile.shards.0.aggregations.0.debug.segments_with_doc_count_field: 1 }
diff --git a/...lClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java b/...lClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java
@@ -41,6 +41,8 @@
 import static org.hamcrest.Matchers.greaterThan;
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.Matchers.hasEntry;
+import static org.hamcrest.Matchers.hasKey;
+import static org.hamcrest.Matchers.hasSize;
 import static org.hamcrest.Matchers.notNullValue;
 
 @ESIntegTestCase.SuiteScopeTestCase
@@ -633,10 +635,16 @@ public void testFilterByFilter() throws InterruptedException, IOException {
             assertThat(delegate.get("delegate"), equalTo("FiltersAggregator.FilterByFilter"));
             Map<?, ?> delegateDebug = (Map<?, ?>) delegate.get("delegate_debug");
             assertThat(delegateDebug, hasEntry("segments_with_deleted_docs", 0));
-            assertThat(delegateDebug, hasEntry("segments_with_doc_count", 0));
+            assertThat(delegateDebug, hasEntry("segments_with_doc_count_field", 0));
             assertThat(delegateDebug, hasEntry("max_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2));
             assertThat(delegateDebug, hasEntry("estimated_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2));
             assertThat((long) delegateDebug.get("estimate_cost_time"), greaterThanOrEqualTo(0L));  // ~1,276,734 nanos is normal
+            List<?> filtersDebug = (List<?>) delegateDebug.get("filters");
+            assertThat(filtersDebug, hasSize(1));
+            Map<?, ?> queryDebug = (Map<?, ?>) filtersDebug.get(0);
+            assertThat(queryDebug, hasKey("scorers_prepared_while_estimating_cost"));
+            assertThat((int) queryDebug.get("scorers_prepared_while_estimating_cost"), greaterThan(0));
+            assertThat(queryDebug, hasEntry("query", "ConstantScore(DocValuesFieldExistsQuery [field=date])"));
         }
     }
 }
diff --git a/.../src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/.../src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java
diff --git a/...in/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java b/...in/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java
@@ -8,7 +8,6 @@
 
 package org.elasticsearch.search.aggregations.bucket.filter;
 
-import org.apache.lucene.search.Query;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.AggregatorFactories;
 import org.elasticsearch.search.aggregations.AggregatorFactory;
@@ -17,13 +16,13 @@
 import org.elasticsearch.search.aggregations.support.AggregationContext;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 
 public class FiltersAggregatorFactory extends AggregatorFactory {
 
-    private final String[] keys;
-    private final Query[] filters;
+    private final List<QueryToFilterAdapter<?>> filters;
     private final boolean keyed;
     private final boolean otherBucket;
     private final String otherBucketKey;
@@ -35,20 +34,17 @@ public FiltersAggregatorFactory(String name, List<KeyedFilter> filters, boolean
         this.keyed = keyed;
         this.otherBucket = otherBucket;
         this.otherBucketKey = otherBucketKey;
-        keys = new String[filters.size()];
-        this.filters = new Query[filters.size()];
-        for (int i = 0; i < filters.size(); ++i) {
-            KeyedFilter keyedFilter = filters.get(i);
-            this.keys[i] = keyedFilter.key();
-            this.filters[i] = context.buildQuery(keyedFilter.filter());
+        this.filters = new ArrayList<>(filters.size());
+        for (KeyedFilter f : filters) {
+            this.filters.add(QueryToFilterAdapter.build(context.searcher(), f.key(), context.buildQuery(f.filter())));
         }
     }
 
     @Override
     public Aggregator createInternal(Aggregator parent,
                                         CardinalityUpperBound cardinality,
                                         Map<String, Object> metadata) throws IOException {
-        return FiltersAggregator.build(name, factories, keys, filters, keyed,
+        return FiltersAggregator.build(name, factories, filters, keyed,
             otherBucket ? otherBucketKey : null, context, parent, cardinality, metadata);
     }
 }