Optimize lone single bucket date_histogram (#71180)

This optimizes the `date_histogram` agg when there is a single bucket and no sub-aggregations. We expect this to happen from time to time when the buckets are larger than a day because folks often use "daily" indices. This was already fairly fast, but using the metadata makes it 10x faster. Something like 98ms becomes 7.5ms. Nice if you can get it! Like #69377 this optimization will disable itself if you have document level security enabled or are querying a rollup index. Also like #69377 it won't do anything if there is a top level query.
elastic · May 12, 2021 · 0cf63fc · 0cf63fc
1 parent dc1bf6e
commit 0cf63fc
Show file tree

Hide file tree

Showing 5 changed files with 308 additions and 16 deletions.
diff --git a/...lClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java b/...lClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java
@@ -17,6 +17,7 @@
 import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator;
 import org.elasticsearch.search.aggregations.bucket.sampler.DiversifiedOrdinalsSamplerAggregator;
 import org.elasticsearch.search.aggregations.bucket.terms.GlobalOrdinalsStringTermsAggregator;
+import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
 import org.elasticsearch.search.profile.ProfileResult;
 import org.elasticsearch.search.profile.ProfileShardResult;
 import org.elasticsearch.test.ESIntegTestCase;
@@ -590,7 +591,9 @@ public void testFilterByFilter() throws InterruptedException, IOException {
 
         SearchResponse response = client().prepareSearch("dateidx")
             .setProfile(true)
-            .addAggregation(new DateHistogramAggregationBuilder("histo").field("date").calendarInterval(DateHistogramInterval.MONTH))
+            .addAggregation(new DateHistogramAggregationBuilder("histo").field("date").calendarInterval(DateHistogramInterval.MONTH)
+                // Add a sub-agg so we don't get to use metadata. That's great and all, but it outputs less debugging info for us to verify.
+                .subAggregation(new MaxAggregationBuilder("m").field("date")))
             .get();
         assertSearchResponse(response);
         Map<String, ProfileShardResult> profileResults = response.getProfileResults();
@@ -607,7 +610,7 @@ public void testFilterByFilter() throws InterruptedException, IOException {
             assertThat(histoAggResult, notNullValue());
             assertThat(histoAggResult.getQueryName(), equalTo("DateHistogramAggregator.FromDateRange"));
             assertThat(histoAggResult.getLuceneDescription(), equalTo("histo"));
-            assertThat(histoAggResult.getProfiledChildren().size(), equalTo(0));
+            assertThat(histoAggResult.getProfiledChildren().size(), equalTo(1));
             assertThat(histoAggResult.getTime(), greaterThan(0L));
             Map<String, Long> breakdown = histoAggResult.getTimeBreakdown();
             assertThat(breakdown, notNullValue());
@@ -639,7 +642,7 @@ public void testFilterByFilter() throws InterruptedException, IOException {
             Map<?, ?> queryDebug = (Map<?, ?>) filtersDebug.get(0);
             assertThat(queryDebug, hasKey("scorers_prepared_while_estimating_cost"));
             assertThat((int) queryDebug.get("scorers_prepared_while_estimating_cost"), greaterThan(0));
-            assertThat(queryDebug, hasEntry("query", "ConstantScore(DocValuesFieldExistsQuery [field=date])"));
+            assertThat(queryDebug, hasEntry("query", "DocValuesFieldExistsQuery [field=date]"));
         }
     }
 }
diff --git a/...c/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java b/...c/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java
@@ -8,12 +8,15 @@
 
 package org.elasticsearch.search.aggregations.bucket.filter;
 
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PointValues;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BulkScorer;
 import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.IndexOrDocValuesQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
@@ -49,9 +52,21 @@ public class QueryToFilterAdapter<Q extends Query> {
      */
     public static QueryToFilterAdapter<?> build(IndexSearcher searcher, String key, Query query) throws IOException {
         query = searcher.rewrite(query);
+        if (query instanceof ConstantScoreQuery) {
+            /*
+             * Unwrap constant score because it gets in the way of us
+             * understanding what the queries are trying to do and we
+             * don't use the score at all anyway. Effectively we always
+             * run in constant score mode.
+             */
+            query = ((ConstantScoreQuery) query).getQuery();
+        }
         if (query instanceof TermQuery) {
             return new TermQueryToFilterAdapter(searcher, key, (TermQuery) query);
         }
+        if (query instanceof DocValuesFieldExistsQuery) {
+            return new DocValuesFieldExistsAdapter(searcher, key, (DocValuesFieldExistsQuery) query);
+        }
         if (query instanceof MatchAllDocsQuery) {
             return new MatchAllQueryToFilterAdapter(searcher, key, (MatchAllDocsQuery) query);
         }
@@ -386,4 +401,50 @@ void collectDebugInfo(BiConsumer<String, Object> add) {
             add.accept("results_from_metadata", resultsFromMetadata);
         }
     }
+
+    private static class DocValuesFieldExistsAdapter extends QueryToFilterAdapter<DocValuesFieldExistsQuery> {
+        private int resultsFromMetadata;
+
+        private DocValuesFieldExistsAdapter(IndexSearcher searcher, String key, DocValuesFieldExistsQuery query) {
+            super(searcher, key, query);
+        }
+
+        @Override
+        long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live) throws IOException {
+            if (countCanUseMetadata(counter, live) && canCountFromMetadata(ctx)) {
+                resultsFromMetadata++;
+                PointValues points = ctx.reader().getPointValues(query().getField());
+                if (points == null) {
+                    return 0;
+                }
+                return points.getDocCount();
+
+            }
+            return super.count(ctx, counter, live);
+        }
+
+        @Override
+        long estimateCountCost(LeafReaderContext ctx, CheckedSupplier<Boolean, IOException> canUseMetadata) throws IOException {
+            if (canUseMetadata.get() && canCountFromMetadata(ctx)) {
+                return 0;
+            }
+            return super.estimateCountCost(ctx, canUseMetadata);
+        }
+
+        private boolean canCountFromMetadata(LeafReaderContext ctx) throws IOException {
+            FieldInfo info = ctx.reader().getFieldInfos().fieldInfo(query().getField());
+            if (info == null) {
+                // If we don't have any info then there aren't any values anyway.
+                return true;
+            }
+            return info.getPointDimensionCount() > 0;
+        }
+
+        @Override
+        void collectDebugInfo(BiConsumer<String, Object> add) {
+            super.collectDebugInfo(add);
+            add.accept("specialized_for", "docvalues_field_exists");
+            add.accept("results_from_metadata", resultsFromMetadata);
+        }
+    }
 }
diff --git a/...test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java b/...test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java
@@ -10,7 +10,9 @@
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
@@ -32,12 +34,14 @@
 import org.elasticsearch.index.mapper.DateFieldMapper;
 import org.elasticsearch.index.mapper.DateFieldMapper.Resolution;
 import org.elasticsearch.index.mapper.DocCountFieldMapper;
+import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
 import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.NumberFieldMapper;
 import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;
 import org.elasticsearch.index.mapper.ObjectMapper;
+import org.elasticsearch.index.query.ExistsQueryBuilder;
 import org.elasticsearch.index.query.MatchAllQueryBuilder;
 import org.elasticsearch.index.query.MatchQueryBuilder;
 import org.elasticsearch.index.query.QueryBuilder;
@@ -61,7 +65,6 @@
 import org.elasticsearch.search.aggregations.support.AggregationContext;
 import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
 import org.elasticsearch.search.internal.ContextIndexSearcherTests.DocumentSubsetDirectoryReader;
-import org.junit.Before;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -72,6 +75,7 @@
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
+import java.util.function.IntFunction;
 
 import static org.hamcrest.Matchers.both;
 import static org.hamcrest.Matchers.equalTo;
@@ -85,14 +89,6 @@
 import static org.mockito.Mockito.mock;
 
 public class FiltersAggregatorTests extends AggregatorTestCase {
-    private MappedFieldType fieldType;
-
-    @Before
-    public void setUpTest() throws Exception {
-        super.setUp();
-        fieldType = new KeywordFieldMapper.KeywordFieldType("field");
-    }
-
     public void testEmpty() throws Exception {
         Directory directory = newDirectory();
         RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
@@ -106,7 +102,12 @@ public void testEmpty() throws Exception {
         }
         FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", filters);
         builder.otherBucketKey("other");
-        InternalFilters response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
+        InternalFilters response = searchAndReduce(
+            indexSearcher,
+            new MatchAllDocsQuery(),
+            builder,
+            new KeywordFieldMapper.KeywordFieldType("field")
+        );
         assertEquals(response.getBuckets().size(), numFilters);
         for (InternalFilters.InternalBucket filter : response.getBuckets()) {
             assertEquals(filter.getDocCount(), 0);
@@ -206,7 +207,12 @@ public void testKeyedFilter() throws Exception {
         FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", keys);
         builder.otherBucket(true);
         builder.otherBucketKey("other");
-        final InternalFilters filters = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
+        final InternalFilters filters = searchAndReduce(
+            indexSearcher,
+            new MatchAllDocsQuery(),
+            builder,
+            new KeywordFieldMapper.KeywordFieldType("field")
+        );
         assertEquals(filters.getBuckets().size(), 7);
         assertEquals(filters.getBucketByKey("foobar").getDocCount(), 2);
         assertEquals(filters.getBucketByKey("foo").getDocCount(), 2);
@@ -261,7 +267,12 @@ public void testRandom() throws Exception {
             builder.otherBucket(true);
             builder.otherBucketKey("other");
 
-            final InternalFilters response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
+            final InternalFilters response = searchAndReduce(
+                indexSearcher,
+                new MatchAllDocsQuery(),
+                builder,
+                new KeywordFieldMapper.KeywordFieldType("field")
+            );
             List<InternalFilters.InternalBucket> buckets = response.getBuckets();
             assertEquals(buckets.size(), filters.length + 1);
 
@@ -789,6 +800,157 @@ public void testSubAggsManyFilters() throws IOException {
         }, dateFt, intFt);
     }
 
+    public void testDocValuesFieldExistsForDate() throws IOException {
+        DateFieldMapper.DateFieldType ft = new DateFieldMapper.DateFieldType("f");
+        QueryBuilder exists;
+        if (randomBoolean()) {
+            exists = new ExistsQueryBuilder("f");
+        } else {
+            // Range query covering all values in the index is rewritten to exists
+            exists = new RangeQueryBuilder("f").gte("2020-01-01").lt("2020-01-02");
+        }
+        long start = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-01T00:00:01");
+        docValuesFieldExistsTestCase(exists, ft, true, i -> {
+            long date = start + TimeUnit.HOURS.toMillis(i);
+            return List.of(new LongPoint("f", date), new NumericDocValuesField("f", date));
+        });
+    }
+
+    public void testDocValuesFieldExistsForDateWithMultiValuedFields() throws IOException {
+        DateFieldMapper.DateFieldType ft = new DateFieldMapper.DateFieldType("f");
+        long start = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-01T00:00:01");
+        docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), ft, true, i -> {
+            long date = start + TimeUnit.HOURS.toMillis(i);
+            return List.of(
+                new LongPoint("f", date),
+                new LongPoint("f", date + 10),
+                new SortedNumericDocValuesField("f", date),
+                new SortedNumericDocValuesField("f", date + 10)
+            );
+        });
+    }
+
+    public void testDocValuesFieldExistsForDateWithoutData() throws IOException {
+        docValuesFieldExistsNoDataTestCase(new DateFieldMapper.DateFieldType("f"));
+    }
+
+    public void testDocValuesFieldExistsForNumber() throws IOException {
+        NumberFieldMapper.NumberType numberType = randomFrom(NumberFieldMapper.NumberType.values());
+        NumberFieldMapper.NumberFieldType ft = new NumberFieldMapper.NumberFieldType(
+            "f",
+            numberType,
+            true,
+            false,
+            true,
+            true,
+            null,
+            Map.of(),
+            null
+        );
+        docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), ft, true, i -> {
+            return numberType.createFields("f", i, true, true, false);
+        });
+    }
+
+    public void testDocValuesFieldExistsForNumberWithoutData() throws IOException {
+        docValuesFieldExistsNoDataTestCase(new NumberFieldMapper.NumberFieldType(
+            "f",
+            randomFrom(NumberFieldMapper.NumberType.values()),
+            true,
+            false,
+            true,
+            true,
+            null,
+            Map.of(),
+            null
+        ));
+    }
+
+    public void testDocValuesFieldExistsForKeyword() throws IOException {
+        KeywordFieldMapper.KeywordFieldType ft = new KeywordFieldMapper.KeywordFieldType("f", true, true, Map.of());
+        docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), ft, false, i -> {
+            BytesRef text = new BytesRef(randomAlphaOfLength(5));
+            return List.of(new Field("f", text, KeywordFieldMapper.Defaults.FIELD_TYPE), new SortedSetDocValuesField("f", text));
+        });
+    }
+
+    public void testDocValuesFieldExistsForKeywordWithoutData() throws IOException {
+        docValuesFieldExistsNoDataTestCase(new KeywordFieldMapper.KeywordFieldType("f", true, true, Map.of()));
+    }
+
+    private void docValuesFieldExistsTestCase(
+        QueryBuilder exists,
+        MappedFieldType fieldType,
+        boolean canUseMetadata,
+        IntFunction<List<? extends IndexableField>> buildDocWithField
+    ) throws IOException {
+        AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", exists));
+        CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
+            for (int i = 0; i < 10; i++) {
+                iw.addDocument(buildDocWithField.apply(i));
+            }
+            for (int i = 0; i < 10; i++) {
+                iw.addDocument(List.of());
+            }
+        };
+        // Exists queries convert to MatchNone if this isn't defined
+        FieldNamesFieldMapper.FieldNamesFieldType fnft = new FieldNamesFieldMapper.FieldNamesFieldType(true);
+        debugTestCase(
+            builder,
+            new MatchAllDocsQuery(),
+            buildIndex,
+            (InternalFilters result, Class<? extends Aggregator> impl, Map<String, Map<String, Object>> debug) -> {
+                assertThat(result.getBuckets(), hasSize(1));
+                assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(10L));
+
+                assertThat(impl, equalTo(FiltersAggregator.FilterByFilter.class));
+                Map<?, ?> filterAggDebug = debug.get("test");
+                List<?> filtersDebug = (List<?>) filterAggDebug.get("filters");
+                Map<?, ?> filterDebug = (Map<?, ?>) filtersDebug.get(0);
+                assertThat(filterDebug, hasEntry("specialized_for", "docvalues_field_exists"));
+                assertThat((int) filterDebug.get("results_from_metadata"), canUseMetadata ? greaterThan(0) : equalTo(0));
+            },
+            fieldType,
+            fnft
+        );
+        withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> {
+            long estimatedCost = ((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE);
+            Map<String, Object> debug = new HashMap<>();
+            aggregator.collectDebugInfo(debug::put);
+            List<?> filtersDebug = (List<?>) debug.get("filters");
+            Map<?, ?> filterDebug = (Map<?, ?>) filtersDebug.get(0);
+            assertThat(estimatedCost, canUseMetadata ? equalTo(0L) : greaterThan(0L));
+            assertThat((int) filterDebug.get("scorers_prepared_while_estimating_cost"), canUseMetadata ? equalTo(0) : greaterThan(0));
+        }, fieldType, fnft);
+    }
+
+    private void docValuesFieldExistsNoDataTestCase(
+        MappedFieldType fieldType
+    ) throws IOException {
+        QueryBuilder exists = new ExistsQueryBuilder(fieldType.name());
+        AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", exists));
+        CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
+            for (int i = 0; i < 10; i++) {
+                iw.addDocument(List.of());
+            }
+        };
+        // Exists queries convert to MatchNone if this isn't defined
+        FieldNamesFieldMapper.FieldNamesFieldType fnft = new FieldNamesFieldMapper.FieldNamesFieldType(true);
+        withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> {
+            assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class));
+            long estimatedCost = ((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE);
+            Map<String, Object> debug = collectAndGetFilterDebugInfo(searcher, aggregator);
+            assertThat(debug, hasEntry("specialized_for", "docvalues_field_exists"));
+            assertThat(estimatedCost, equalTo(0L));
+            assertThat((int) debug.get("results_from_metadata"), greaterThan(0));
+            assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), equalTo(0));
+        }, fieldType, fnft);
+        testCase(builder, new MatchAllDocsQuery(), buildIndex, (InternalFilters result) -> {
+            assertThat(result.getBuckets(), hasSize(1));
+            assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(0L));
+        }, fieldType, fnft);
+    }
+
     @Override
     protected List<ObjectMapper> objectMappers() {
         return MOCK_OBJECT_MAPPERS;