From eea65da4b646537de9959d92805da64cfd75fae7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoph=20B=C3=BCscher?= <cbuescher@posteo.de>
Date: Mon, 4 Mar 2019 17:36:49 +0100
Subject: [PATCH] Shortcut counts on exists queries (#39570)

`TopDocsCollectorContext` can already shortcut hit counts on `match_all` and `term` queries when there are no deletions.
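This change adds the same shortcut for `exists` queries (`DocValuesFieldExistsQuery`) when the index has no deletions
and the queried field is indexed, i.e. it has points or an inverted index. Fields that only have doc values cannot be
shortcut and still require collecting hits.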

Closes #37475
---
 .../search/query/TopDocsCollectorContext.java | 30 +++++++++++++++
 .../search/query/QueryPhaseTests.java         | 38 +++++++++++++++----
 2 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java b/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java
index 1ccc8f4cb92db..1e2cd7541f944 100644
--- a/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java
+++ b/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java
@@ -19,12 +19,18 @@
 
 package org.elasticsearch.search.query;
 
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PointValues;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.MultiCollector;
@@ -125,6 +131,7 @@ private EmptyTopDocsCollectorContext(IndexReader reader, Query query,
             }
         }
 
+        @Override
         Collector create(Collector in) {
             assert in == null;
             return collector;
@@ -357,6 +364,29 @@ static int shortcutTotalHitCount(IndexReader reader, Query query) throws IOExcep
                 count += context.reader().docFreq(term);
             }
             return count;
+        } else if (query.getClass() == DocValuesFieldExistsQuery.class && reader.hasDeletions() == false) {
+            final String field = ((DocValuesFieldExistsQuery) query).getField();
+            int count = 0;
+            for (LeafReaderContext context : reader.leaves()) {
+                FieldInfos fieldInfos = context.reader().getFieldInfos();
+                FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+                if (fieldInfo != null) {
+                    if (fieldInfo.getPointIndexDimensionCount() > 0) {
+                        PointValues points = context.reader().getPointValues(field);
+                        if (points != null) {
+                            count += points.getDocCount();
+                        }
+                    } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
+                        Terms terms = context.reader().terms(field);
+                        if (terms != null) {
+                            count += terms.getDocCount();
+                        }
+                    } else {
+                        return -1; // no shortcut possible for fields that are not indexed
+                    }
+                }
+            }
+            return count;
         } else {
             return -1;
         }
diff --git a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java
index a321ff9c1a80a..16b18efe62322 100644
--- a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java
+++ b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java
@@ -21,7 +21,10 @@
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.LatLonDocValuesField;
+import org.apache.lucene.document.LatLonPoint;
 import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
@@ -35,6 +38,7 @@
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.FieldComparator;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.FilterCollector;
@@ -50,6 +54,7 @@
 import org.apache.lucene.search.TotalHitCountCollector;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.action.search.SearchTask;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.query.ParsedQuery;
@@ -92,18 +97,20 @@ public void tearDown() throws Exception {
         closeShards(indexShard);
     }
 
-    private void countTestCase(Query query, IndexReader reader, boolean shouldCollect) throws Exception {
+    private void countTestCase(Query query, IndexReader reader, boolean shouldCollectSearch, boolean shouldCollectCount) throws Exception {
         TestSearchContext context = new TestSearchContext(null, indexShard);
         context.parsedQuery(new ParsedQuery(query));
         context.setSize(0);
         context.setTask(new SearchTask(123L, "", "", "", null, Collections.emptyMap()));
 
-        final IndexSearcher searcher = shouldCollect ? new IndexSearcher(reader) :
+        final IndexSearcher searcher = shouldCollectSearch ? new IndexSearcher(reader) :
             getAssertingEarlyTerminationSearcher(reader, 0);
 
         final boolean rescore = QueryPhase.execute(context, searcher, checkCancelled -> {});
         assertFalse(rescore);
-        assertEquals(searcher.count(query), context.queryResult().topDocs().topDocs.totalHits.value);
+        IndexSearcher countSearcher = shouldCollectCount ? new IndexSearcher(reader) :
+            getAssertingEarlyTerminationSearcher(reader, 0);
+        assertEquals(countSearcher.count(query), context.queryResult().topDocs().topDocs.totalHits.value);
     }
 
     private void countTestCase(boolean withDeletions) throws Exception {
@@ -115,9 +122,14 @@ private void countTestCase(boolean withDeletions) throws Exception {
             Document doc = new Document();
             if (randomBoolean()) {
                 doc.add(new StringField("foo", "bar", Store.NO));
+                doc.add(new SortedSetDocValuesField("foo", new BytesRef("bar")));
+                doc.add(new SortedSetDocValuesField("docValuesOnlyField", new BytesRef("bar")));
+                doc.add(new LatLonDocValuesField("latLonDVField", 1.0, 1.0));
+                doc.add(new LatLonPoint("latLonDVField", 1.0, 1.0));
             }
             if (randomBoolean()) {
                 doc.add(new StringField("foo", "baz", Store.NO));
+                doc.add(new SortedSetDocValuesField("foo", new BytesRef("baz")));
             }
             if (withDeletions && (rarely() || i == 0)) {
                 doc.add(new StringField("delete", "yes", Store.NO));
@@ -132,16 +144,25 @@ private void countTestCase(boolean withDeletions) throws Exception {
         Query matchAllCsq = new ConstantScoreQuery(matchAll);
         Query tq = new TermQuery(new Term("foo", "bar"));
         Query tCsq = new ConstantScoreQuery(tq);
+        Query dvfeq = new DocValuesFieldExistsQuery("foo");
+        Query dvfeq_points = new DocValuesFieldExistsQuery("latLonDVField");
+        Query dvfeqCsq = new ConstantScoreQuery(dvfeq);
+        // a field that has doc values but is not indexed cannot be shortcut, so its hits must be collected
+        Query dvOnlyfeq = new DocValuesFieldExistsQuery("docValuesOnlyField");
         BooleanQuery bq = new BooleanQuery.Builder()
             .add(matchAll, Occur.SHOULD)
             .add(tq, Occur.MUST)
             .build();
 
-        countTestCase(matchAll, reader, false);
-        countTestCase(matchAllCsq, reader, false);
-        countTestCase(tq, reader, withDeletions);
-        countTestCase(tCsq, reader, withDeletions);
-        countTestCase(bq, reader, true);
+        countTestCase(matchAll, reader, false, false);
+        countTestCase(matchAllCsq, reader, false, false);
+        countTestCase(tq, reader, withDeletions, withDeletions);
+        countTestCase(tCsq, reader, withDeletions, withDeletions);
+        countTestCase(dvfeq, reader, withDeletions, true);
+        countTestCase(dvfeq_points, reader, withDeletions, true);
+        countTestCase(dvfeqCsq, reader, withDeletions, true);
+        countTestCase(dvOnlyfeq, reader, true, true);
+        countTestCase(bq, reader, true, true);
         reader.close();
         w.close();
         dir.close();
@@ -541,6 +562,7 @@ public void testIndexSortScrollOptimization() throws Exception {
 
     private static IndexSearcher getAssertingEarlyTerminationSearcher(IndexReader reader, int size) {
         return new IndexSearcher(reader) {
+            @Override
             protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
                 final Collector in = new AssertingEarlyTerminationFilterCollector(collector, size);
                 super.search(leaves, weight, in);