From eea65da4b646537de9959d92805da64cfd75fae7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Mon, 4 Mar 2019 17:36:49 +0100 Subject: [PATCH] Shortcut counts on exists queries (#39570) `TopDocsCollectorContext` can already shortcut hit counts on `match_all` and `term` queries when there are no deletions. This change adds this ability for `exists` queries if the index doesn't have deletions and fields are indexed. Closes #37475 --- .../search/query/TopDocsCollectorContext.java | 30 +++++++++++++++ .../search/query/QueryPhaseTests.java | 38 +++++++++++++++---- 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java b/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java index 1ccc8f4cb92db..1e2cd7541f944 100644 --- a/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java +++ b/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java @@ -19,12 +19,18 @@ package org.elasticsearch.search.query; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.Collector; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiCollector; @@ -125,6 +131,7 @@ private EmptyTopDocsCollectorContext(IndexReader reader, Query query, } } + @Override Collector create(Collector in) { assert in == null; return collector; @@ -357,6 +364,29 @@ static int shortcutTotalHitCount(IndexReader reader, Query query) throws IOExcep count += context.reader().docFreq(term); } return count; + } else if (query.getClass() == DocValuesFieldExistsQuery.class && reader.hasDeletions() == false) { + final String field = ((DocValuesFieldExistsQuery) query).getField(); + int count = 0; + for (LeafReaderContext context : reader.leaves()) { + FieldInfos fieldInfos = context.reader().getFieldInfos(); + FieldInfo fieldInfo = fieldInfos.fieldInfo(field); + if (fieldInfo != null) { + if (fieldInfo.getPointIndexDimensionCount() > 0) { + PointValues points = context.reader().getPointValues(field); + if (points != null) { + count += points.getDocCount(); + } + } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) { + Terms terms = context.reader().terms(field); + if (terms != null) { + count += terms.getDocCount(); + } + } else { + return -1; // no shortcut possible for fields that are not indexed + } + } + } + return count; } else { return -1; } diff --git a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java index a321ff9c1a80a..16b18efe62322 100644 --- a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java @@ -21,7 +21,10 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.LatLonDocValuesField; +import org.apache.lucene.document.LatLonPoint; import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; @@ -35,6 +38,7 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Collector; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.FilterCollector; @@ -50,6 +54,7 @@ import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.search.Weight; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.action.search.SearchTask; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.query.ParsedQuery; @@ -92,18 +97,20 @@ public void tearDown() throws Exception { closeShards(indexShard); } - private void countTestCase(Query query, IndexReader reader, boolean shouldCollect) throws Exception { + private void countTestCase(Query query, IndexReader reader, boolean shouldCollectSearch, boolean shouldCollectCount) throws Exception { TestSearchContext context = new TestSearchContext(null, indexShard); context.parsedQuery(new ParsedQuery(query)); context.setSize(0); context.setTask(new SearchTask(123L, "", "", "", null, Collections.emptyMap())); - final IndexSearcher searcher = shouldCollect ? new IndexSearcher(reader) : + final IndexSearcher searcher = shouldCollectSearch ? new IndexSearcher(reader) : getAssertingEarlyTerminationSearcher(reader, 0); final boolean rescore = QueryPhase.execute(context, searcher, checkCancelled -> {}); assertFalse(rescore); - assertEquals(searcher.count(query), context.queryResult().topDocs().topDocs.totalHits.value); + IndexSearcher countSearcher = shouldCollectCount ? new IndexSearcher(reader) : + getAssertingEarlyTerminationSearcher(reader, 0); + assertEquals(countSearcher.count(query), context.queryResult().topDocs().topDocs.totalHits.value); } private void countTestCase(boolean withDeletions) throws Exception { @@ -115,9 +122,14 @@ private void countTestCase(boolean withDeletions) throws Exception { Document doc = new Document(); if (randomBoolean()) { doc.add(new StringField("foo", "bar", Store.NO)); + doc.add(new SortedSetDocValuesField("foo", new BytesRef("bar"))); + doc.add(new SortedSetDocValuesField("docValuesOnlyField", new BytesRef("bar"))); + doc.add(new LatLonDocValuesField("latLonDVField", 1.0, 1.0)); + doc.add(new LatLonPoint("latLonDVField", 1.0, 1.0)); } if (randomBoolean()) { doc.add(new StringField("foo", "baz", Store.NO)); + doc.add(new SortedSetDocValuesField("foo", new BytesRef("baz"))); } if (withDeletions && (rarely() || i == 0)) { doc.add(new StringField("delete", "yes", Store.NO)); @@ -132,16 +144,25 @@ private void countTestCase(boolean withDeletions) throws Exception { Query matchAllCsq = new ConstantScoreQuery(matchAll); Query tq = new TermQuery(new Term("foo", "bar")); Query tCsq = new ConstantScoreQuery(tq); + Query dvfeq = new DocValuesFieldExistsQuery("foo"); + Query dvfeq_points = new DocValuesFieldExistsQuery("latLonDVField"); + Query dvfeqCsq = new ConstantScoreQuery(dvfeq); + // field with doc-values but not indexed will need to collect + Query dvOnlyfeq = new DocValuesFieldExistsQuery("docValuesOnlyField"); BooleanQuery bq = new BooleanQuery.Builder() .add(matchAll, Occur.SHOULD) .add(tq, Occur.MUST) .build(); - countTestCase(matchAll, reader, false); - countTestCase(matchAllCsq, reader, false); - countTestCase(tq, reader, withDeletions); - countTestCase(tCsq, reader, withDeletions); - countTestCase(bq, reader, true); + countTestCase(matchAll, reader, false, false); + countTestCase(matchAllCsq, reader, false, false); + countTestCase(tq, reader, withDeletions, withDeletions); + countTestCase(tCsq, reader, withDeletions, withDeletions); + countTestCase(dvfeq, reader, withDeletions, true); + countTestCase(dvfeq_points, reader, withDeletions, true); + countTestCase(dvfeqCsq, reader, withDeletions, true); + countTestCase(dvOnlyfeq, reader, true, true); + countTestCase(bq, reader, true, true); reader.close(); w.close(); dir.close(); @@ -541,6 +562,7 @@ public void testIndexSortScrollOptimization() throws Exception { private static IndexSearcher getAssertingEarlyTerminationSearcher(IndexReader reader, int size) { return new IndexSearcher(reader) { + @Override protected void search(List leaves, Weight weight, Collector collector) throws IOException { final Collector in = new AssertingEarlyTerminationFilterCollector(collector, size); super.search(leaves, weight, in);