Skip to content

Commit

Permalink
Shortcut counts on exists queries (#39570)
Browse files Browse the repository at this point in the history
`TopDocsCollectorContext` can already shortcut hit counts on `match_all` and `term` queries when there are no deletions.
This change adds the same shortcut for `exists` queries when the index has no deletions and the queried field is indexed (as points or terms); fields that only have doc values still need to be collected.

Closes #37475
  • Loading branch information
Christoph Büscher authored Mar 4, 2019
1 parent 7aa5851 commit eea65da
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,18 @@

package org.elasticsearch.search.query;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiCollector;
Expand Down Expand Up @@ -125,6 +131,7 @@ private EmptyTopDocsCollectorContext(IndexReader reader, Query query,
}
}

@Override
Collector create(Collector in) {
assert in == null;
return collector;
Expand Down Expand Up @@ -357,6 +364,29 @@ static int shortcutTotalHitCount(IndexReader reader, Query query) throws IOExcep
count += context.reader().docFreq(term);
}
return count;
} else if (query.getClass() == DocValuesFieldExistsQuery.class && reader.hasDeletions() == false) {
final String field = ((DocValuesFieldExistsQuery) query).getField();
int count = 0;
for (LeafReaderContext context : reader.leaves()) {
FieldInfos fieldInfos = context.reader().getFieldInfos();
FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
if (fieldInfo != null) {
if (fieldInfo.getPointIndexDimensionCount() > 0) {
PointValues points = context.reader().getPointValues(field);
if (points != null) {
count += points.getDocCount();
}
} else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
Terms terms = context.reader().terms(field);
if (terms != null) {
count += terms.getDocCount();
}
} else {
return -1; // no shortcut possible for fields that are not indexed
}
}
}
return count;
} else {
return -1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
Expand All @@ -35,6 +38,7 @@
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.FilterCollector;
Expand All @@ -50,6 +54,7 @@
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.action.search.SearchTask;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.ParsedQuery;
Expand Down Expand Up @@ -92,18 +97,20 @@ public void tearDown() throws Exception {
closeShards(indexShard);
}

private void countTestCase(Query query, IndexReader reader, boolean shouldCollect) throws Exception {
private void countTestCase(Query query, IndexReader reader, boolean shouldCollectSearch, boolean shouldCollectCount) throws Exception {
TestSearchContext context = new TestSearchContext(null, indexShard);
context.parsedQuery(new ParsedQuery(query));
context.setSize(0);
context.setTask(new SearchTask(123L, "", "", "", null, Collections.emptyMap()));

final IndexSearcher searcher = shouldCollect ? new IndexSearcher(reader) :
final IndexSearcher searcher = shouldCollectSearch ? new IndexSearcher(reader) :
getAssertingEarlyTerminationSearcher(reader, 0);

final boolean rescore = QueryPhase.execute(context, searcher, checkCancelled -> {});
assertFalse(rescore);
assertEquals(searcher.count(query), context.queryResult().topDocs().topDocs.totalHits.value);
IndexSearcher countSearcher = shouldCollectCount ? new IndexSearcher(reader) :
getAssertingEarlyTerminationSearcher(reader, 0);
assertEquals(countSearcher.count(query), context.queryResult().topDocs().topDocs.totalHits.value);
}

private void countTestCase(boolean withDeletions) throws Exception {
Expand All @@ -115,9 +122,14 @@ private void countTestCase(boolean withDeletions) throws Exception {
Document doc = new Document();
if (randomBoolean()) {
doc.add(new StringField("foo", "bar", Store.NO));
doc.add(new SortedSetDocValuesField("foo", new BytesRef("bar")));
doc.add(new SortedSetDocValuesField("docValuesOnlyField", new BytesRef("bar")));
doc.add(new LatLonDocValuesField("latLonDVField", 1.0, 1.0));
doc.add(new LatLonPoint("latLonDVField", 1.0, 1.0));
}
if (randomBoolean()) {
doc.add(new StringField("foo", "baz", Store.NO));
doc.add(new SortedSetDocValuesField("foo", new BytesRef("baz")));
}
if (withDeletions && (rarely() || i == 0)) {
doc.add(new StringField("delete", "yes", Store.NO));
Expand All @@ -132,16 +144,25 @@ private void countTestCase(boolean withDeletions) throws Exception {
Query matchAllCsq = new ConstantScoreQuery(matchAll);
Query tq = new TermQuery(new Term("foo", "bar"));
Query tCsq = new ConstantScoreQuery(tq);
Query dvfeq = new DocValuesFieldExistsQuery("foo");
Query dvfeq_points = new DocValuesFieldExistsQuery("latLonDVField");
Query dvfeqCsq = new ConstantScoreQuery(dvfeq);
// field with doc-values but not indexed will need to collect
Query dvOnlyfeq = new DocValuesFieldExistsQuery("docValuesOnlyField");
BooleanQuery bq = new BooleanQuery.Builder()
.add(matchAll, Occur.SHOULD)
.add(tq, Occur.MUST)
.build();

countTestCase(matchAll, reader, false);
countTestCase(matchAllCsq, reader, false);
countTestCase(tq, reader, withDeletions);
countTestCase(tCsq, reader, withDeletions);
countTestCase(bq, reader, true);
countTestCase(matchAll, reader, false, false);
countTestCase(matchAllCsq, reader, false, false);
countTestCase(tq, reader, withDeletions, withDeletions);
countTestCase(tCsq, reader, withDeletions, withDeletions);
countTestCase(dvfeq, reader, withDeletions, true);
countTestCase(dvfeq_points, reader, withDeletions, true);
countTestCase(dvfeqCsq, reader, withDeletions, true);
countTestCase(dvOnlyfeq, reader, true, true);
countTestCase(bq, reader, true, true);
reader.close();
w.close();
dir.close();
Expand Down Expand Up @@ -541,6 +562,7 @@ public void testIndexSortScrollOptimization() throws Exception {

private static IndexSearcher getAssertingEarlyTerminationSearcher(IndexReader reader, int size) {
return new IndexSearcher(reader) {
@Override
protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
final Collector in = new AssertingEarlyTerminationFilterCollector(collector, size);
super.search(leaves, weight, in);
Expand Down

0 comments on commit eea65da

Please sign in to comment.