Skip to content

Commit

Permalink
Optimize lone single bucket date_histogram
Browse files Browse the repository at this point in the history
This optimizes the `date_histogram` agg when there is a single bucket
and no sub-aggregations. We expect this to happen from time to time when
the buckets are larger than a day because folks often use "daily"
indices.

This was already fairly fast, but using the metadata makes it 10x
faster. Something like 98ms becomes 7.5ms. Nice if you can get it!

Like elastic#69377 this optimization will disable itself if you have document
level security enabled or are querying a rollup index. Also like elastic#69377
it won't do anything if there is a top level query.
  • Loading branch information
nik9000 committed Mar 26, 2021
1 parent 6056afa commit 29d21b1
Show file tree
Hide file tree
Showing 3 changed files with 226 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@

package org.elasticsearch.search.aggregations.bucket.filter;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
Expand Down Expand Up @@ -52,6 +55,15 @@ public static QueryToFilterAdapter<?> build(IndexSearcher searcher, String key,
if (query instanceof TermQuery) {
return new TermQueryToFilterAdapter(searcher, key, (TermQuery) query);
}
if (query instanceof ConstantScoreQuery) {
Query wrapped = ((ConstantScoreQuery) query).getQuery();
if (wrapped instanceof DocValuesFieldExistsQuery) {
return new DocValuesFieldExistsAdapter(searcher, key, (DocValuesFieldExistsQuery) wrapped);
}
}
if (query instanceof DocValuesFieldExistsQuery) {
return new DocValuesFieldExistsAdapter(searcher, key, (DocValuesFieldExistsQuery) query);
}
if (query instanceof MatchAllDocsQuery) {
return new MatchAllQueryToFilterAdapter(searcher, key, (MatchAllDocsQuery) query);
}
Expand Down Expand Up @@ -386,4 +398,50 @@ void collectDebugInfo(BiConsumer<String, Object> add) {
add.accept("results_from_metadata", resultsFromMetadata);
}
}

/**
 * Adapter that answers {@link DocValuesFieldExistsQuery} filters. When the
 * segment's metadata proves every document with points also has doc values
 * (and vice versa) we can answer straight from {@link PointValues#getDocCount}
 * without running the query at all.
 */
private static class DocValuesFieldExistsAdapter extends QueryToFilterAdapter<DocValuesFieldExistsQuery> {
    // Number of segments answered purely from index metadata; surfaced in debug output.
    private int resultsFromMetadata;

    private DocValuesFieldExistsAdapter(IndexSearcher searcher, String key, DocValuesFieldExistsQuery query) {
        super(searcher, key, query);
    }

    @Override
    long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live) throws IOException {
        if (false == (countCanUseMetadata(counter, live) && canCountFromMetadata(ctx))) {
            // Metadata isn't usable here (deleted docs, doc counting, or no points) - fall back.
            return super.count(ctx, counter, live);
        }
        resultsFromMetadata++;
        PointValues pointValues = ctx.reader().getPointValues(query().getField());
        return pointValues == null ? 0 : pointValues.getDocCount();
    }

    @Override
    long estimateCountCost(LeafReaderContext ctx, CheckedSupplier<Boolean, IOException> canUseMetadata) throws IOException {
        // Counting from metadata is free; otherwise defer to the generic estimate.
        boolean free = canUseMetadata.get() && canCountFromMetadata(ctx);
        return free ? 0 : super.estimateCountCost(ctx, canUseMetadata);
    }

    /**
     * Can this segment's count be read from the points metadata? True when the
     * field is indexed with points, or when the segment has no info for the
     * field at all (meaning no document has a value).
     */
    private boolean canCountFromMetadata(LeafReaderContext ctx) throws IOException {
        FieldInfo fieldInfo = ctx.reader().getFieldInfos().fieldInfo(query().getField());
        if (fieldInfo == null) {
            // No field info means no values anywhere in this segment.
            return true;
        }
        return fieldInfo.getPointDimensionCount() > 0;
    }

    @Override
    void collectDebugInfo(BiConsumer<String, Object> add) {
        super.collectDebugInfo(add);
        add.accept("specialized_for", "docvalues_field_exists");
        add.accept("results_from_metadata", resultsFromMetadata);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
Expand All @@ -32,12 +34,14 @@
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.DateFieldMapper.Resolution;
import org.elasticsearch.index.mapper.DocCountFieldMapper;
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;
import org.elasticsearch.index.mapper.ObjectMapper;
import org.elasticsearch.index.query.ExistsQueryBuilder;
import org.elasticsearch.index.query.MatchAllQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
Expand All @@ -61,7 +65,6 @@
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
import org.elasticsearch.search.internal.ContextIndexSearcherTests.DocumentSubsetDirectoryReader;
import org.junit.Before;

import java.io.IOException;
import java.util.ArrayList;
Expand All @@ -72,6 +75,7 @@
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.IntFunction;

import static org.hamcrest.Matchers.both;
import static org.hamcrest.Matchers.equalTo;
Expand All @@ -84,14 +88,6 @@
import static org.mockito.Mockito.mock;

public class FiltersAggregatorTests extends AggregatorTestCase {
private MappedFieldType fieldType;

@Before
public void setUpTest() throws Exception {
super.setUp();
fieldType = new KeywordFieldMapper.KeywordFieldType("field");
}

public void testEmpty() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
Expand All @@ -105,7 +101,12 @@ public void testEmpty() throws Exception {
}
FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", filters);
builder.otherBucketKey("other");
InternalFilters response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
InternalFilters response = searchAndReduce(
indexSearcher,
new MatchAllDocsQuery(),
builder,
new KeywordFieldMapper.KeywordFieldType("field")
);
assertEquals(response.getBuckets().size(), numFilters);
for (InternalFilters.InternalBucket filter : response.getBuckets()) {
assertEquals(filter.getDocCount(), 0);
Expand Down Expand Up @@ -176,7 +177,12 @@ public void testKeyedFilter() throws Exception {
FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", keys);
builder.otherBucket(true);
builder.otherBucketKey("other");
final InternalFilters filters = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
final InternalFilters filters = searchAndReduce(
indexSearcher,
new MatchAllDocsQuery(),
builder,
new KeywordFieldMapper.KeywordFieldType("field")
);
assertEquals(filters.getBuckets().size(), 7);
assertEquals(filters.getBucketByKey("foobar").getDocCount(), 2);
assertEquals(filters.getBucketByKey("foo").getDocCount(), 2);
Expand Down Expand Up @@ -231,7 +237,12 @@ public void testRandom() throws Exception {
builder.otherBucket(true);
builder.otherBucketKey("other");

final InternalFilters response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
final InternalFilters response = searchAndReduce(
indexSearcher,
new MatchAllDocsQuery(),
builder,
new KeywordFieldMapper.KeywordFieldType("field")
);
List<InternalFilters.InternalBucket> buckets = response.getBuckets();
assertEquals(buckets.size(), filters.length + 1);

Expand Down Expand Up @@ -755,6 +766,133 @@ public void testSubAggsManyFilters() throws IOException {
}, dateFt, intFt);
}

public void testDocValuesFieldExistsForDate() throws IOException {
    DateFieldMapper.DateFieldType fieldType = new DateFieldMapper.DateFieldType("f");
    // Either a literal exists query or a range wide enough that it is rewritten into one.
    QueryBuilder existsQuery = randomBoolean()
        ? new ExistsQueryBuilder("f")
        : new RangeQueryBuilder("f").gte("2020-01-01").lt("2020-01-02");
    long firstDate = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-01T00:00:01");
    docValuesFieldExistsTestCase(existsQuery, fieldType, true, docIndex -> {
        long date = firstDate + TimeUnit.HOURS.toMillis(docIndex);
        return List.of(new LongPoint("f", date), new NumericDocValuesField("f", date));
    });
}

public void testDocValuesFieldExistsForDateWithoutData() throws IOException {
    DateFieldMapper.DateFieldType fieldType = new DateFieldMapper.DateFieldType("f");
    docValuesFieldExistsNoDataTestCase(fieldType);
}

public void testDocValuesFieldExistsForNumber() throws IOException {
    NumberFieldMapper.NumberType numberType = randomFrom(NumberFieldMapper.NumberType.values());
    NumberFieldMapper.NumberFieldType fieldType = new NumberFieldMapper.NumberFieldType(
        "f",
        numberType,
        true,
        false,
        true,
        true,
        null,
        Map.of()
    );
    docValuesFieldExistsTestCase(
        new ExistsQueryBuilder("f"),
        fieldType,
        true,
        docIndex -> numberType.createFields("f", docIndex, true, true, false)
    );
}

public void testDocValuesFieldExistsForNumberWithoutData() throws IOException {
    NumberFieldMapper.NumberFieldType fieldType = new NumberFieldMapper.NumberFieldType(
        "f",
        randomFrom(NumberFieldMapper.NumberType.values()),
        true,
        false,
        true,
        true,
        null,
        Map.of()
    );
    docValuesFieldExistsNoDataTestCase(fieldType);
}

public void testDocValuesFieldExistsForKeyword() throws IOException {
    KeywordFieldMapper.KeywordFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("f", true, true, Map.of());
    docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), fieldType, false, docIndex -> {
        BytesRef value = new BytesRef(randomAlphaOfLength(5));
        return List.of(
            new Field("f", value, KeywordFieldMapper.Defaults.FIELD_TYPE),
            new SortedSetDocValuesField("f", value)
        );
    });
}

public void testDocValuesFieldExistsForKeywordWithoutData() throws IOException {
    KeywordFieldMapper.KeywordFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("f", true, true, Map.of());
    docValuesFieldExistsNoDataTestCase(fieldType);
}

/**
 * Shared scaffolding for the {@code testDocValuesFieldExistsFor*} tests: indexes
 * ten documents with a value for the field and ten without, then asserts that the
 * filters agg specializes on the exists query and that the bucket counts only the
 * ten documents that have a value.
 *
 * @param exists            the query expected to rewrite to a doc-values-exists filter
 * @param fieldType         mapped field type for the field named by {@code exists}
 * @param canUseMetadata    whether this field type indexes points, allowing the per-segment
 *                          count to come from index metadata at zero cost
 * @param buildDocWithField builds the indexed fields for the i-th "has a value" document
 */
private void docValuesFieldExistsTestCase(
    QueryBuilder exists,
    MappedFieldType fieldType,
    boolean canUseMetadata,
    IntFunction<List<? extends IndexableField>> buildDocWithField
) throws IOException {
    AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", exists));
    CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
        // Ten documents that have the field...
        for (int i = 0; i < 10; i++) {
            iw.addDocument(buildDocWithField.apply(i));
        }
        // ...and ten empty documents that must not be counted.
        for (int i = 0; i < 10; i++) {
            iw.addDocument(List.of());
        }
    };
    // Exists queries convert to MatchNone if this isn't defined
    FieldNamesFieldMapper.FieldNamesFieldType fnft = new FieldNamesFieldMapper.FieldNamesFieldType(true);
    withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> {
        // The agg must take the filter-by-filter fast path for the optimization to apply.
        assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class));
        long estimatedCost = ((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE);
        Map<String, Object> debug = collectAndGetFilterDebugInfo(searcher, aggregator);
        assertThat(debug, hasEntry("specialized_for", "docvalues_field_exists"));
        if (canUseMetadata) {
            // Metadata counting is free and never needs a scorer.
            assertThat(estimatedCost, equalTo(0L));
            assertThat((int) debug.get("results_from_metadata"), greaterThan(0));
            assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), equalTo(0));
        } else {
            // Without points the adapter must fall back to running the query.
            assertThat(estimatedCost, greaterThan(0L));
            assertThat((int) debug.get("results_from_metadata"), equalTo(0));
            assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThan(0));
        }
    }, fieldType, fnft);
    testCase(builder, new MatchAllDocsQuery(), buildIndex, (InternalFilters result) -> {
        assertThat(result.getBuckets(), hasSize(1));
        // Only the ten documents with values for the field count.
        assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(10L));
    }, fieldType, fnft);
}

/**
 * Shared scaffolding for the {@code *WithoutData} tests: indexes ten documents with
 * no value for the field and asserts that the exists filter specializes, answers from
 * metadata at zero cost (no field info means no values), and yields an empty bucket.
 *
 * @param fieldType mapped field type for the field the exists query targets
 */
private void docValuesFieldExistsNoDataTestCase(
    MappedFieldType fieldType
) throws IOException {
    QueryBuilder exists = new ExistsQueryBuilder(fieldType.name());
    AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", exists));
    CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
        // Every document is empty - nothing has a value for the field.
        for (int i = 0; i < 10; i++) {
            iw.addDocument(List.of());
        }
    };
    // Exists queries convert to MatchNone if this isn't defined
    FieldNamesFieldMapper.FieldNamesFieldType fnft = new FieldNamesFieldMapper.FieldNamesFieldType(true);
    withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> {
        assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class));
        long estimatedCost = ((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE);
        Map<String, Object> debug = collectAndGetFilterDebugInfo(searcher, aggregator);
        assertThat(debug, hasEntry("specialized_for", "docvalues_field_exists"));
        // With no field info in any segment the count comes entirely from metadata.
        assertThat(estimatedCost, equalTo(0L));
        assertThat((int) debug.get("results_from_metadata"), greaterThan(0));
        assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), equalTo(0));
    }, fieldType, fnft);
    testCase(builder, new MatchAllDocsQuery(), buildIndex, (InternalFilters result) -> {
        assertThat(result.getBuckets(), hasSize(1));
        // No documents have the field, so the bucket is empty.
        assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(0L));
    }, fieldType, fnft);
}

@Override
protected List<ObjectMapper> objectMappers() {
return MOCK_OBJECT_MAPPERS;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -288,3 +288,21 @@ teardown:

- match: { hits.total: 6 } # can-read, read_write, everything
- match: { aggregations.f.buckets.0.doc_count: 6 }

---
"filters agg exists doesn't count invisible docs":
- do:
headers: { Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" } # test_user
search:
rest_total_hits_as_int: true
size: 0
body:
aggs:
f:
filters:
filters:
- exists:
field: name

- match: { hits.total: 6 } # can-read, read_write, everything
- match: { aggregations.f.buckets.0.doc_count: 6 }

0 comments on commit 29d21b1

Please sign in to comment.