Skip to content

Commit

Permalink
Optimize lone single bucket date_histogram (#71180)
Browse files Browse the repository at this point in the history
This optimizes the `date_histogram` agg when there is a single bucket
and no sub-aggregations. We expect this to happen from time to time when
the buckets are larger than a day because folks often use "daily"
indices.

This was already fairly fast, but using the metadata makes it 10x
faster. Something like 98ms becomes 7.5ms. Nice if you can get it!

Like #69377 this optimization will disable itself if you have document
level security enabled or are querying a rollup index. Also like #69377
it won't do anything if there is a top level query.
  • Loading branch information
nik9000 authored May 12, 2021
1 parent dc1bf6e commit 0cf63fc
Show file tree
Hide file tree
Showing 5 changed files with 308 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator;
import org.elasticsearch.search.aggregations.bucket.sampler.DiversifiedOrdinalsSamplerAggregator;
import org.elasticsearch.search.aggregations.bucket.terms.GlobalOrdinalsStringTermsAggregator;
import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
import org.elasticsearch.search.profile.ProfileResult;
import org.elasticsearch.search.profile.ProfileShardResult;
import org.elasticsearch.test.ESIntegTestCase;
Expand Down Expand Up @@ -590,7 +591,9 @@ public void testFilterByFilter() throws InterruptedException, IOException {

SearchResponse response = client().prepareSearch("dateidx")
.setProfile(true)
.addAggregation(new DateHistogramAggregationBuilder("histo").field("date").calendarInterval(DateHistogramInterval.MONTH))
.addAggregation(new DateHistogramAggregationBuilder("histo").field("date").calendarInterval(DateHistogramInterval.MONTH)
// Add a sub-agg so we don't get to use metadata. That's great and all, but it outputs less debugging info for us to verify.
.subAggregation(new MaxAggregationBuilder("m").field("date")))
.get();
assertSearchResponse(response);
Map<String, ProfileShardResult> profileResults = response.getProfileResults();
Expand All @@ -607,7 +610,7 @@ public void testFilterByFilter() throws InterruptedException, IOException {
assertThat(histoAggResult, notNullValue());
assertThat(histoAggResult.getQueryName(), equalTo("DateHistogramAggregator.FromDateRange"));
assertThat(histoAggResult.getLuceneDescription(), equalTo("histo"));
assertThat(histoAggResult.getProfiledChildren().size(), equalTo(0));
assertThat(histoAggResult.getProfiledChildren().size(), equalTo(1));
assertThat(histoAggResult.getTime(), greaterThan(0L));
Map<String, Long> breakdown = histoAggResult.getTimeBreakdown();
assertThat(breakdown, notNullValue());
Expand Down Expand Up @@ -639,7 +642,7 @@ public void testFilterByFilter() throws InterruptedException, IOException {
Map<?, ?> queryDebug = (Map<?, ?>) filtersDebug.get(0);
assertThat(queryDebug, hasKey("scorers_prepared_while_estimating_cost"));
assertThat((int) queryDebug.get("scorers_prepared_while_estimating_cost"), greaterThan(0));
assertThat(queryDebug, hasEntry("query", "ConstantScore(DocValuesFieldExistsQuery [field=date])"));
assertThat(queryDebug, hasEntry("query", "DocValuesFieldExistsQuery [field=date]"));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@

package org.elasticsearch.search.aggregations.bucket.filter;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
Expand Down Expand Up @@ -49,9 +52,21 @@ public class QueryToFilterAdapter<Q extends Query> {
*/
public static QueryToFilterAdapter<?> build(IndexSearcher searcher, String key, Query query) throws IOException {
query = searcher.rewrite(query);
if (query instanceof ConstantScoreQuery) {
/*
* Unwrap constant score because it gets in the way of us
* understanding what the queries are trying to do and we
* don't use the score at all anyway. Effectively we always
* run in constant score mode.
*/
query = ((ConstantScoreQuery) query).getQuery();
}
if (query instanceof TermQuery) {
return new TermQueryToFilterAdapter(searcher, key, (TermQuery) query);
}
if (query instanceof DocValuesFieldExistsQuery) {
return new DocValuesFieldExistsAdapter(searcher, key, (DocValuesFieldExistsQuery) query);
}
if (query instanceof MatchAllDocsQuery) {
return new MatchAllQueryToFilterAdapter(searcher, key, (MatchAllDocsQuery) query);
}
Expand Down Expand Up @@ -386,4 +401,50 @@ void collectDebugInfo(BiConsumer<String, Object> add) {
add.accept("results_from_metadata", resultsFromMetadata);
}
}

private static class DocValuesFieldExistsAdapter extends QueryToFilterAdapter<DocValuesFieldExistsQuery> {
private int resultsFromMetadata;

private DocValuesFieldExistsAdapter(IndexSearcher searcher, String key, DocValuesFieldExistsQuery query) {
super(searcher, key, query);
}

@Override
long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live) throws IOException {
if (countCanUseMetadata(counter, live) && canCountFromMetadata(ctx)) {
resultsFromMetadata++;
PointValues points = ctx.reader().getPointValues(query().getField());
if (points == null) {
return 0;
}
return points.getDocCount();

}
return super.count(ctx, counter, live);
}

@Override
long estimateCountCost(LeafReaderContext ctx, CheckedSupplier<Boolean, IOException> canUseMetadata) throws IOException {
if (canUseMetadata.get() && canCountFromMetadata(ctx)) {
return 0;
}
return super.estimateCountCost(ctx, canUseMetadata);
}

private boolean canCountFromMetadata(LeafReaderContext ctx) throws IOException {
FieldInfo info = ctx.reader().getFieldInfos().fieldInfo(query().getField());
if (info == null) {
// If we don't have any info then there aren't any values anyway.
return true;
}
return info.getPointDimensionCount() > 0;
}

@Override
void collectDebugInfo(BiConsumer<String, Object> add) {
super.collectDebugInfo(add);
add.accept("specialized_for", "docvalues_field_exists");
add.accept("results_from_metadata", resultsFromMetadata);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
Expand All @@ -32,12 +34,14 @@
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.DateFieldMapper.Resolution;
import org.elasticsearch.index.mapper.DocCountFieldMapper;
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;
import org.elasticsearch.index.mapper.ObjectMapper;
import org.elasticsearch.index.query.ExistsQueryBuilder;
import org.elasticsearch.index.query.MatchAllQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
Expand All @@ -61,7 +65,6 @@
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
import org.elasticsearch.search.internal.ContextIndexSearcherTests.DocumentSubsetDirectoryReader;
import org.junit.Before;

import java.io.IOException;
import java.util.ArrayList;
Expand All @@ -72,6 +75,7 @@
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.IntFunction;

import static org.hamcrest.Matchers.both;
import static org.hamcrest.Matchers.equalTo;
Expand All @@ -85,14 +89,6 @@
import static org.mockito.Mockito.mock;

public class FiltersAggregatorTests extends AggregatorTestCase {
private MappedFieldType fieldType;

@Before
public void setUpTest() throws Exception {
super.setUp();
fieldType = new KeywordFieldMapper.KeywordFieldType("field");
}

public void testEmpty() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
Expand All @@ -106,7 +102,12 @@ public void testEmpty() throws Exception {
}
FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", filters);
builder.otherBucketKey("other");
InternalFilters response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
InternalFilters response = searchAndReduce(
indexSearcher,
new MatchAllDocsQuery(),
builder,
new KeywordFieldMapper.KeywordFieldType("field")
);
assertEquals(response.getBuckets().size(), numFilters);
for (InternalFilters.InternalBucket filter : response.getBuckets()) {
assertEquals(filter.getDocCount(), 0);
Expand Down Expand Up @@ -206,7 +207,12 @@ public void testKeyedFilter() throws Exception {
FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", keys);
builder.otherBucket(true);
builder.otherBucketKey("other");
final InternalFilters filters = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
final InternalFilters filters = searchAndReduce(
indexSearcher,
new MatchAllDocsQuery(),
builder,
new KeywordFieldMapper.KeywordFieldType("field")
);
assertEquals(filters.getBuckets().size(), 7);
assertEquals(filters.getBucketByKey("foobar").getDocCount(), 2);
assertEquals(filters.getBucketByKey("foo").getDocCount(), 2);
Expand Down Expand Up @@ -261,7 +267,12 @@ public void testRandom() throws Exception {
builder.otherBucket(true);
builder.otherBucketKey("other");

final InternalFilters response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
final InternalFilters response = searchAndReduce(
indexSearcher,
new MatchAllDocsQuery(),
builder,
new KeywordFieldMapper.KeywordFieldType("field")
);
List<InternalFilters.InternalBucket> buckets = response.getBuckets();
assertEquals(buckets.size(), filters.length + 1);

Expand Down Expand Up @@ -789,6 +800,157 @@ public void testSubAggsManyFilters() throws IOException {
}, dateFt, intFt);
}

public void testDocValuesFieldExistsForDate() throws IOException {
DateFieldMapper.DateFieldType ft = new DateFieldMapper.DateFieldType("f");
QueryBuilder exists;
if (randomBoolean()) {
exists = new ExistsQueryBuilder("f");
} else {
// Range query covering all values in the index is rewritten to exists
exists = new RangeQueryBuilder("f").gte("2020-01-01").lt("2020-01-02");
}
long start = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-01T00:00:01");
docValuesFieldExistsTestCase(exists, ft, true, i -> {
long date = start + TimeUnit.HOURS.toMillis(i);
return List.of(new LongPoint("f", date), new NumericDocValuesField("f", date));
});
}

public void testDocValuesFieldExistsForDateWithMultiValuedFields() throws IOException {
DateFieldMapper.DateFieldType ft = new DateFieldMapper.DateFieldType("f");
long start = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-01T00:00:01");
docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), ft, true, i -> {
long date = start + TimeUnit.HOURS.toMillis(i);
return List.of(
new LongPoint("f", date),
new LongPoint("f", date + 10),
new SortedNumericDocValuesField("f", date),
new SortedNumericDocValuesField("f", date + 10)
);
});
}

public void testDocValuesFieldExistsForDateWithoutData() throws IOException {
docValuesFieldExistsNoDataTestCase(new DateFieldMapper.DateFieldType("f"));
}

public void testDocValuesFieldExistsForNumber() throws IOException {
NumberFieldMapper.NumberType numberType = randomFrom(NumberFieldMapper.NumberType.values());
NumberFieldMapper.NumberFieldType ft = new NumberFieldMapper.NumberFieldType(
"f",
numberType,
true,
false,
true,
true,
null,
Map.of(),
null
);
docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), ft, true, i -> {
return numberType.createFields("f", i, true, true, false);
});
}

public void testDocValuesFieldExistsForNumberWithoutData() throws IOException {
docValuesFieldExistsNoDataTestCase(new NumberFieldMapper.NumberFieldType(
"f",
randomFrom(NumberFieldMapper.NumberType.values()),
true,
false,
true,
true,
null,
Map.of(),
null
));
}

public void testDocValuesFieldExistsForKeyword() throws IOException {
KeywordFieldMapper.KeywordFieldType ft = new KeywordFieldMapper.KeywordFieldType("f", true, true, Map.of());
docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), ft, false, i -> {
BytesRef text = new BytesRef(randomAlphaOfLength(5));
return List.of(new Field("f", text, KeywordFieldMapper.Defaults.FIELD_TYPE), new SortedSetDocValuesField("f", text));
});
}

public void testDocValuesFieldExistsForKeywordWithoutData() throws IOException {
docValuesFieldExistsNoDataTestCase(new KeywordFieldMapper.KeywordFieldType("f", true, true, Map.of()));
}

private void docValuesFieldExistsTestCase(
QueryBuilder exists,
MappedFieldType fieldType,
boolean canUseMetadata,
IntFunction<List<? extends IndexableField>> buildDocWithField
) throws IOException {
AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", exists));
CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
for (int i = 0; i < 10; i++) {
iw.addDocument(buildDocWithField.apply(i));
}
for (int i = 0; i < 10; i++) {
iw.addDocument(List.of());
}
};
// Exists queries convert to MatchNone if this isn't defined
FieldNamesFieldMapper.FieldNamesFieldType fnft = new FieldNamesFieldMapper.FieldNamesFieldType(true);
debugTestCase(
builder,
new MatchAllDocsQuery(),
buildIndex,
(InternalFilters result, Class<? extends Aggregator> impl, Map<String, Map<String, Object>> debug) -> {
assertThat(result.getBuckets(), hasSize(1));
assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(10L));

assertThat(impl, equalTo(FiltersAggregator.FilterByFilter.class));
Map<?, ?> filterAggDebug = debug.get("test");
List<?> filtersDebug = (List<?>) filterAggDebug.get("filters");
Map<?, ?> filterDebug = (Map<?, ?>) filtersDebug.get(0);
assertThat(filterDebug, hasEntry("specialized_for", "docvalues_field_exists"));
assertThat((int) filterDebug.get("results_from_metadata"), canUseMetadata ? greaterThan(0) : equalTo(0));
},
fieldType,
fnft
);
withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> {
long estimatedCost = ((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE);
Map<String, Object> debug = new HashMap<>();
aggregator.collectDebugInfo(debug::put);
List<?> filtersDebug = (List<?>) debug.get("filters");
Map<?, ?> filterDebug = (Map<?, ?>) filtersDebug.get(0);
assertThat(estimatedCost, canUseMetadata ? equalTo(0L) : greaterThan(0L));
assertThat((int) filterDebug.get("scorers_prepared_while_estimating_cost"), canUseMetadata ? equalTo(0) : greaterThan(0));
}, fieldType, fnft);
}

private void docValuesFieldExistsNoDataTestCase(
MappedFieldType fieldType
) throws IOException {
QueryBuilder exists = new ExistsQueryBuilder(fieldType.name());
AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", exists));
CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
for (int i = 0; i < 10; i++) {
iw.addDocument(List.of());
}
};
// Exists queries convert to MatchNone if this isn't defined
FieldNamesFieldMapper.FieldNamesFieldType fnft = new FieldNamesFieldMapper.FieldNamesFieldType(true);
withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> {
assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class));
long estimatedCost = ((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE);
Map<String, Object> debug = collectAndGetFilterDebugInfo(searcher, aggregator);
assertThat(debug, hasEntry("specialized_for", "docvalues_field_exists"));
assertThat(estimatedCost, equalTo(0L));
assertThat((int) debug.get("results_from_metadata"), greaterThan(0));
assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), equalTo(0));
}, fieldType, fnft);
testCase(builder, new MatchAllDocsQuery(), buildIndex, (InternalFilters result) -> {
assertThat(result.getBuckets(), hasSize(1));
assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(0L));
}, fieldType, fnft);
}

@Override
protected List<ObjectMapper> objectMappers() {
return MOCK_OBJECT_MAPPERS;
Expand Down
Loading

0 comments on commit 0cf63fc

Please sign in to comment.