Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize lone single bucket date_histogram #71180

Merged
merged 16 commits into from
May 12, 2021
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator;
import org.elasticsearch.search.aggregations.bucket.sampler.DiversifiedOrdinalsSamplerAggregator;
import org.elasticsearch.search.aggregations.bucket.terms.GlobalOrdinalsStringTermsAggregator;
import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
import org.elasticsearch.search.profile.ProfileResult;
import org.elasticsearch.search.profile.ProfileShardResult;
import org.elasticsearch.test.ESIntegTestCase;
Expand Down Expand Up @@ -595,7 +596,9 @@ public void testFilterByFilter() throws InterruptedException, IOException {

SearchResponse response = client().prepareSearch("dateidx")
.setProfile(true)
.addAggregation(new DateHistogramAggregationBuilder("histo").field("date").calendarInterval(DateHistogramInterval.MONTH))
.addAggregation(new DateHistogramAggregationBuilder("histo").field("date").calendarInterval(DateHistogramInterval.MONTH)
// Add a sub-agg so we don't get to use metadata. That's great and all, but it outputs less debugging info for us to verify.
.subAggregation(new MaxAggregationBuilder("m").field("date")))
.get();
assertSearchResponse(response);
Map<String, ProfileShardResult> profileResults = response.getProfileResults();
Expand All @@ -612,7 +615,7 @@ public void testFilterByFilter() throws InterruptedException, IOException {
assertThat(histoAggResult, notNullValue());
assertThat(histoAggResult.getQueryName(), equalTo("DateHistogramAggregator.FromDateRange"));
assertThat(histoAggResult.getLuceneDescription(), equalTo("histo"));
assertThat(histoAggResult.getProfiledChildren().size(), equalTo(0));
assertThat(histoAggResult.getProfiledChildren().size(), equalTo(1));
assertThat(histoAggResult.getTime(), greaterThan(0L));
Map<String, Long> breakdown = histoAggResult.getTimeBreakdown();
assertThat(breakdown, notNullValue());
Expand Down Expand Up @@ -644,7 +647,7 @@ public void testFilterByFilter() throws InterruptedException, IOException {
Map<?, ?> queryDebug = (Map<?, ?>) filtersDebug.get(0);
assertThat(queryDebug, hasKey("scorers_prepared_while_estimating_cost"));
assertThat((int) queryDebug.get("scorers_prepared_while_estimating_cost"), greaterThan(0));
assertThat(queryDebug, hasEntry("query", "ConstantScore(DocValuesFieldExistsQuery [field=date])"));
assertThat(queryDebug, hasEntry("query", "DocValuesFieldExistsQuery [field=date]"));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@

package org.elasticsearch.search.aggregations.bucket.filter;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
Expand Down Expand Up @@ -52,6 +55,15 @@ public static QueryToFilterAdapter<?> build(IndexSearcher searcher, String key,
if (query instanceof TermQuery) {
return new TermQueryToFilterAdapter(searcher, key, (TermQuery) query);
}
if (query instanceof ConstantScoreQuery) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like an obvious question "Why don't we check for a wrapped TermsQuery or MatchAllDocsQuery?" Would be good to have a comment to answer that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because I've not seen it come up. But, looking at it with fresh eyes now, I think the safest thing is to always unwrap.

Query wrapped = ((ConstantScoreQuery) query).getQuery();
if (wrapped instanceof DocValuesFieldExistsQuery) {
return new DocValuesFieldExistsAdapter(searcher, key, (DocValuesFieldExistsQuery) wrapped);
}
}
if (query instanceof DocValuesFieldExistsQuery) {
return new DocValuesFieldExistsAdapter(searcher, key, (DocValuesFieldExistsQuery) query);
}
if (query instanceof MatchAllDocsQuery) {
return new MatchAllQueryToFilterAdapter(searcher, key, (MatchAllDocsQuery) query);
}
Expand Down Expand Up @@ -386,4 +398,50 @@ void collectDebugInfo(BiConsumer<String, Object> add) {
add.accept("results_from_metadata", resultsFromMetadata);
}
}

private static class DocValuesFieldExistsAdapter extends QueryToFilterAdapter<DocValuesFieldExistsQuery> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At some point, keeping all the implementations as static inner classes is going to get unwieldy. Do you think we should refactor QueryToFilterAdapeter into its own package and make these static inners top level package private classes?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah.... I'll see about breaking them out in a mechanical follow up PR.

private int resultsFromMetadata;

private DocValuesFieldExistsAdapter(IndexSearcher searcher, String key, DocValuesFieldExistsQuery query) {
super(searcher, key, query);
}

@Override
long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live) throws IOException {
if (countCanUseMetadata(counter, live) && canCountFromMetadata(ctx)) {
resultsFromMetadata++;
PointValues points = ctx.reader().getPointValues(query().getField());
if (points == null) {
return 0;
}
return points.getDocCount();

}
return super.count(ctx, counter, live);
}

@Override
long estimateCountCost(LeafReaderContext ctx, CheckedSupplier<Boolean, IOException> canUseMetadata) throws IOException {
if (canUseMetadata.get() && canCountFromMetadata(ctx)) {
return 0;
}
return super.estimateCountCost(ctx, canUseMetadata);
}

private boolean canCountFromMetadata(LeafReaderContext ctx) throws IOException {
FieldInfo info = ctx.reader().getFieldInfos().fieldInfo(query().getField());
if (info == null) {
// If we don't have any info then there aren't any values anyway.
return true;
}
return info.getPointDimensionCount() > 0;
}

@Override
void collectDebugInfo(BiConsumer<String, Object> add) {
super.collectDebugInfo(add);
add.accept("specialized_for", "docvalues_field_exists");
add.accept("results_from_metadata", resultsFromMetadata);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
Expand All @@ -32,12 +34,14 @@
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.DateFieldMapper.Resolution;
import org.elasticsearch.index.mapper.DocCountFieldMapper;
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;
import org.elasticsearch.index.mapper.ObjectMapper;
import org.elasticsearch.index.query.ExistsQueryBuilder;
import org.elasticsearch.index.query.MatchAllQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
Expand All @@ -61,7 +65,6 @@
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
import org.elasticsearch.search.internal.ContextIndexSearcherTests.DocumentSubsetDirectoryReader;
import org.junit.Before;

import java.io.IOException;
import java.util.ArrayList;
Expand All @@ -72,6 +75,7 @@
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.IntFunction;

import static org.hamcrest.Matchers.both;
import static org.hamcrest.Matchers.equalTo;
Expand All @@ -84,14 +88,6 @@
import static org.mockito.Mockito.mock;

public class FiltersAggregatorTests extends AggregatorTestCase {
private MappedFieldType fieldType;

@Before
public void setUpTest() throws Exception {
super.setUp();
fieldType = new KeywordFieldMapper.KeywordFieldType("field");
}

public void testEmpty() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
Expand All @@ -105,7 +101,12 @@ public void testEmpty() throws Exception {
}
FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", filters);
builder.otherBucketKey("other");
InternalFilters response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
InternalFilters response = searchAndReduce(
indexSearcher,
new MatchAllDocsQuery(),
builder,
new KeywordFieldMapper.KeywordFieldType("field")
);
assertEquals(response.getBuckets().size(), numFilters);
for (InternalFilters.InternalBucket filter : response.getBuckets()) {
assertEquals(filter.getDocCount(), 0);
Expand Down Expand Up @@ -176,7 +177,12 @@ public void testKeyedFilter() throws Exception {
FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", keys);
builder.otherBucket(true);
builder.otherBucketKey("other");
final InternalFilters filters = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
final InternalFilters filters = searchAndReduce(
indexSearcher,
new MatchAllDocsQuery(),
builder,
new KeywordFieldMapper.KeywordFieldType("field")
);
assertEquals(filters.getBuckets().size(), 7);
assertEquals(filters.getBucketByKey("foobar").getDocCount(), 2);
assertEquals(filters.getBucketByKey("foo").getDocCount(), 2);
Expand Down Expand Up @@ -231,7 +237,12 @@ public void testRandom() throws Exception {
builder.otherBucket(true);
builder.otherBucketKey("other");

final InternalFilters response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
final InternalFilters response = searchAndReduce(
indexSearcher,
new MatchAllDocsQuery(),
builder,
new KeywordFieldMapper.KeywordFieldType("field")
);
List<InternalFilters.InternalBucket> buckets = response.getBuckets();
assertEquals(buckets.size(), filters.length + 1);

Expand Down Expand Up @@ -759,6 +770,157 @@ public void testSubAggsManyFilters() throws IOException {
}, dateFt, intFt);
}

public void testDocValuesFieldExistsForDate() throws IOException {
    // Half the time use an explicit exists query; the other half use a range
    // query that covers every indexed value, which is rewritten into exists.
    QueryBuilder query = randomBoolean()
        ? new ExistsQueryBuilder("f")
        : new RangeQueryBuilder("f").gte("2020-01-01").lt("2020-01-02");
    long firstDate = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-01T00:00:01");
    DateFieldMapper.DateFieldType fieldType = new DateFieldMapper.DateFieldType("f");
    docValuesFieldExistsTestCase(query, fieldType, true, docNum -> {
        long value = firstDate + TimeUnit.HOURS.toMillis(docNum);
        return List.of(new LongPoint("f", value), new NumericDocValuesField("f", value));
    });
}

public void testDocValuesFieldExistsForDateWithMultiValuedFields() throws IOException {
    // Two points and two doc values per document: metadata counting must
    // still report documents, not values.
    long firstDate = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-01T00:00:01");
    DateFieldMapper.DateFieldType fieldType = new DateFieldMapper.DateFieldType("f");
    docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), fieldType, true, docNum -> {
        long value = firstDate + TimeUnit.HOURS.toMillis(docNum);
        return List.of(
            new LongPoint("f", value),
            new LongPoint("f", value + 10),
            new SortedNumericDocValuesField("f", value),
            new SortedNumericDocValuesField("f", value + 10)
        );
    });
}

public void testDocValuesFieldExistsForDateWithoutData() throws IOException {
    // A mapped date field with no indexed values at all.
    DateFieldMapper.DateFieldType fieldType = new DateFieldMapper.DateFieldType("f");
    docValuesFieldExistsNoDataTestCase(fieldType);
}

public void testDocValuesFieldExistsForNumber() throws IOException {
    // Exercise every numeric type; all of them index points so counts can
    // come from metadata.
    NumberFieldMapper.NumberType type = randomFrom(NumberFieldMapper.NumberType.values());
    NumberFieldMapper.NumberFieldType fieldType = new NumberFieldMapper.NumberFieldType(
        "f",
        type,
        true,
        false,
        true,
        true,
        null,
        Map.of(),
        null
    );
    docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), fieldType, true, i -> type.createFields("f", i, true, true, false));
}

public void testDocValuesFieldExistsForNumberWithoutData() throws IOException {
    // A mapped numeric field of a random type with no indexed values.
    NumberFieldMapper.NumberFieldType fieldType = new NumberFieldMapper.NumberFieldType(
        "f",
        randomFrom(NumberFieldMapper.NumberType.values()),
        true,
        false,
        true,
        true,
        null,
        Map.of(),
        null
    );
    docValuesFieldExistsNoDataTestCase(fieldType);
}

public void testDocValuesFieldExistsForKeyword() throws IOException {
    // Keyword fields have no points, so the count cannot come from metadata
    // (canUseMetadata == false) and must fall back to running the filter.
    KeywordFieldMapper.KeywordFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("f", true, true, Map.of());
    docValuesFieldExistsTestCase(new ExistsQueryBuilder("f"), fieldType, false, i -> {
        BytesRef term = new BytesRef(randomAlphaOfLength(5));
        return List.of(new Field("f", term, KeywordFieldMapper.Defaults.FIELD_TYPE), new SortedSetDocValuesField("f", term));
    });
}

public void testDocValuesFieldExistsForKeywordWithoutData() throws IOException {
    // A mapped keyword field with no indexed values at all.
    KeywordFieldMapper.KeywordFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("f", true, true, Map.of());
    docValuesFieldExistsNoDataTestCase(fieldType);
}

/**
 * Runs an "exists"-style filter over an index where ten documents have a
 * value for the field and ten don't, asserting that the filter-by-filter
 * implementation is used, counts exactly ten matches, and — when
 * {@code canUseMetadata} — answers entirely from segment metadata without
 * preparing any scorers while estimating cost.
 *
 * @param exists the query to run; either an explicit exists query or one
 *        expected to rewrite into it
 * @param fieldType mapping for the field under test
 * @param canUseMetadata whether counts are expected to be resolved from
 *        points metadata rather than by iterating documents
 * @param buildDocWithField builds the indexable fields for document {@code i}
 */
private void docValuesFieldExistsTestCase(
    QueryBuilder exists,
    MappedFieldType fieldType,
    boolean canUseMetadata,
    IntFunction<List<? extends IndexableField>> buildDocWithField
) throws IOException {
    AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", exists));
    CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
        // Ten documents with a value for the field...
        for (int i = 0; i < 10; i++) {
            iw.addDocument(buildDocWithField.apply(i));
        }
        // ...and ten empty documents that must not match the filter.
        for (int i = 0; i < 10; i++) {
            iw.addDocument(List.of());
        }
    };
    // Exists queries convert to MatchNone if this isn't defined
    FieldNamesFieldMapper.FieldNamesFieldType fnft = new FieldNamesFieldMapper.FieldNamesFieldType(true);
    debugTestCase(
        builder,
        new MatchAllDocsQuery(),
        buildIndex,
        (InternalFilters result, Class<? extends Aggregator> impl, Map<String, Map<String, Object>> debug) -> {
            assertThat(result.getBuckets(), hasSize(1));
            assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(10L));

            // The optimized implementation must have been picked and its
            // per-filter debug info must show whether metadata was used.
            assertThat(impl, equalTo(FiltersAggregator.FilterByFilter.class));
            Map<?, ?> filterAggDebug = debug.get("test");
            List<?> filtersDebug = (List<?>) filterAggDebug.get("filters");
            Map<?, ?> filterDebug = (Map<?, ?>) filtersDebug.get(0);
            assertThat(filterDebug, hasEntry("specialized_for", "docvalues_field_exists"));
            assertThat((int) filterDebug.get("results_from_metadata"), canUseMetadata ? greaterThan(0) : equalTo(0));
        },
        fieldType,
        fnft
    );
    withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> {
        long estimatedCost = ((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE);
        Map<String, Object> debug = new HashMap<>();
        aggregator.collectDebugInfo(debug::put);
        List<?> filtersDebug = (List<?>) debug.get("filters");
        Map<?, ?> filterDebug = (Map<?, ?>) filtersDebug.get(0);
        // Metadata-backed counting makes the estimate free and avoids
        // preparing scorers during estimation; otherwise both are non-zero.
        assertThat(estimatedCost, canUseMetadata ? equalTo(0L) : greaterThan(0L));
        assertThat((int) filterDebug.get("scorers_prepared_while_estimating_cost"), canUseMetadata ? equalTo(0) : greaterThan(0));
    }, fieldType, fnft);
}

/**
 * Runs an exists filter over an index where no document has a value for the
 * field, asserting the specialized adapter is used, the count comes from
 * metadata at zero estimated cost, and the single bucket is empty.
 *
 * @param fieldType mapping for the field under test; the exists query is
 *        built against {@code fieldType.name()}
 */
private void docValuesFieldExistsNoDataTestCase(
    MappedFieldType fieldType
) throws IOException {
    QueryBuilder exists = new ExistsQueryBuilder(fieldType.name());
    AggregationBuilder builder = new FiltersAggregationBuilder("test", new KeyedFilter("q1", exists));
    CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
        // Ten documents, none of which have a value for the field.
        for (int i = 0; i < 10; i++) {
            iw.addDocument(List.of());
        }
    };
    // Exists queries convert to MatchNone if this isn't defined
    FieldNamesFieldMapper.FieldNamesFieldType fnft = new FieldNamesFieldMapper.FieldNamesFieldType(true);
    withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> {
        assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class));
        // With no values at all the count is known from metadata, so the
        // estimate is free and no scorers are prepared while estimating.
        long estimatedCost = ((FiltersAggregator.FilterByFilter) aggregator).estimateCost(Long.MAX_VALUE);
        Map<String, Object> debug = collectAndGetFilterDebugInfo(searcher, aggregator);
        assertThat(debug, hasEntry("specialized_for", "docvalues_field_exists"));
        assertThat(estimatedCost, equalTo(0L));
        assertThat((int) debug.get("results_from_metadata"), greaterThan(0));
        assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), equalTo(0));
    }, fieldType, fnft);
    testCase(builder, new MatchAllDocsQuery(), buildIndex, (InternalFilters result) -> {
        assertThat(result.getBuckets(), hasSize(1));
        assertThat(result.getBucketByKey("q1").getDocCount(), equalTo(0L));
    }, fieldType, fnft);
}

@Override
protected List<ObjectMapper> objectMappers() {
return MOCK_OBJECT_MAPPERS;
Expand Down
Loading