Speed up terms agg when alone (#69377)
This speeds up the `terms` agg in a very specific case:
1. It has no child aggregations
2. It has no parent aggregations
3. There are no deleted documents
4. You are not using document level security
5. There is no top level query
6. The field has global ordinals
7. There are less than one thousand distinct terms

That is a lot of restrictions! But the speedup is pretty substantial because
in those cases we can serve the entire aggregation using metadata that
Lucene precomputes while it builds the index. In a real Rally track we
get a 92% speed improvement, but the index isn't *that* big:

```
| 90th percentile service time | keyword-terms-low-cardinality |     446.031 |     36.7677 | -409.263 |     ms |
```

In a rally track with a larger index I ran some tests by hand and the
aggregation went from 2200ms to 8ms.
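
To make the mechanism concrete, here is a minimal sketch, assuming a plain
Lucene index, of how a per-term document count can be read straight from the
terms dictionary that Lucene writes at index time. This is illustration, not
the code in this commit; `countForTerm` is a hypothetical helper.

```java
import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

class MetadataCountExample {
    /**
     * Hypothetical helper: count the documents containing {@code term} in one
     * segment without visiting any of them. Only valid when the segment has
     * no deleted documents and nothing else filters the docs, which is
     * exactly what the restrictions above guarantee.
     */
    static long countForTerm(LeafReader reader, String field, BytesRef term) throws IOException {
        if (reader.numDeletedDocs() != 0) {
            return -1; // docFreq includes deleted docs, so we'd overcount
        }
        Terms terms = reader.terms(field);
        if (terms == null) {
            return 0; // no document in this segment has the field
        }
        TermsEnum termsEnum = terms.iterator();
        if (termsEnum.seekExact(term) == false) {
            return 0; // term doesn't occur in this segment
        }
        return termsEnum.docFreq(); // precomputed while building the index
    }
}
```

Because `docFreq` is a single lookup in the terms dictionary, the cost is per
term rather than per document, which is where jumps like 2200ms to 8ms come
from.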

Even though there are 7 restrictions on this, I expect it to come into
play enough to matter. Restriction 6 just means you are aggregating on
a `keyword` field. Or an `ip`. And it's fairly common for `keyword`s to
have fewer than a thousand distinct values. Certainly not everywhere, but
in some places.

I expect "cold tier" indices are very very likely not to have deleted
documents at all. And the optimization works segment by segment - so
it'll save some time on each segment without deleted documents. But more
time if the entire index doesn't have any.
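
As a rough sketch of that per-segment decision (again illustrative only;
`countByCollecting` is a hypothetical stand-in for the ordinary path that
visits matching documents, and `countForTerm` is the sketch above):

```java
import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BytesRef;

class PerSegmentExample {
    /** Pick the fast metadata path or the slow collecting path one segment at a time. */
    static long count(IndexReader reader, String field, BytesRef term) throws IOException {
        long total = 0;
        for (LeafReaderContext ctx : reader.leaves()) {
            LeafReader leaf = ctx.reader();
            if (leaf.numDeletedDocs() == 0) {
                // clean segment: read the count from the terms dictionary
                total += MetadataCountExample.countForTerm(leaf, field, term);
            } else {
                // segment with deletes: no shortcut, visit the matches
                total += countByCollecting(leaf, field, term);
            }
        }
        return total;
    }

    /** Hypothetical stand-in for the ordinary collect-the-matches path. */
    static long countByCollecting(LeafReader leaf, String field, BytesRef term) throws IOException {
        throw new UnsupportedOperationException("left out of this sketch");
    }
}
```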

The optimization builds on #68871, which translates `terms` aggregations
against low cardinality fields with global ordinals into a `filters`
aggregation. This change teaches the `filters` aggregation to recognize
when it can get its results from the index metadata. More precisely, it
builds the infrastructure to make that fairly simple and applies it to
the queries generated by the `terms` aggregation.
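
Very roughly, the shape of that infrastructure: each filter's query is
wrapped in an adapter that can, for some query types, answer "how many docs
match this segment?" from metadata alone. The classes below are a hedged
sketch of that idea, not the commit's actual `QueryToFilterAdapter` (which
appears in the diff below), and they reuse the hypothetical `countForTerm`
helper from the first sketch.

```java
import java.io.IOException;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

/** Hedged sketch: specialize per query type, fall back when metadata can't answer. */
abstract class FilterSketch {
    static FilterSketch build(Query query) {
        if (query instanceof TermQuery) {
            return new TermFilterSketch((TermQuery) query);
        }
        return null; // the real code falls back to an adapter that collects matches
    }

    /** Count the matches in one segment, or return -1 to fall back to collecting. */
    abstract long countFromMetadata(LeafReaderContext ctx) throws IOException;
}

class TermFilterSketch extends FilterSketch {
    private final TermQuery query;

    TermFilterSketch(TermQuery query) {
        this.query = query;
    }

    @Override
    long countFromMetadata(LeafReaderContext ctx) throws IOException {
        // same terms dictionary lookup as the countForTerm sketch above
        return MetadataCountExample.countForTerm(
            ctx.reader(), query.getTerm().field(), query.getTerm().bytes()
        );
    }
}
```

A `terms` agg on a ten-value `keyword` field becomes ten such term filters,
and on a clean segment each one is answered with a single dictionary lookup.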
nik9000 authored Feb 25, 2021
1 parent 2fd6337 commit 4ffdad3
Showing 11 changed files with 759 additions and 223 deletions.
@@ -178,4 +178,4 @@ setup:
   - match: { aggregations.f.buckets.foo.doc_count: 8 }
   - match: { aggregations.f.buckets.xyz.doc_count: 5 }
   - match: { profile.shards.0.aggregations.0.type: FiltersAggregator.FilterByFilter }
-  - gte: { profile.shards.0.aggregations.0.debug.segments_with_doc_count: 1 }
+  - gte: { profile.shards.0.aggregations.0.debug.segments_with_doc_count_field: 1 }
@@ -41,6 +41,8 @@
 import static org.hamcrest.Matchers.greaterThan;
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.Matchers.hasEntry;
+import static org.hamcrest.Matchers.hasKey;
+import static org.hamcrest.Matchers.hasSize;
 import static org.hamcrest.Matchers.notNullValue;
 
 @ESIntegTestCase.SuiteScopeTestCase
@@ -633,10 +635,16 @@ public void testFilterByFilter() throws InterruptedException, IOException {
             assertThat(delegate.get("delegate"), equalTo("FiltersAggregator.FilterByFilter"));
             Map<?, ?> delegateDebug = (Map<?, ?>) delegate.get("delegate_debug");
             assertThat(delegateDebug, hasEntry("segments_with_deleted_docs", 0));
-            assertThat(delegateDebug, hasEntry("segments_with_doc_count", 0));
+            assertThat(delegateDebug, hasEntry("segments_with_doc_count_field", 0));
             assertThat(delegateDebug, hasEntry("max_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2));
             assertThat(delegateDebug, hasEntry("estimated_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2));
             assertThat((long) delegateDebug.get("estimate_cost_time"), greaterThanOrEqualTo(0L)); // ~1,276,734 nanos is normal
+            List<?> filtersDebug = (List<?>) delegateDebug.get("filters");
+            assertThat(filtersDebug, hasSize(1));
+            Map<?, ?> queryDebug = (Map<?, ?>) filtersDebug.get(0);
+            assertThat(queryDebug, hasKey("scorers_prepared_while_estimating_cost"));
+            assertThat((int) queryDebug.get("scorers_prepared_while_estimating_cost"), greaterThan(0));
+            assertThat(queryDebug, hasEntry("query", "ConstantScore(DocValuesFieldExistsQuery [field=date])"));
         }
     }
 }

Large diffs are not rendered by default.

@@ -8,7 +8,6 @@
 
 package org.elasticsearch.search.aggregations.bucket.filter;
 
-import org.apache.lucene.search.Query;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.AggregatorFactories;
 import org.elasticsearch.search.aggregations.AggregatorFactory;
@@ -17,13 +16,13 @@
 import org.elasticsearch.search.aggregations.support.AggregationContext;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 
 public class FiltersAggregatorFactory extends AggregatorFactory {
 
-    private final String[] keys;
-    private final Query[] filters;
+    private final List<QueryToFilterAdapter<?>> filters;
     private final boolean keyed;
     private final boolean otherBucket;
     private final String otherBucketKey;
@@ -35,20 +34,17 @@ public FiltersAggregatorFactory(String name, List<KeyedFilter> filters, boolean
         this.keyed = keyed;
         this.otherBucket = otherBucket;
         this.otherBucketKey = otherBucketKey;
-        keys = new String[filters.size()];
-        this.filters = new Query[filters.size()];
-        for (int i = 0; i < filters.size(); ++i) {
-            KeyedFilter keyedFilter = filters.get(i);
-            this.keys[i] = keyedFilter.key();
-            this.filters[i] = context.buildQuery(keyedFilter.filter());
+        this.filters = new ArrayList<>(filters.size());
+        for (KeyedFilter f : filters) {
+            this.filters.add(QueryToFilterAdapter.build(context.searcher(), f.key(), context.buildQuery(f.filter())));
         }
     }
 
     @Override
     public Aggregator createInternal(Aggregator parent,
                                      CardinalityUpperBound cardinality,
                                      Map<String, Object> metadata) throws IOException {
-        return FiltersAggregator.build(name, factories, keys, filters, keyed,
+        return FiltersAggregator.build(name, factories, filters, keyed,
             otherBucket ? otherBucketKey : null, context, parent, cardinality, metadata);
     }
 }
