From 2e969cf6fe1af55a4985e1cd18903149c879bd1b Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 27 Apr 2021 15:10:45 -0400 Subject: [PATCH] Fix composite early termination on sorted (backport of #72101) (#72127) I broke composite early termination when reworking how aggregations' contact for `getLeafCollector` around early termination in #70320. We didn't see it in our tests because we weren't properly emulating the aggregation collection stage. This fixes early termination by adhering to the new contract and adds more tests. Closes #72078 Co-authored-by: Benjamin Trent <4357155+benwtrent@users.noreply.github.com> --- .../235_composite_sorted.yml | 191 ++++++++++++++++++ .../bucket/composite/CompositeAggregator.java | 16 +- .../composite/CompositeAggregatorTests.java | 3 +- .../aggregations/AggregatorTestCase.java | 2 +- x-pack/qa/runtime-fields/build.gradle | 5 +- 5 files changed, 211 insertions(+), 6 deletions(-) create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/235_composite_sorted.yml diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/235_composite_sorted.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/235_composite_sorted.yml new file mode 100644 index 0000000000000..e2a7fcbbad725 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/235_composite_sorted.yml @@ -0,0 +1,191 @@ +--- +setup: + - do: + indices.create: + index: sorted + body: + mappings: + properties: + date: + type: date + kw: + type: keyword + settings: + index: + number_of_shards: 1 + number_of_replicas: 0 + sort: + field: date + order: desc + + # Index single documents at a time and refresh. That'll create as many + # segments as possible which has revealed bugs in the past. + - do: + index: + index: sorted + body: {"date": "2021-01-01T00:00:00.000Z", "kw": "cat"} + refresh: true + - do: + index: + index: sorted + body: {"date": "2021-01-02T00:00:00.000Z", "kw": "cat"} + refresh: true + - do: + index: + index: sorted + body: {"date": "2021-01-03T00:00:00.000Z", "kw": "cat"} + refresh: true + - do: + index: + index: sorted + body: {"date": "2021-01-04T00:00:00.000Z", "kw": "cat"} + refresh: true + - do: + index: + index: sorted + body: {"date": "2021-01-05T00:00:00.000Z", "kw": "cat"} + refresh: true + - do: + index: + index: sorted + body: {"date": "2021-01-06T00:00:00.000Z", "kw": "cat"} + refresh: true + - do: + index: + index: sorted + body: {"date": "2021-01-07T00:00:00.000Z", "kw": "cat"} + refresh: true + - do: + index: + index: sorted + body: {"date": "2021-01-08T00:00:00.000Z", "kw": "cat"} + refresh: true + - do: + index: + index: sorted + body: {"date": "2021-01-09T00:00:00.000Z", "kw": "not a cat"} + refresh: true + - do: + index: + index: sorted + body: {"date": "2021-01-10T00:00:00.000Z", "kw": "also not a cat"} + refresh: true + +--- +one source - first page: + - skip: + version: " - 6.99.99" + reason: Format added in 7.0.0 + - do: + search: + index: sorted + body: + size: 0 + aggs: + c: + composite: + size: 2 + sources: + - date: + date_histogram: + field: date + calendar_interval: day + format: iso8601 # Format makes the comparisons a little more obvious + - length: { aggregations.c.buckets: 2 } + - match: { aggregations.c.buckets.0.key.date: "2021-01-01T00:00:00.000Z" } + - match: { aggregations.c.buckets.0.doc_count: 1 } + - match: { aggregations.c.buckets.1.key.date: "2021-01-02T00:00:00.000Z" } + - match: { aggregations.c.buckets.1.doc_count: 1 } + +--- +one source - second page: + - skip: + version: " - 6.99.99" + reason: Format added in 7.0.0 + - do: + search: + index: sorted + body: + size: 0 + aggs: + c: + composite: + size: 2 + sources: + - date: + date_histogram: + field: date + calendar_interval: day + format: iso8601 # Format makes the comparisons a little more obvious + after: + date: "2021-01-02T00:00:00.000Z" + - length: { aggregations.c.buckets: 2 } + - match: { aggregations.c.buckets.0.key.date: "2021-01-03T00:00:00.000Z" } + - match: { aggregations.c.buckets.0.doc_count: 1 } + - match: { aggregations.c.buckets.1.key.date: "2021-01-04T00:00:00.000Z" } + - match: { aggregations.c.buckets.1.doc_count: 1 } + +--- +two sources - first page: + - skip: + version: " - 6.99.99" + reason: Format added in 7.0.0 + - do: + search: + index: sorted + body: + size: 0 + aggs: + c: + composite: + size: 2 + sources: + - date: + date_histogram: + field: date + calendar_interval: day + format: iso8601 # Format makes the comparisons a little more obvious + - kw: + terms: + field: kw + - length: { aggregations.c.buckets: 2 } + - match: { aggregations.c.buckets.0.key.date: "2021-01-01T00:00:00.000Z" } + - match: { aggregations.c.buckets.0.key.kw: cat } + - match: { aggregations.c.buckets.0.doc_count: 1 } + - match: { aggregations.c.buckets.1.key.date: "2021-01-02T00:00:00.000Z" } + - match: { aggregations.c.buckets.1.key.kw: cat } + - match: { aggregations.c.buckets.1.doc_count: 1 } + +--- +two sources - second page: + - skip: + version: " - 6.99.99" + reason: Format added in 7.0.0 + - do: + search: + index: sorted + body: + size: 0 + aggs: + c: + composite: + size: 2 + sources: + - date: + date_histogram: + field: date + calendar_interval: day + format: iso8601 # Format makes the comparisons a little more obvious + - kw: + terms: + field: kw + after: + date: "2021-01-02T00:00:00.000Z" + kw: cat + - length: { aggregations.c.buckets: 2 } + - match: { aggregations.c.buckets.0.key.date: "2021-01-03T00:00:00.000Z" } + - match: { aggregations.c.buckets.0.key.kw: cat } + - match: { aggregations.c.buckets.0.doc_count: 1 } + - match: { aggregations.c.buckets.1.key.date: "2021-01-04T00:00:00.000Z" } + - match: { aggregations.c.buckets.1.key.kw: cat } + - match: { aggregations.c.buckets.1.doc_count: 1 } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java index 53360186cb58b..823f1bcd39664 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java @@ -406,10 +406,22 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket // We have an after key and index sort is applicable so we jump directly to the doc // that is after the index sort prefix using the rawAfterKey and we start collecting // document from there. - processLeafFromQuery(ctx, indexSortPrefix); + try { + processLeafFromQuery(ctx, indexSortPrefix); + } catch (CollectionTerminatedException e) { + /* + * Signal that there isn't anything to collect. We're going + * to return noop collector anyway so we can ignore it. + */ + } return LeafBucketCollector.NO_OP_COLLECTOR; } else { - final LeafBucketCollector inner = queue.getLeafCollector(ctx, getFirstPassCollector(docIdSetBuilder, sortPrefixLen)); + final LeafBucketCollector inner; + try { + inner = queue.getLeafCollector(ctx, getFirstPassCollector(docIdSetBuilder, sortPrefixLen)); + } catch (CollectionTerminatedException e) { + return LeafBucketCollector.NO_OP_COLLECTOR; + } return new LeafBucketCollector() { @Override public void collect(int doc, long zeroBucket) throws IOException { diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregatorTests.java index 76ae2326397b1..0c4014c36f5cd 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregatorTests.java @@ -2656,7 +2656,8 @@ private static long asLong(String dateTime) { private static Sort buildIndexSort(List> sources, Map fieldTypes) { List sortFields = new ArrayList<>(); Map remainingFieldTypes = new HashMap<>(fieldTypes); - for (CompositeValuesSourceBuilder source : sources) { + List> sourcesToCreateSorts = randomBoolean() ? sources : sources.subList(0, 1); + for (CompositeValuesSourceBuilder source : sourcesToCreateSorts) { MappedFieldType type = fieldTypes.remove(source.field()); remainingFieldTypes.remove(source.field()); SortField sortField = sortFieldFrom(type); diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index 328fb9aba81bd..30568278214d6 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -491,7 +491,7 @@ protected A searchAndReduc } } else { root.preCollection(); - searcher.search(rewritten, root); + searcher.search(rewritten, MultiBucketCollector.wrap(true, org.elasticsearch.common.collect.List.of(root))); root.postCollection(); aggs.add(root.buildTopLevel()); } diff --git a/x-pack/qa/runtime-fields/build.gradle b/x-pack/qa/runtime-fields/build.gradle index 593cc26c60830..cea7baa916f5e 100644 --- a/x-pack/qa/runtime-fields/build.gradle +++ b/x-pack/qa/runtime-fields/build.gradle @@ -78,11 +78,12 @@ subprojects { 'search.aggregation/20_terms/string profiler via global ordinals native implementation', 'search.aggregation/20_terms/Global ordinals are loaded with the global_ordinals execution hint', 'search.aggregation/170_cardinality_metric/profiler string', - //dynamic template causes a type _doc to be created, these tests use another type but only one type is allowed + 'search.aggregation/235_composite_sorted/*', + //dynamic template causes a type _doc to be created, these tests use another type but only one type is allowed 'search.aggregation/51_filter_with_types/*', 'search/171_terms_query_with_types/*', 'search/340_type_query/type query', - 'msearch/12_basic_with_types/*' + 'msearch/12_basic_with_types/*', /////// NOT SUPPORTED /////// ].join(',') }