From ba913e30f6f323272def048d10a0a6735d86d8ca Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 3 Feb 2020 16:03:09 -0500 Subject: [PATCH 1/2] Fix a sneaky bug in rare_terms When the `rare_terms` aggregation contained another aggregation it'd break them. Most of the time. This happened because the process that it uses to remove buckets that turn out not to be rare was incorrectly merging results from multiple leaves. This'd cause array index out of bounds issues. We didn't catch it in the test because the issue doesn't happen on the very first bucket. And the tests generated data in such a way that the first bucket always contained the rare terms. Randomizing the order of the generated data fixed the test so it caught the issue. Closes #51020 --- .../search.aggregation/280_rare_terms.yml | 42 +++++++++++++++++++ .../terms/AbstractRareTermsAggregator.java | 7 ++-- .../bucket/terms/LongRareTermsAggregator.java | 5 +-- .../terms/StringRareTermsAggregator.java | 5 +-- .../terms/RareTermsAggregatorTests.java | 4 +- 5 files changed, 50 insertions(+), 13 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/280_rare_terms.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/280_rare_terms.yml index a82caddd9cfd4..5864acfd4a4c2 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/280_rare_terms.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/280_rare_terms.yml @@ -313,4 +313,46 @@ setup: - match: { hits.total.value: 1 } - length: { aggregations.long_terms.buckets: 0 } +--- +"sub aggs": + - do: + index: + refresh: true + index: test_1 + id: 1 + body: { "str" : "abc", "number": 1 } + + - do: + index: + refresh: true + index: test_1 + id: 2 + body: { "str": "abc", "number": 2 } + - do: + index: + refresh: true + index: test_1 + id: 3 + body: { "str": "bcd", "number": 3 } + + - do: + search: + body: + size: 0 + aggs: + str_terms: + rare_terms: + field: str + max_doc_count: 1 + aggs: + max_n: + max: + field: number + + - match: { hits.total.value: 3 } + - length: { aggregations.str_terms.buckets: 1 } + - match: { aggregations.str_terms.buckets.0.key: "bcd" } + - is_false: aggregations.str_terms.buckets.0.key_as_string + - match: { aggregations.str_terms.buckets.0.doc_count: 1 } + - match: { aggregations.str_terms.buckets.0.max_n.value: 3.0 } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractRareTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractRareTermsAggregator.java index 2bbe3c01988df..bacc2ef217004 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractRareTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractRareTermsAggregator.java @@ -50,7 +50,6 @@ public abstract class AbstractRareTermsAggregator dataset, try (Directory directory = newDirectory()) { try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { Document document = new Document(); - for (Long value : dataset) { + List shuffledDataset = new ArrayList<>(dataset); + Collections.shuffle(shuffledDataset, random()); + for (Long value : shuffledDataset) { if (frequently()) { indexWriter.commit(); } From be0c4ab316b60e427694c2a95b8caaa3231336a7 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 4 Feb 2020 10:25:12 -0500 Subject: [PATCH 2/2] Skip old --- .../rest-api-spec/test/search.aggregation/280_rare_terms.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/280_rare_terms.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/280_rare_terms.yml index 5864acfd4a4c2..04d76a987809d 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/280_rare_terms.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/280_rare_terms.yml @@ -315,6 +315,10 @@ setup: --- "sub aggs": + - skip: + version: " - 7.99.99" + reason: Sub aggs fixed in 8.0 (to be backported to 7.6.1) + - do: index: refresh: true