From 9afee418356dd86f601807dce42d3e0713776f81 Mon Sep 17 00:00:00 2001 From: markharwood Date: Fri, 30 Jun 2017 15:18:40 +0100 Subject: [PATCH] Tests fix - Significant terms/text aggs return Lucene index-level statistics that when merged are assumed to be from different shards. The Aggregator unit tests assume segments can be treated as shards and thus break the significance stats and introduce double-counting of background doc frequencies. This change addresses this problem by ensuring test indexes have only one shard. Closes #25429 --- .../significant/SignificantTermsAggregatorTests.java | 11 ++++++++++- .../significant/SignificantTextAggregatorTests.java | 6 ++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java index 20b2894b73e6..38ed7edf0603 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java @@ -95,7 +95,6 @@ public void testParsedAsFilter() throws IOException { /** * Uses the significant terms aggregation to find the keywords in text fields */ - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/25429") public void testSignificance() throws IOException { TextFieldType textFieldType = new TextFieldType(); textFieldType.setName("text"); @@ -103,6 +102,9 @@ public void testSignificance() throws IOException { textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer())); IndexWriterConfig indexWriterConfig = newIndexWriterConfig(); + indexWriterConfig.setMaxBufferedDocs(100); + indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { addMixedTextDocs(textFieldType, w); @@ -117,6 +119,7 @@ public void testSignificance() throws IOException { sigNumAgg.executionHint(randomExecutionHint()); try (IndexReader reader = DirectoryReader.open(w)) { + assertEquals("test expects a single segment", 1, reader.leaves().size()); IndexSearcher searcher = new IndexSearcher(reader); // Search "odd" @@ -183,6 +186,8 @@ public void testNumericSignificance() throws IOException { textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer())); IndexWriterConfig indexWriterConfig = newIndexWriterConfig(); + indexWriterConfig.setMaxBufferedDocs(100); + indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment final long ODD_VALUE = 3; final long EVEN_VALUE = 6; final long COMMON_VALUE = 2; @@ -206,6 +211,7 @@ public void testNumericSignificance() throws IOException { sigNumAgg.executionHint(randomExecutionHint()); try (IndexReader reader = DirectoryReader.open(w)) { + assertEquals("test expects a single segment", 1, reader.leaves().size()); IndexSearcher searcher = new IndexSearcher(reader); // Search "odd" @@ -237,6 +243,8 @@ public void testUnmapped() throws IOException { textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer())); IndexWriterConfig indexWriterConfig = newIndexWriterConfig(); + indexWriterConfig.setMaxBufferedDocs(100); + indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { addMixedTextDocs(textFieldType, w); @@ -245,6 +253,7 @@ public void testUnmapped() throws IOException { sigAgg.executionHint(randomExecutionHint()); try (IndexReader reader = DirectoryReader.open(w)) { + assertEquals("test expects a single segment", 1, reader.leaves().size()); IndexSearcher searcher = new IndexSearcher(reader); // Search "odd" diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorTests.java index 1057d3a71e05..19b83b11a0f8 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorTests.java @@ -65,6 +65,8 @@ public void testSignificance() throws IOException { textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer())); IndexWriterConfig indexWriterConfig = newIndexWriterConfig(); + indexWriterConfig.setMaxBufferedDocs(100); + indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { for (int i = 0; i < 10; i++) { Document doc = new Document(); @@ -91,6 +93,7 @@ public void testSignificance() throws IOException { .subAggregation(sigAgg); try (IndexReader reader = DirectoryReader.open(w)) { + assertEquals("test expects a single segment", 1, reader.leaves().size()); IndexSearcher searcher = new IndexSearcher(reader); // Search "odd" which should have no duplication @@ -128,6 +131,8 @@ public void testSignificanceOnTextArrays() throws IOException { textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer())); IndexWriterConfig indexWriterConfig = newIndexWriterConfig(); + indexWriterConfig.setMaxBufferedDocs(100); + indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { for (int i = 0; i < 10; i++) { Document doc = new Document(); @@ -140,6 +145,7 @@ public void testSignificanceOnTextArrays() throws IOException { SignificantTextAggregationBuilder sigAgg = new SignificantTextAggregationBuilder("sig_text", "text"); sigAgg.sourceFieldNames(Arrays.asList(new String [] {"title", "text"})); try (IndexReader reader = DirectoryReader.open(w)) { + assertEquals("test expects a single segment", 1, reader.leaves().size()); IndexSearcher searcher = new IndexSearcher(reader); searchAndReduce(searcher, new TermQuery(new Term("text", "foo")), sigAgg, textFieldType); // No significant results to be found in this test - only checking we don't end up