Use LogDocMergePolicy in CompositeAggregatorTests (#103174) (#105494)
* Use LogDocMergePolicy in CompositeAggregatorTests

The latest Lucene upgrade adds a new merge policy that reverses the
order of the documents. To avoid this randomisation we hardcode the merge
policy to LogDocMergePolicy.

This has also been applied to NestedAggregatorTests, so the logic has been
moved to AggregatorTestCase to make it available to all agg tests.

Fixes #103105

* make policy optional

* update comment

* suggested updates

Co-authored-by: Kostas Krikellas <[email protected]>
iverase and kkrik-es authored Feb 14, 2024
1 parent 1ed60a2 commit 49fda11
Showing 4 changed files with 62 additions and 33 deletions.
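
For context, the sketch below shows the plain-Lucene mechanism this change relies on: setting LogDocMergePolicy on an IndexWriterConfig so that merges only combine adjacent segments and documents keep their insertion order. This is a minimal, hedged example using only Lucene APIs; the in-memory directory, field name, and document count are illustrative assumptions and are not part of the commit. In the test framework changed below, the same effect is exposed through AggTestConfig#withLogDocMergePolicy() and newRandomIndexWriterWithLogDocMergePolicy(Directory).

import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class LogDocMergePolicyExample {
    public static void main(String[] args) throws Exception {
        try (Directory directory = new ByteBuffersDirectory()) {
            // LogDocMergePolicy merges adjacent segments by document count, so the
            // stored document order matches insertion order even after merging.
            IndexWriterConfig config = new IndexWriterConfig().setMergePolicy(new LogDocMergePolicy());
            try (IndexWriter writer = new IndexWriter(directory, config)) {
                for (int i = 0; i < 1000; i++) {
                    Document doc = new Document();
                    doc.add(new LongPoint("value", i));
                    writer.addDocument(doc);
                }
                writer.forceMerge(1); // the single merged segment preserves insertion order
            }
        }
    }
}
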
--- Changed file 1 of 4 ---
@@ -21,6 +21,7 @@
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FieldExistsQuery;
@@ -766,7 +767,10 @@ public void testUsingTestCase() throws Exception {
assertEquals(2L, result.getBuckets().get(1).getDocCount());
assertEquals("{keyword=d}", result.getBuckets().get(2).getKeyAsString());
assertEquals(1L, result.getBuckets().get(2).getDocCount());
}, new AggTestConfig(new CompositeAggregationBuilder("name", Collections.singletonList(terms)), FIELD_TYPES));
},
new AggTestConfig(new CompositeAggregationBuilder("name", Collections.singletonList(terms)), FIELD_TYPES)
.withLogDocMergePolicy()
);
}

/**
@@ -819,7 +823,7 @@ public void testSubAggregationOfNested() throws Exception {
builder,
new KeywordFieldMapper.KeywordFieldType(nestedPath + "." + leafNameField),
new NumberFieldMapper.NumberFieldType("price", NumberFieldMapper.NumberType.LONG)
)
).withLogDocMergePolicy()
);
}

@@ -874,7 +878,7 @@ public void testSubAggregationOfNestedAggregateAfter() throws Exception {
builder,
new KeywordFieldMapper.KeywordFieldType(nestedPath + "." + leafNameField),
new NumberFieldMapper.NumberFieldType("price", NumberFieldMapper.NumberType.LONG)
)
).withLogDocMergePolicy()
);
}

@@ -3095,7 +3099,7 @@ public void testIndexSortWithDuplicate() throws Exception {

public void testParentFactoryValidation() throws Exception {
try (Directory directory = newDirectory()) {
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
try (RandomIndexWriter indexWriter = newRandomIndexWriterWithLogDocMergePolicy(directory)) {
Document document = new Document();
document.clear();
addToDocument(0, document, createDocument("term-field", "a", "long", 100L));
@@ -3650,6 +3654,9 @@ private <T extends AggregationBuilder, V extends InternalAggregation> void execu
}
if (forceMerge == false) {
config.setMergePolicy(NoMergePolicy.INSTANCE);
} else {
// Use LogDocMergePolicy to avoid randomization issues with the doc retrieval order.
config.setMergePolicy(new LogDocMergePolicy());
}
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config)) {
Document document = new Document();
--- Changed file 2 of 4 ---
@@ -25,7 +25,6 @@
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.search.Queries;
@@ -137,14 +136,9 @@ protected ScriptService getMockScriptService() {
return new ScriptService(Settings.EMPTY, engines, ScriptModule.CORE_CONTEXTS, () -> 1L);
}

private static RandomIndexWriter newRandomIndexWriter(Directory directory) throws IOException {
final IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy());
return new RandomIndexWriter(random(), directory, conf);
}

public void testNoDocs() throws IOException {
try (Directory directory = newDirectory()) {
try (RandomIndexWriter iw = newRandomIndexWriter(directory)) {
try (RandomIndexWriter iw = newRandomIndexWriterWithLogDocMergePolicy(directory)) {
// intentionally not writing any docs
}
try (DirectoryReader indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) {
@@ -171,7 +165,7 @@ public void testSingleNestingMax() throws IOException {
int expectedNestedDocs = 0;
double expectedMaxValue = Double.NEGATIVE_INFINITY;
try (Directory directory = newDirectory()) {
try (RandomIndexWriter iw = newRandomIndexWriter(directory)) {
try (RandomIndexWriter iw = newRandomIndexWriterWithLogDocMergePolicy(directory)) {
for (int i = 0; i < numRootDocs; i++) {
List<Iterable<IndexableField>> documents = new ArrayList<>();
int numNestedDocs = randomIntBetween(0, 20);
@@ -220,7 +214,7 @@ public void testDoubleNestingMax() throws IOException {
int expectedNestedDocs = 0;
double expectedMaxValue = Double.NEGATIVE_INFINITY;
try (Directory directory = newDirectory()) {
try (RandomIndexWriter iw = newRandomIndexWriter(directory)) {
try (RandomIndexWriter iw = newRandomIndexWriterWithLogDocMergePolicy(directory)) {
for (int i = 0; i < numRootDocs; i++) {
List<Iterable<IndexableField>> documents = new ArrayList<>();
int numNestedDocs = randomIntBetween(0, 20);
@@ -270,7 +264,7 @@ public void testOrphanedDocs() throws IOException {
int expectedNestedDocs = 0;
double expectedSum = 0;
try (Directory directory = newDirectory()) {
try (RandomIndexWriter iw = newRandomIndexWriter(directory)) {
try (RandomIndexWriter iw = newRandomIndexWriterWithLogDocMergePolicy(directory)) {
for (int i = 0; i < numRootDocs; i++) {
List<Iterable<IndexableField>> documents = new ArrayList<>();
int numNestedDocs = randomIntBetween(0, 20);
@@ -394,7 +388,7 @@ public void testResetRootDocId() throws Exception {

public void testNestedOrdering() throws IOException {
try (Directory directory = newDirectory()) {
try (RandomIndexWriter iw = newRandomIndexWriter(directory)) {
try (RandomIndexWriter iw = newRandomIndexWriterWithLogDocMergePolicy(directory)) {
iw.addDocuments(generateBook("1", new String[] { "a" }, new int[] { 12, 13, 14 }));
iw.addDocuments(generateBook("2", new String[] { "b" }, new int[] { 5, 50 }));
iw.addDocuments(generateBook("3", new String[] { "c" }, new int[] { 39, 19 }));
@@ -521,7 +515,7 @@ public void testNestedOrdering_random() throws IOException {
books.add(Tuple.tuple(Strings.format("%03d", i), chapters));
}
try (Directory directory = newDirectory()) {
try (RandomIndexWriter iw = newRandomIndexWriter(directory)) {
try (RandomIndexWriter iw = newRandomIndexWriterWithLogDocMergePolicy(directory)) {
int id = 0;
for (Tuple<String, int[]> book : books) {
iw.addDocuments(generateBook(Strings.format("%03d", id), new String[] { book.v1() }, book.v2()));
@@ -571,7 +565,7 @@ public void testNestedOrdering_random() throws IOException {

public void testPreGetChildLeafCollectors() throws IOException {
try (Directory directory = newDirectory()) {
try (RandomIndexWriter iw = newRandomIndexWriter(directory)) {
try (RandomIndexWriter iw = newRandomIndexWriterWithLogDocMergePolicy(directory)) {
List<Iterable<IndexableField>> documents = new ArrayList<>();
LuceneDocument document = new LuceneDocument();
document.add(new StringField(IdFieldMapper.NAME, Uid.encodeId("1"), Field.Store.NO));
@@ -689,7 +683,7 @@ public void testFieldAlias() throws IOException {
MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(VALUE_FIELD_NAME, NumberFieldMapper.NumberType.LONG);

try (Directory directory = newDirectory()) {
try (RandomIndexWriter iw = newRandomIndexWriter(directory)) {
try (RandomIndexWriter iw = newRandomIndexWriterWithLogDocMergePolicy(directory)) {
for (int i = 0; i < numRootDocs; i++) {
List<Iterable<IndexableField>> documents = new ArrayList<>();
int numNestedDocs = randomIntBetween(0, 20);
@@ -730,7 +724,7 @@ public void testNestedWithPipeline() throws IOException {
int expectedNestedDocs = 0;
double expectedMaxValue = Double.NEGATIVE_INFINITY;
try (Directory directory = newDirectory()) {
try (RandomIndexWriter iw = newRandomIndexWriter(directory)) {
try (RandomIndexWriter iw = newRandomIndexWriterWithLogDocMergePolicy(directory)) {
for (int i = 0; i < numRootDocs; i++) {
List<Iterable<IndexableField>> documents = new ArrayList<>();
expectedMaxValue = Math.max(expectedMaxValue, generateMaxDocs(documents, 1, i, NESTED_OBJECT, VALUE_FIELD_NAME));
@@ -796,7 +790,7 @@ public void testNestedUnderTerms() throws IOException {
)
);
try (Directory directory = newDirectory()) {
try (RandomIndexWriter iw = newRandomIndexWriter(directory)) {
try (RandomIndexWriter iw = newRandomIndexWriterWithLogDocMergePolicy(directory)) {
buildResellerData(numProducts, numResellers).accept(iw);
}
try (DirectoryReader indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) {
--- Changed file 3 of 4 ---
@@ -13,11 +13,8 @@
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.LuceneDocument;
@@ -62,14 +59,9 @@ protected DirectoryReader wrapDirectoryReader(DirectoryReader reader) throws IOE
return wrapInMockESDirectoryReader(reader);
}

private static RandomIndexWriter newRandomIndexWriter(Directory directory) throws IOException {
final IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy());
return new RandomIndexWriter(random(), directory, conf);
}

public void testNoDocs() throws IOException {
try (Directory directory = newDirectory()) {
try (RandomIndexWriter iw = newRandomIndexWriter(directory)) {
try (RandomIndexWriter iw = newRandomIndexWriterWithLogDocMergePolicy(directory)) {
// intentionally not writing any docs
}
try (DirectoryReader indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) {
@@ -98,7 +90,7 @@ public void testMaxFromParentDocs() throws IOException {
int expectedNestedDocs = 0;
double expectedMaxValue = Double.NEGATIVE_INFINITY;
try (Directory directory = newDirectory()) {
try (RandomIndexWriter iw = newRandomIndexWriter(directory)) {
try (RandomIndexWriter iw = newRandomIndexWriterWithLogDocMergePolicy(directory)) {
for (int i = 0; i < numParentDocs; i++) {
List<Iterable<IndexableField>> documents = new ArrayList<>();
int numNestedDocs = randomIntBetween(0, 20);
@@ -154,7 +146,7 @@ public void testFieldAlias() throws IOException {
MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(VALUE_FIELD_NAME, NumberFieldMapper.NumberType.LONG);

try (Directory directory = newDirectory()) {
try (RandomIndexWriter iw = newRandomIndexWriter(directory)) {
try (RandomIndexWriter iw = newRandomIndexWriterWithLogDocMergePolicy(directory)) {
for (int i = 0; i < numParentDocs; i++) {
List<Iterable<IndexableField>> documents = new ArrayList<>();
int numNestedDocs = randomIntBetween(0, 20);
@@ -219,7 +211,7 @@ public void testNestedUnderTerms() throws IOException {
);

try (Directory directory = newDirectory()) {
try (RandomIndexWriter iw = newRandomIndexWriter(directory)) {
try (RandomIndexWriter iw = newRandomIndexWriterWithLogDocMergePolicy(directory)) {
NestedAggregatorTests.buildResellerData(numProducts, numResellers).accept(iw);
}
try (DirectoryReader indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) {
--- Changed file 4 of 4 ---
@@ -23,6 +23,7 @@
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
@@ -487,6 +488,17 @@ protected ScriptService getMockScriptService() {
return null;
}

/**
* Create a RandomIndexWriter that uses the LogDocMergePolicy.
*
* The latest lucene upgrade adds a new merge policy that reverses the order of the documents and it is not compatible with some
* aggregation types. This writer avoids randomization by hardcoding the merge policy to LogDocMergePolicy.
*/
protected static RandomIndexWriter newRandomIndexWriterWithLogDocMergePolicy(Directory directory) throws IOException {
final IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy());
return new RandomIndexWriter(random(), directory, conf);
}

/**
* Collects all documents that match the provided query {@link Query} and
* returns the reduced {@link InternalAggregation}.
@@ -713,6 +725,10 @@ protected <T extends AggregationBuilder, V extends InternalAggregation> void tes
boolean timeSeries = aggTestConfig.builder().isInSortOrderExecutionRequired();
try (Directory directory = newDirectory()) {
IndexWriterConfig config = LuceneTestCase.newIndexWriterConfig(random(), new MockAnalyzer(random()));
if (aggTestConfig.useLogDocMergePolicy()) {
// Use LogDocMergePolicy to avoid randomization issues with the doc retrieval order for nested aggs.
config.setMergePolicy(new LogDocMergePolicy());
}
if (timeSeries) {
Sort sort = new Sort(
new SortField(TimeSeriesIdFieldMapper.NAME, SortField.Type.STRING, false),
@@ -1524,11 +1540,13 @@ public record AggTestConfig(
boolean splitLeavesIntoSeparateAggregators,
boolean shouldBeCached,
boolean incrementalReduce,

boolean useLogDocMergePolicy,
MappedFieldType... fieldTypes
) {

public AggTestConfig(AggregationBuilder builder, MappedFieldType... fieldTypes) {
this(new MatchAllDocsQuery(), builder, DEFAULT_MAX_BUCKETS, randomBoolean(), true, randomBoolean(), fieldTypes);
this(new MatchAllDocsQuery(), builder, DEFAULT_MAX_BUCKETS, randomBoolean(), true, randomBoolean(), false, fieldTypes);
}

public AggTestConfig withQuery(Query query) {
Expand All @@ -1539,6 +1557,7 @@ public AggTestConfig withQuery(Query query) {
splitLeavesIntoSeparateAggregators,
shouldBeCached,
incrementalReduce,
useLogDocMergePolicy,
fieldTypes
);
}
@@ -1551,6 +1570,7 @@ public AggTestConfig withSplitLeavesIntoSeperateAggregators(boolean splitLeavesI
splitLeavesIntoSeparateAggregators,
shouldBeCached,
incrementalReduce,
useLogDocMergePolicy,
fieldTypes
);
}
@@ -1563,6 +1583,7 @@ public AggTestConfig withShouldBeCached(boolean shouldBeCached) {
splitLeavesIntoSeparateAggregators,
shouldBeCached,
incrementalReduce,
useLogDocMergePolicy,
fieldTypes
);
}
@@ -1575,6 +1596,7 @@ public AggTestConfig withMaxBuckets(int maxBuckets) {
splitLeavesIntoSeparateAggregators,
shouldBeCached,
incrementalReduce,
useLogDocMergePolicy,
fieldTypes
);
}
@@ -1587,6 +1609,20 @@ public AggTestConfig withIncrementalReduce(boolean incrementalReduce) {
splitLeavesIntoSeparateAggregators,
shouldBeCached,
incrementalReduce,
useLogDocMergePolicy,
fieldTypes
);
}

public AggTestConfig withLogDocMergePolicy() {
return new AggTestConfig(
query,
builder,
maxBuckets,
splitLeavesIntoSeparateAggregators,
shouldBeCached,
incrementalReduce,
true,
fieldTypes
);
}
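
As a usage note, the two opt-in paths added by this commit could be exercised from an aggregation test roughly as sketched below. This is a hedged fragment, not code from the commit: it assumes it lives inside a subclass of AggregatorTestCase, and aggregationBuilder, fieldTypes, and the indexing body are placeholder names.

// Sketch only; assumes an AggregatorTestCase subclass with placeholder builder/field types.
public void testWithDeterministicMerges() throws IOException {
    // 1. Declarative opt-in: flag the AggTestConfig so the shared harness applies
    //    LogDocMergePolicy when it builds the index (as in CompositeAggregatorTests above).
    //    The resulting config would then be passed to the harness's testCase(...) call.
    AggTestConfig config = new AggTestConfig(aggregationBuilder, fieldTypes).withLogDocMergePolicy();

    // 2. Imperative opt-in: build the RandomIndexWriter directly when the test
    //    manages its own writer (as in the nested aggregator tests above).
    try (Directory directory = newDirectory()) {
        try (RandomIndexWriter writer = newRandomIndexWriterWithLogDocMergePolicy(directory)) {
            // index documents here ...
        }
    }
}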